1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
19 #include "allocacheck.h" // for alloca
21 // Convert the given node into a call to the specified helper passing
22 // the given argument list.
24 // Tries to fold constants and also adds an edge for overflow exceptions;
25 // returns the morphed tree.
26 GenTreePtr Compiler::fgMorphCastIntoHelper(GenTreePtr tree, int helper, GenTreePtr oper)
// Morph a GT_CAST node ('tree', whose cast operand is 'oper') into a call to the
// JIT helper 'helper', passing 'oper' as the single argument.
// Constant operands are folded first; the helper call is only created when folding
// does not eliminate the cast.
30 /* If the operand is a constant, we'll try to fold it */
31 if (oper->OperIsConst())
33 GenTreePtr oldTree = tree;
35 tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)
// Re-morph whatever folding produced.
39 return fgMorphTree(tree);
41 else if (tree->OperKind() & GTK_CONST)
// The whole cast folded down to a constant node; morph the constant instead.
43 return fgMorphConst(tree);
46 // assert that oper is unchanged and that it is still a GT_CAST node
47 noway_assert(tree->gtCast.CastOp() == oper);
48 noway_assert(tree->gtOper == GT_CAST);
// fgMorphIntoHelperCall rewrites 'tree' in place, so the result is the same node.
50 result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
51 assert(result == tree);
55 /*****************************************************************************
57 * Convert the given node into a call to the specified helper passing
58 * the given argument list.
61 GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeArgList* args)
// Rewrite 'tree' in place into a CT_HELPER call to 'helper' with argument list 'args',
// reset all call-specific fields to a clean state, and then morph the call's arguments.
// Returns the morphed call node.
63 // The helper call ought to be semantically equivalent to the original node, so preserve its VN.
64 tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN);
66 tree->gtFlags |= GTF_CALL;
// Propagate any side-effect flags from the arguments up to the call node.
69 tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);
// Initialize the call node's fields; everything not applicable to a helper call is cleared.
71 tree->gtCall.gtCallType = CT_HELPER;
72 tree->gtCall.gtCallMethHnd = eeFindHelper(helper);
73 tree->gtCall.gtCallArgs = args;
74 tree->gtCall.gtCallObjp = nullptr;
75 tree->gtCall.gtCallLateArgs = nullptr;
76 tree->gtCall.fgArgInfo = nullptr;
77 tree->gtCall.gtRetClsHnd = nullptr;
78 tree->gtCall.gtCallMoreFlags = 0;
79 tree->gtCall.gtInlineCandidateInfo = nullptr;
80 tree->gtCall.gtControlExpr = nullptr;
83 tree->gtCall.gtCallRegUsedMask = RBM_NONE;
84 #endif // LEGACY_BACKEND
87 // Helper calls are never candidates.
89 tree->gtCall.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;
92 #ifdef FEATURE_READYTORUN_COMPILER
93 tree->gtCall.gtEntryPoint.addr = nullptr;
96 #if (defined(_TARGET_X86_) || defined(_TARGET_ARM_)) && !defined(LEGACY_BACKEND)
// On 32-bit RyuJIT targets a TYP_LONG helper result comes back in a register pair,
// so set up the multi-reg return descriptor for it.
97 if (varTypeIsLong(tree))
99 GenTreeCall* callNode = tree->AsCall();
100 ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
101 retTypeDesc->Reset();
102 retTypeDesc->InitializeLongReturnType(this);
103 callNode->ClearOtherRegs();
105 #endif // _TARGET_XXX_
107 /* Perform the morphing */
109 tree = fgMorphArgs(tree->AsCall());
114 /*****************************************************************************
116 * Determine if a relop must be morphed to a qmark to manifest a boolean value.
117 * This is done when code generation can't create straight-line code to do it.
119 bool Compiler::fgMorphRelopToQmark(GenTreePtr tree)
// Returns true when a relational operator must be materialized via a qmark
// because code generation cannot produce the boolean value with straight-line code.
121 #ifndef LEGACY_BACKEND
123 #else // LEGACY_BACKEND
// The legacy backend needs the qmark form for long and floating-point compares.
124 return (genActualType(tree->TypeGet()) == TYP_LONG) || varTypeIsFloating(tree->TypeGet());
125 #endif // LEGACY_BACKEND
128 /*****************************************************************************
130 * Morph a cast node (we perform some very simple transformations here).
134 #pragma warning(push)
135 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
137 GenTreePtr Compiler::fgMorphCast(GenTreePtr tree)
// Morph a GT_CAST node: split casts the target cannot do in one step into
// two-step casts, convert casts that need runtime support into helper calls,
// discard redundant casts, and fold constant operands. Returns the morphed tree,
// which may no longer be a GT_CAST node.
139 noway_assert(tree->gtOper == GT_CAST);
140 noway_assert(genTypeSize(TYP_I_IMPL) == sizeof(void*));
142 /* The first sub-operand is the thing being cast */
144 GenTreePtr oper = tree->gtCast.CastOp();
145 var_types srcType = genActualType(oper->TypeGet());
148 var_types dstType = tree->CastToType();
149 unsigned dstSize = genTypeSize(dstType);
151 // See if the cast has to be done in two steps. R -> I
152 if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
154 // Only x86 must go through TYP_DOUBLE to get to all
155 // integral types everybody else can get straight there
156 // except for when using helpers
157 if (srcType == TYP_FLOAT
158 #if !FEATURE_STACK_FP_X87
160 #if defined(_TARGET_ARM64_)
161 // Amd64: src = float, dst is overflow conversion.
162 // This goes through helper and hence src needs to be converted to double.
163 && tree->gtOverflow()
164 #elif defined(_TARGET_AMD64_)
165 // Amd64: src = float, dst = uint64 or overflow conversion.
166 // This goes through helper and hence src needs to be converted to double.
167 && (tree->gtOverflow() || (dstType == TYP_ULONG))
168 #elif defined(_TARGET_ARM_)
169 // Arm: src = float, dst = int64/uint64 or overflow conversion.
170 && (tree->gtOverflow() || varTypeIsLong(dstType))
173 #endif // FEATURE_STACK_FP_X87
// Widen the float operand to double first; the helpers below take a double.
176 oper = gtNewCastNode(TYP_DOUBLE, oper, TYP_DOUBLE);
179 // do we need to do it in two steps R -> I, '-> smallType
180 CLANG_FORMAT_COMMENT_ANCHOR;
182 #if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
183 if (dstSize < genTypeSize(TYP_INT))
// Two-step: R -> INT first, then the small-type narrowing; the intermediate
// cast inherits the unsigned/overflow/except flags from the original cast.
185 oper = gtNewCastNodeL(TYP_INT, oper, TYP_INT);
186 oper->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
187 tree->gtFlags &= ~GTF_UNSIGNED;
190 if (dstSize < sizeof(void*))
192 oper = gtNewCastNodeL(TYP_I_IMPL, oper, TYP_I_IMPL);
193 oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
198 /* Note that if we need to use a helper call then we can not morph oper */
199 if (!tree->gtOverflow())
201 #ifdef _TARGET_ARM64_ // On ARM64 All non-overflow checking conversions can be optimized
207 #ifdef _TARGET_X86_ // there is no rounding convert to integer instruction on ARM or x64 so skip this
208 #ifdef LEGACY_BACKEND
209 // the RyuJIT backend does not use the x87 FPU and therefore
210 // does not support folding the cast conv.i4(round.d(d))
211 if ((oper->gtOper == GT_INTRINSIC) &&
212 (oper->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round))
214 /* optimization: conv.i4(round.d(d)) -> round.i(d) */
215 oper->gtType = dstType;
216 return fgMorphTree(oper);
218 // if SSE2 is not enabled, we need the helper
220 #endif // LEGACY_BACKEND
221 if (!opts.compCanUseSSE2)
223 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
226 #endif // _TARGET_X86_
230 #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
233 #else // _TARGET_ARM_
235 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
236 #endif // _TARGET_ARM_
238 #ifdef _TARGET_AMD64_
239 // SSE2 has instructions to convert a float/double directly to a long
244 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
245 #endif //_TARGET_AMD64_
247 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
251 #endif // _TARGET_ARM64_
// Overflow-checking R -> I conversions always go through the _OVF helpers.
258 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
260 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
262 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
264 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
269 noway_assert(!"Unexpected dstType");
272 #ifndef _TARGET_64BIT_
273 // The code generation phase (for x86 & ARM32) does not handle casts
274 // directly from [u]long to anything other than [u]int. Insert an
275 // intermediate cast to native int.
276 else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
278 oper = gtNewCastNode(TYP_I_IMPL, oper, TYP_I_IMPL);
279 oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
280 tree->gtFlags &= ~GTF_UNSIGNED;
282 #endif //!_TARGET_64BIT_
285 else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) &&
286 !varTypeIsLong(oper->gtCast.CastOp()))
288 // optimization: conv.r4(conv.r8(?)) -> conv.r4(d)
289 // except when the ultimate source is a long because there is no long-to-float helper, so it must be 2 step.
290 // This happens semi-frequently because there is no IL 'conv.r4.un'
291 oper->gtType = TYP_FLOAT;
292 oper->CastToType() = TYP_FLOAT;
293 return fgMorphTree(oper);
295 // converts long/ulong --> float/double casts into helper calls.
296 else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
298 if (dstType == TYP_FLOAT)
300 // there is only a double helper, so we
301 // - change the dsttype to double
302 // - insert a cast from double to float
303 // - recurse into the resulting tree
304 tree->CastToType() = TYP_DOUBLE;
305 tree->gtType = TYP_DOUBLE;
307 tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
309 return fgMorphTree(tree);
311 if (tree->gtFlags & GTF_UNSIGNED)
312 return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
313 return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
315 #endif //_TARGET_ARM_
317 #ifdef _TARGET_AMD64_
318 // Do we have to do two step U4/8 -> R4/8 ?
319 // Codegen supports the following conversion as one-step operation
323 // The following conversions are performed as two-step operations using above.
324 // U4 -> R4/8 = U4-> Long -> R4/8
325 // U8 -> R4 = U8 -> R8 -> R4
326 else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
328 srcType = genUnsignedType(srcType);
330 if (srcType == TYP_ULONG)
332 if (dstType == TYP_FLOAT)
334 // Codegen can handle U8 -> R8 conversion.
335 // U8 -> R4 = U8 -> R8 -> R4
336 // - change the dsttype to double
337 // - insert a cast from double to float
338 // - recurse into the resulting tree
339 tree->CastToType() = TYP_DOUBLE;
340 tree->gtType = TYP_DOUBLE;
341 tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
342 return fgMorphTree(tree);
345 else if (srcType == TYP_UINT)
// U4 -> R4/8: widen to LONG first, then let codegen do Long -> R4/8.
347 oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
348 oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
349 tree->gtFlags &= ~GTF_UNSIGNED;
352 #endif // _TARGET_AMD64_
355 // Do we have to do two step U4/8 -> R4/8 ?
356 else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
358 srcType = genUnsignedType(srcType);
360 if (srcType == TYP_ULONG)
362 return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
364 else if (srcType == TYP_UINT)
366 oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
367 oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
368 tree->gtFlags &= ~GTF_UNSIGNED;
369 #ifndef LEGACY_BACKEND
370 return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
374 #ifndef LEGACY_BACKEND
375 else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType))
377 return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
380 #endif //_TARGET_XARCH_
381 else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
383 // We are casting away GC information. we would like to just
384 // change the type to int, however this gives the emitter fits because
385 // it believes the variable is a GC variable at the beginning of the
386 // instruction group, but is not turned non-gc by the code generator
387 // we fix this by copying the GC pointer to a non-gc pointer temp.
388 noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");
390 // We generate an assignment to an int and then do the cast from an int. With this we avoid
391 // the gc problem and we allow casts to bytes, longs, etc...
392 unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
// Temporarily retype 'oper' so the temp is created as a non-GC native int,
// then restore its real type afterwards.
393 oper->gtType = TYP_I_IMPL;
394 GenTreePtr asg = gtNewTempAssign(lclNum, oper);
395 oper->gtType = srcType;
398 GenTreePtr cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), dstType);
400 // Generate the comma tree
401 oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);
403 return fgMorphTree(oper);
406 // Look for narrowing casts ([u]long -> [u]int) and try to push them
407 // down into the operand before morphing it.
409 // It doesn't matter if this cast is from ulong or long (i.e. if
410 // GTF_UNSIGNED is set) because the transformation is only applied to
411 // overflow-insensitive narrowing casts, which always silently truncate.
413 // Note that casts from [u]long to small integer types are handled above.
414 if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT)))
416 // As a special case, look for overflow-sensitive casts of an AND
417 // expression, and see if the second operand is a small constant. Since
418 // the result of an AND is bound by its smaller operand, it may be
419 // possible to prove that the cast won't overflow, which will in turn
420 // allow the cast's operand to be transformed.
421 if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
423 GenTreePtr andOp2 = oper->gtOp.gtOp2;
425 // Special case to the special case: AND with a casted int.
426 if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
428 // gtFoldExprConst will deal with whether the cast is signed or
429 // unsigned, or overflow-sensitive.
430 andOp2 = oper->gtOp.gtOp2 = gtFoldExprConst(andOp2);
433 // Look for a constant less than 2^{32} for a cast to uint, or less
434 // than 2^{31} for a cast to int.
435 int maxWidth = (dstType == TYP_UINT) ? 32 : 31;
437 if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
439 // This cast can't overflow.
440 tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
444 // Only apply this transformation during global morph,
445 // when neither the cast node nor the oper node may throw an exception
446 // based on the upper 32 bits.
448 if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx())
450 // For these operations the lower 32 bits of the result only depends
451 // upon the lower 32 bits of the operands
453 if ((oper->OperGet() == GT_ADD) || (oper->OperGet() == GT_MUL) || (oper->OperGet() == GT_AND) ||
454 (oper->OperGet() == GT_OR) || (oper->OperGet() == GT_XOR))
// The cast node itself is no longer needed: push the narrowing into the operands.
456 DEBUG_DESTROY_NODE(tree);
458 // Insert narrowing casts for op1 and op2
459 oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, dstType);
460 oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, dstType);
462 // Clear the GT_MUL_64RSLT if it is set
463 if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
465 oper->gtFlags &= ~GTF_MUL_64RSLT;
468 // The operation now produces a 32-bit result.
469 oper->gtType = TYP_INT;
471 // Remorph the new tree as the casts that we added may be folded away.
472 return fgMorphTree(oper);
478 noway_assert(tree->gtOper == GT_CAST);
480 /* Morph the operand */
481 tree->gtCast.CastOp() = oper = fgMorphTree(oper);
483 /* Reset the call flag */
484 tree->gtFlags &= ~GTF_CALL;
486 /* unless we have an overflow cast, reset the except flag */
487 if (!tree->gtOverflow())
489 tree->gtFlags &= ~GTF_EXCEPT;
492 /* Just in case new side effects were introduced */
493 tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);
// Re-read the source type: morphing the operand may have changed it.
495 srcType = oper->TypeGet();
497 /* if GTF_UNSIGNED is set then force srcType to an unsigned type */
498 if (tree->gtFlags & GTF_UNSIGNED)
500 srcType = genUnsignedType(srcType);
503 srcSize = genTypeSize(srcType);
505 if (!gtIsActiveCSE_Candidate(tree)) // tree cannot be a CSE candidate
507 /* See if we can discard the cast */
508 if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
510 if (srcType == dstType)
511 { // Certainly if they are identical it is pointless
515 if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
517 unsigned varNum = oper->gtLclVarCommon.gtLclNum;
518 LclVarDsc* varDsc = &lvaTable[varNum];
// A normalize-on-store small-typed local is already guaranteed to hold a
// properly-truncated value, so a cast to its own type is redundant.
519 if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
525 bool unsignedSrc = varTypeIsUnsigned(srcType);
526 bool unsignedDst = varTypeIsUnsigned(dstType);
527 bool signsDiffer = (unsignedSrc != unsignedDst);
529 // For same sized casts with
530 // the same signs or non-overflow cast we discard them as well
531 if (srcSize == dstSize)
533 /* This should have been handled above */
534 noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));
541 if (!tree->gtOverflow())
543 /* For small type casts, when necessary we force
544 the src operand to the dstType and allow the
545 implied load from memory to perform the casting */
546 if (varTypeIsSmall(srcType))
548 switch (oper->gtOper)
554 oper->gtType = dstType;
567 if (srcSize < dstSize) // widening cast
569 // Keep any long casts
570 if (dstSize == sizeof(int))
572 // Only keep signed to unsigned widening cast with overflow check
573 if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
579 // Casts from signed->unsigned can never overflow while widening
581 if (unsignedSrc || !unsignedDst)
583 tree->gtFlags &= ~GTF_OVERFLOW;
588 // Try to narrow the operand of the cast and discard the cast
589 // Note: Do not narrow a cast that is marked as a CSE
590 // And do not narrow if the oper is marked as a CSE either
// optNarrowTree is called twice: first as a query (doit==false), then to
// actually perform the narrowing (doit==true).
592 if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) &&
593 optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false))
595 optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true);
597 /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
598 if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
600 oper = oper->gtCast.CastOp();
// Post-processing keyed on the (already morphed) operand's operator.
607 switch (oper->gtOper)
609 /* If the operand is a constant, we'll fold it */
615 GenTreePtr oldTree = tree;
617 tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)
619 // Did we get a comma throw as a result of gtFoldExprConst?
620 if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
622 noway_assert(fgIsCommaThrow(tree));
623 tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
624 fgMorphTreeDone(tree);
627 else if (tree->gtOper != GT_CAST)
632 noway_assert(tree->gtCast.CastOp() == oper); // unchanged
637 /* Check for two consecutive casts into the same dstType */
638 if (!tree->gtOverflow())
640 var_types dstType2 = oper->CastToType();
641 if (dstType == dstType2)
648 /* If op1 is a mod node, mark it with the GTF_MOD_INT_RESULT flag
649 so that the code generator will know not to convert the result
650 of the idiv to a regpair */
652 if (dstType == TYP_INT)
654 tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
659 if (dstType == TYP_UINT)
661 tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
666 // Check for cast of a GT_COMMA with a throw overflow
667 // Bug 110829: Since this optimization will bash the types
668 // neither oper or commaOp2 can be CSE candidates
669 if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper can not be a CSE candidate
671 GenTreePtr commaOp2 = oper->gtOp.gtOp2;
673 if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 can not be a CSE candidate
675 // need type of oper to be same as tree
// Retype the comma's dummy second operand to a zero constant of the cast's
// result type so the comma itself can take the cast's place.
676 if (tree->gtType == TYP_LONG)
678 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
679 commaOp2->gtIntConCommon.SetLngValue(0);
680 /* Change the types of oper and commaOp2 to TYP_LONG */
681 oper->gtType = commaOp2->gtType = TYP_LONG;
683 else if (varTypeIsFloating(tree->gtType))
685 commaOp2->ChangeOperConst(GT_CNS_DBL);
686 commaOp2->gtDblCon.gtDconVal = 0.0;
687 // Change the types of oper and commaOp2
688 // X87 promotes everything to TYP_DOUBLE
689 // But other's are a little more precise
690 const var_types newTyp
691 #if FEATURE_X87_DOUBLES
693 #else // FEATURE_X87_DOUBLES
695 #endif // FEATURE_X87_DOUBLES
696 oper->gtType = commaOp2->gtType = newTyp;
700 commaOp2->ChangeOperConst(GT_CNS_INT);
701 commaOp2->gtIntCon.gtIconVal = 0;
702 /* Change the types of oper and commaOp2 to TYP_INT */
703 oper->gtType = commaOp2->gtType = TYP_INT;
707 if (vnStore != nullptr)
// Keep value numbering consistent with the rewritten constant.
709 fgValueNumberTreeConst(commaOp2);
712 /* Return the GT_COMMA node as the new tree */
719 } /* end switch (oper->gtOper) */
722 if (tree->gtOverflow())
// Overflow-checking cast survived morphing: make sure the current block has
// an overflow-throw helper block to branch to.
724 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
731 /* Here we've eliminated the cast, so just return its operand */
732 assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate
734 DEBUG_DESTROY_NODE(tree);
741 /*****************************************************************************
743 * Perform an unwrap operation on a Proxy object
746 GenTreePtr Compiler::fgUnwrapProxy(GenTreePtr objRef)
// Unwrap a transparent-proxy 'this' reference into the underlying 'real this'
// by inserting two invariant indirections (proxy -> real proxy -> server).
748 assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef));
750 CORINFO_EE_INFO* pInfo = eeGetEEInfo();
753 // Perform the unwrap:
755 // This requires two extra indirections.
756 // We mark these indirections as 'invariant' and
757 // the CSE logic will hoist them when appropriate.
759 // Note that each dereference is a GC pointer
// First indirection: load the real-proxy field out of the transparent proxy.
761 addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL));
763 objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
764 objRef->gtFlags |= GTF_IND_INVARIANT;
// Second indirection: load the server object out of the real proxy.
766 addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL));
768 objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
769 objRef->gtFlags |= GTF_IND_INVARIANT;
771 // objRef now holds the 'real this' reference (i.e. the unwrapped proxy)
775 /*****************************************************************************
777 * Morph an argument list; compute the pointer argument count in the process.
779 * NOTE: This function can be called from any place in the JIT to perform re-morphing
780 * due to graph altering modifications such as copy / constant propagation
783 unsigned UpdateGT_LISTFlags(GenTreePtr tree)
// Recompute the GTF_ALL_EFFECT side-effect flags of a GT_LIST chain bottom-up
// and return the updated flags of 'tree'.
785 assert(tree->gtOper == GT_LIST);
788 if (tree->gtOp.gtOp2)
// Recurse into the rest of the list first so its flags are up to date.
790 flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2);
793 flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
// Replace (rather than accumulate into) the node's effect flags.
795 tree->gtFlags &= ~GTF_ALL_EFFECT;
796 tree->gtFlags |= flags;
798 return tree->gtFlags;
802 void fgArgTabEntry::Dump()
// Debug dump of this argument-table entry: argument number, register or stack
// placement, and any set state flags.
804 printf("fgArgTabEntry[arg %u", argNum);
805 if (regNum != REG_STK)
807 printf(", %s, regs=%u", getRegName(regNum), numRegs);
811 printf(", numSlots=%u, slotNum=%u", numSlots, slotNum);
813 printf(", align=%u", alignment);
814 if (lateArgInx != (unsigned)-1)
816 printf(", lateArgInx=%u", lateArgInx);
824 printf(", tmpNum=V%02u", tmpNum);
828 printf(", needPlace");
836 printf(", processed");
844 printf(", isBackFilled");
848 printf(", isNonStandard");
854 fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs)
// Construct an empty argument-info table for 'call', sized for 'numArgs' entries.
// Entries are added later via AddRegArg/AddStkArg.
858 argCount = 0; // filled in arg count, starts at zero
859 nextSlotNum = INIT_ARG_STACK_SLOT;
861 #if defined(UNIX_X86_ABI)
862 alignmentDone = false;
866 #if FEATURE_FIXED_OUT_ARGS
870 argTableSize = numArgs; // the allocated table size
873 hasStackArgs = false;
874 argsComplete = false;
877 if (argTableSize == 0)
883 argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
887 /*****************************************************************************
889 * fgArgInfo Copy Constructor
891 * This method needs to act like a copy constructor for fgArgInfo.
892 * The newCall needs to have its fgArgInfo initialized such that
893 * we have newCall that is an exact copy of the oldCall.
894 * We have to take care since the argument information
895 * in the argTable contains pointers that must point to the
896 * new arguments and not the old arguments.
898 fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall)
// Copy-construct the argument info of 'oldCall' for 'newCall'. The entries are
// cloned, and every GenTreePtr field (parent/node) is re-pointed from the old
// call's argument trees to the corresponding trees of the new call.
900 fgArgInfoPtr oldArgInfo = oldCall->gtCall.fgArgInfo;
902 compiler = oldArgInfo->compiler;
904 argCount = 0; // filled in arg count, starts at zero
905 nextSlotNum = INIT_ARG_STACK_SLOT;
906 stkLevel = oldArgInfo->stkLevel;
907 #if defined(UNIX_X86_ABI)
908 alignmentDone = oldArgInfo->alignmentDone;
909 stkSizeBytes = oldArgInfo->stkSizeBytes;
910 padStkAlign = oldArgInfo->padStkAlign;
912 #if FEATURE_FIXED_OUT_ARGS
913 outArgSize = oldArgInfo->outArgSize;
915 argTableSize = oldArgInfo->argTableSize;
916 argsComplete = false;
918 if (argTableSize > 0)
920 argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
921 for (unsigned inx = 0; inx < argTableSize; inx++)
923 argTable[inx] = nullptr;
// The copy is only valid once the source table has been fully laid out.
927 assert(oldArgInfo->argsComplete);
929 // We create local, artificial GenTreeArgLists that include the gtCallObjp, if that exists, as first argument,
930 // so we can iterate over these argument lists more uniformly.
931 // Need to provide a temporary non-null first arguments to these constructors: if we use them, we'll replace them
932 GenTreeArgList* newArgs;
933 GenTreeArgList newArgObjp(newCall, newCall->gtCallArgs);
934 GenTreeArgList* oldArgs;
935 GenTreeArgList oldArgObjp(oldCall, oldCall->gtCallArgs);
937 if (newCall->gtCallObjp == nullptr)
939 assert(oldCall->gtCallObjp == nullptr);
940 newArgs = newCall->gtCallArgs;
941 oldArgs = oldCall->gtCallArgs;
945 assert(oldCall->gtCallObjp != nullptr);
946 newArgObjp.Current() = newCall->gtCallArgs;
947 newArgs = &newArgObjp;
948 oldArgObjp.Current() = oldCall->gtCallObjp;
949 oldArgs = &oldArgObjp;
// First pass: walk the (early) argument lists of both calls in lock-step and
// clone each matching table entry, fixing up its 'parent' pointer.
954 GenTreeArgList* newParent = nullptr;
955 GenTreeArgList* oldParent = nullptr;
956 fgArgTabEntryPtr* oldArgTable = oldArgInfo->argTable;
957 bool scanRegArgs = false;
961 /* Get hold of the next argument values for the oldCall and newCall */
963 newCurr = newArgs->Current();
964 oldCurr = oldArgs->Current();
965 if (newArgs != &newArgObjp)
972 assert(newParent == nullptr && oldParent == nullptr);
974 newArgs = newArgs->Rest();
975 oldArgs = oldArgs->Rest();
977 fgArgTabEntryPtr oldArgTabEntry = nullptr;
978 fgArgTabEntryPtr newArgTabEntry = nullptr;
980 for (unsigned inx = 0; inx < argTableSize; inx++)
982 oldArgTabEntry = oldArgTable[inx];
984 if (oldArgTabEntry->parent == oldParent)
986 assert((oldParent == nullptr) == (newParent == nullptr));
988 // We have found the matching "parent" field in oldArgTabEntry
990 newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
992 // First block copy all fields
994 *newArgTabEntry = *oldArgTabEntry;
996 // Then update all GenTreePtr fields in the newArgTabEntry
998 newArgTabEntry->parent = newParent;
1000 // The node field is likely to have been updated
1001 // to point at a node in the gtCallLateArgs list
1003 if (oldArgTabEntry->node == oldCurr)
1005 // node is not pointing into the gtCallLateArgs list
1006 newArgTabEntry->node = newCurr;
1010 // node must be pointing into the gtCallLateArgs list
1012 // We will fix this pointer up in the next loop
1014 newArgTabEntry->node = nullptr; // For now we assign a NULL to this field
1019 // Now initialize the proper element in the argTable array
1021 argTable[inx] = newArgTabEntry;
1025 // We should have found the matching oldArgTabEntry and created the newArgTabEntry
1027 assert(newArgTabEntry != nullptr);
// Second pass: walk the late-argument lists to fix up the 'node' pointers that
// were deliberately left null above.
1032 newArgs = newCall->gtCallLateArgs;
1033 oldArgs = oldCall->gtCallLateArgs;
1037 /* Get hold of the next argument values for the oldCall and newCall */
1039 assert(newArgs->OperIsList());
1041 newCurr = newArgs->Current();
1042 newArgs = newArgs->Rest();
1044 assert(oldArgs->OperIsList());
1046 oldCurr = oldArgs->Current();
1047 oldArgs = oldArgs->Rest();
1049 fgArgTabEntryPtr oldArgTabEntry = nullptr;
1050 fgArgTabEntryPtr newArgTabEntry = nullptr;
1052 for (unsigned inx = 0; inx < argTableSize; inx++)
1054 oldArgTabEntry = oldArgTable[inx];
1056 if (oldArgTabEntry->node == oldCurr)
1058 // We have found the matching "node" field in oldArgTabEntry
1060 newArgTabEntry = argTable[inx];
1061 assert(newArgTabEntry != nullptr);
1063 // update the "node" GenTreePtr fields in the newArgTabEntry
1065 assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field
1067 newArgTabEntry->node = newCurr;
// Finally copy over the scalar summary fields; this table is now complete.
1074 argCount = oldArgInfo->argCount;
1075 nextSlotNum = oldArgInfo->nextSlotNum;
1076 hasRegArgs = oldArgInfo->hasRegArgs;
1077 hasStackArgs = oldArgInfo->hasStackArgs;
1078 argsComplete = true;
1082 void fgArgInfo::AddArg(fgArgTabEntryPtr curArgTabEntry)
// Append 'curArgTabEntry' to the argument table; the table must have room.
1084 assert(argCount < argTableSize);
1085 argTable[argCount] = curArgTabEntry;
1089 fgArgTabEntryPtr fgArgInfo::AddRegArg(
1090 unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
// Create and append a new register-passed argument entry.
// All state flags start false and slot fields start at zero; 'lateArgInx' and
// 'tmpNum' use (unsigned)-1 as their "not set" sentinel.
1092 fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
1094 curArgTabEntry->argNum = argNum;
1095 curArgTabEntry->node = node;
1096 curArgTabEntry->parent = parent;
1097 curArgTabEntry->regNum = regNum;
1098 curArgTabEntry->slotNum = 0;
1099 curArgTabEntry->numRegs = numRegs;
1100 curArgTabEntry->numSlots = 0;
1101 curArgTabEntry->alignment = alignment;
1102 curArgTabEntry->lateArgInx = (unsigned)-1;
1103 curArgTabEntry->tmpNum = (unsigned)-1;
1104 curArgTabEntry->isSplit = false;
1105 curArgTabEntry->isTmp = false;
1106 curArgTabEntry->needTmp = false;
1107 curArgTabEntry->needPlace = false;
1108 curArgTabEntry->processed = false;
1109 curArgTabEntry->isHfaRegArg = false;
1110 curArgTabEntry->isBackFilled = false;
1111 curArgTabEntry->isNonStandard = false;
1114 AddArg(curArgTabEntry);
1115 return curArgTabEntry;
1118 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// Overload for the SysV AMD64 struct-passing ABI: delegates to the basic
// AddRegArg and then records the struct-specific fields (second register and
// the struct register-passing descriptor).
1119 fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned argNum,
1125 const bool isStruct,
1126 const regNumber otherRegNum,
1127 const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
1129 fgArgTabEntryPtr curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
1130 assert(curArgTabEntry != nullptr);
1132 // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
1133 // PlaceHolder node (in case of needed late argument, for example.)
1134 // This requires using an extra flag. At creation time the state is right, so
1135 // and this assert enforces that.
1136 assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
1137 curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
1138 curArgTabEntry->isStruct = isStruct; // is this a struct arg
1140 if (isStruct && structDescPtr != nullptr)
1142 curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
1145 return curArgTabEntry;
1147 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
1149 fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum,
1154 FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct))
// Create and append a new stack-passed argument entry occupying 'numSlots'
// outgoing-arg slots, aligning the next free slot first.
1156 fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
// Round the slot cursor up so this argument starts at the required alignment.
1158 nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
1160 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
1161 // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
1162 // PlaceHolder node (in case of needed late argument, for example.)
1163 // This requires using an extra flag. At creation time the state is right, so
1164 // and this assert enforces that.
1165 assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
1166 curArgTabEntry->isStruct = isStruct; // is this a struct arg
1167 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
1169 curArgTabEntry->argNum = argNum;
1170 curArgTabEntry->node = node;
1171 curArgTabEntry->parent = parent;
1172 curArgTabEntry->regNum = REG_STK;
1173 curArgTabEntry->slotNum = nextSlotNum;
1174 curArgTabEntry->numRegs = 0;
1175 curArgTabEntry->numSlots = numSlots;
1176 curArgTabEntry->alignment = alignment;
1177 curArgTabEntry->lateArgInx = (unsigned)-1;
1178 curArgTabEntry->tmpNum = (unsigned)-1;
1179 curArgTabEntry->isSplit = false;
1180 curArgTabEntry->isTmp = false;
1181 curArgTabEntry->needTmp = false;
1182 curArgTabEntry->needPlace = false;
1183 curArgTabEntry->processed = false;
1184 curArgTabEntry->isHfaRegArg = false;
1185 curArgTabEntry->isBackFilled = false;
1186 curArgTabEntry->isNonStandard = false;
1188 hasStackArgs = true;
1189 AddArg(curArgTabEntry);
// Advance the slot cursor past this argument.
1191 nextSlotNum += numSlots;
1192 return curArgTabEntry;
1195 void fgArgInfo::RemorphReset()
// Reset the stack-slot cursor before re-morphing the call's arguments.
1197 nextSlotNum = INIT_ARG_STACK_SLOT;
// RemorphRegArg: during a re-morph, locate the existing table entry for the
// register argument at position 'argNum', verify that it still agrees with the
// caller-supplied regNum/alignment/parent, and refresh the entry's 'node'
// pointer from the gtCallLateArgs list if the tree was rewritten.
// Returns the (possibly updated) table entry.
1200 fgArgTabEntry* fgArgInfo::RemorphRegArg(
1201 unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
1203 fgArgTabEntryPtr curArgTabEntry = nullptr;
1204 unsigned regArgInx = 0;
// Scan the table for the entry matching this argument position.
1207 for (inx = 0; inx < argCount; inx++)
1209 curArgTabEntry = argTable[inx];
1210 if (curArgTabEntry->argNum == argNum)
1217 if (curArgTabEntry->parent != nullptr)
1219 assert(curArgTabEntry->parent->OperIsList());
1220 argx = curArgTabEntry->parent->Current();
// GTF_LATE_ARG on the early node means the real value lives in the late arg list.
1221 isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
1225 argx = curArgTabEntry->node;
1234 // if this was a nonstandard arg the table is definitive
1235 if (curArgTabEntry->isNonStandard)
1237 regNum = curArgTabEntry->regNum;
// The re-morphed argument must match what was recorded on the first pass.
1240 assert(curArgTabEntry->argNum == argNum);
1241 assert(curArgTabEntry->regNum == regNum);
1242 assert(curArgTabEntry->alignment == alignment);
1243 assert(curArgTabEntry->parent == parent);
1245 if (curArgTabEntry->node != node)
1247 GenTreePtr argx = nullptr;
1248 unsigned regIndex = 0;
1250 /* process the register argument list */
1251 for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; (regIndex++, list = list->Rest()))
1253 argx = list->Current();
1254 assert(!argx->IsArgPlaceHolderNode()); // No place holders nodes are in gtCallLateArgs;
1255 if (regIndex == regArgInx)
1260 assert(regIndex == regArgInx);
1261 assert(regArgInx == curArgTabEntry->lateArgInx);
// Point the entry at the current late-arg tree if it changed.
1263 if (curArgTabEntry->node != argx)
1265 curArgTabEntry->node = argx;
1268 return curArgTabEntry;
// RemorphStkArg: during a re-morph, locate the existing table entry for the
// stack argument at position 'argNum', re-assert its recorded slot/size/
// alignment/parent, update the entry's 'node' (either from gtCallLateArgs when
// the arg was deferred, or from the new early node), and advance nextSlotNum.
1271 void fgArgInfo::RemorphStkArg(
1272 unsigned argNum, GenTreePtr node, GenTreePtr parent, unsigned numSlots, unsigned alignment)
1274 fgArgTabEntryPtr curArgTabEntry = nullptr;
1275 bool isRegArg = false;
1276 unsigned regArgInx = 0;
// Scan the table for the entry matching this argument position.
1280 for (inx = 0; inx < argCount; inx++)
1282 curArgTabEntry = argTable[inx];
1284 if (curArgTabEntry->parent != nullptr)
1286 assert(curArgTabEntry->parent->OperIsList());
1287 argx = curArgTabEntry->parent->Current();
// GTF_LATE_ARG on the early node means the real value lives in the late arg list.
1288 isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
1292 argx = curArgTabEntry->node;
1296 if (curArgTabEntry->argNum == argNum)
// Recompute the aligned slot and check it matches the recorded one.
1307 nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
1309 assert(curArgTabEntry->argNum == argNum);
1310 assert(curArgTabEntry->slotNum == nextSlotNum);
1311 assert(curArgTabEntry->numSlots == numSlots);
1312 assert(curArgTabEntry->alignment == alignment);
1313 assert(curArgTabEntry->parent == parent);
1314 assert(parent->OperIsList());
1316 #if FEATURE_FIXED_OUT_ARGS
1317 if (curArgTabEntry->node != node)
1321 GenTreePtr argx = nullptr;
1322 unsigned regIndex = 0;
1324 /* process the register argument list */
1325 for (GenTreeArgList *list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), regIndex++)
1327 argx = list->Current();
1328 assert(!argx->IsArgPlaceHolderNode()); // No place holders nodes are in gtCallLateArgs;
1329 if (regIndex == regArgInx)
1334 assert(regIndex == regArgInx);
1335 assert(regArgInx == curArgTabEntry->lateArgInx);
// Point the entry at the current late-arg tree if it changed.
1337 if (curArgTabEntry->node != argx)
1339 curArgTabEntry->node = argx;
1344 assert(parent->Current() == node);
1345 curArgTabEntry->node = node;
1349 curArgTabEntry->node = node;
// Reserve the stack slots consumed by this argument.
1352 nextSlotNum += numSlots;
// SplitArg: mark the entry for argument 'argNum' as split between registers
// and stack: the first 'numRegs' pieces go in registers and the remaining
// 'numSlots' slots go on the stack. Advances nextSlotNum by the stack portion.
1355 void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots)
1357 fgArgTabEntryPtr curArgTabEntry = nullptr;
1358 assert(argNum < argCount);
// Find the entry for this argument position.
1359 for (unsigned inx = 0; inx < argCount; inx++)
1361 curArgTabEntry = argTable[inx];
1362 if (curArgTabEntry->argNum == argNum)
// A split arg must occupy at least one register and one stack slot.
1368 assert(numRegs > 0);
1369 assert(numSlots > 0);
1371 curArgTabEntry->isSplit = true;
1372 curArgTabEntry->numRegs = numRegs;
1373 curArgTabEntry->numSlots = numSlots;
1375 nextSlotNum += numSlots;
// EvalToTmp: record that argument 'argNum' has been evaluated into the local
// temp 'tmpNum' and that 'newNode' (the tree now in the arg list) replaces the
// entry's node. Marks the entry as a temp so later phases reuse it.
1378 void fgArgInfo::EvalToTmp(unsigned argNum, unsigned tmpNum, GenTreePtr newNode)
1380 fgArgTabEntryPtr curArgTabEntry = nullptr;
1381 assert(argNum < argCount);
// Find the entry for this argument position.
1382 for (unsigned inx = 0; inx < argCount; inx++)
1384 curArgTabEntry = argTable[inx];
1385 if (curArgTabEntry->argNum == argNum)
// The new node must already be wired into the argument list.
1390 assert(curArgTabEntry->parent->Current() == newNode);
1392 curArgTabEntry->node = newNode;
1393 curArgTabEntry->tmpNum = tmpNum;
1394 curArgTabEntry->isTmp = true;
// ArgsComplete: called once all arguments have been added to the table.
// Decides, per argument, whether it must be evaluated into a temp (needTmp)
// or replaced by a placeholder (needPlace) so that evaluation order is
// preserved when arguments contain assignments, calls, exceptions, localloc,
// or qmarks. Sets argsComplete at the end.
1397 void fgArgInfo::ArgsComplete()
1399 bool hasStackArgs = false;
1400 bool hasStructRegArg = false;
// First pass: classify each argument and apply the ordering rules below.
1402 for (unsigned curInx = 0; curInx < argCount; curInx++)
1404 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
1405 assert(curArgTabEntry != nullptr);
1406 GenTreePtr argx = curArgTabEntry->node;
1408 if (curArgTabEntry->regNum == REG_STK)
1410 hasStackArgs = true;
1411 #if !FEATURE_FIXED_OUT_ARGS
1412 // On x86 we use push instructions to pass arguments:
1413 // The non-register arguments are evaluated and pushed in order
1414 // and they are never evaluated into temps
1419 else // we have a register argument, next we look for a struct type.
1421 if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
1423 hasStructRegArg = true;
1427 /* If the argument tree contains an assignment (GTF_ASG) then the argument and
1428 and every earlier argument (except constants) must be evaluated into temps
1429 since there may be other arguments that follow and they may use the value being assigned.
1431 EXAMPLE: ArgTab is "a, a=5, a"
1432 -> when we see the second arg "a=5"
1433 we know the first two arguments "a, a=5" have to be evaluated into temps
1435 For the case of an assignment, we only know that there exist some assignment someplace
1436 in the tree. We don't know what is being assigned so we are very conservative here
1437 and assume that any local variable could have been assigned.
1440 if (argx->gtFlags & GTF_ASG)
1442 // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
1443 // a tmp, then we need a temp in the late arg list.
1444 if ((argCount > 1) || argx->OperIsCopyBlkOp()
1445 #ifdef FEATURE_FIXED_OUT_ARGS
1446 || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
1447 // that we only have late non-register args when that feature is on.
1448 #endif // FEATURE_FIXED_OUT_ARGS
1451 curArgTabEntry->needTmp = true;
1454 // For all previous arguments, unless they are a simple constant
1455 // we require that they be evaluated into temps
1456 for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
1458 fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
1459 assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
1461 assert(prevArgTabEntry->node);
1462 if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
1464 prevArgTabEntry->needTmp = true;
1469 #if FEATURE_FIXED_OUT_ARGS
1470 // Like calls, if this argument has a tree that will do an inline throw,
1471 // a call to a jit helper, then we need to treat it like a call (but only
1472 // if there are/were any stack args).
1473 // This means unnesting, sorting, etc. Technically this is overly
1474 // conservative, but I want to avoid as much special-case debug-only code
1475 // as possible, so leveraging the GTF_CALL flag is the easiest.
1476 if (!(argx->gtFlags & GTF_CALL) && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) &&
1477 compiler->opts.compDbgCode &&
1478 (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
// Only pretend this arg is a call if some OTHER arg is a stack arg.
1480 for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
1482 if (otherInx == curInx)
1487 if (argTable[otherInx]->regNum == REG_STK)
1489 argx->gtFlags |= GTF_CALL;
1494 #endif // FEATURE_FIXED_OUT_ARGS
1496 /* If it contains a call (GTF_CALL) then itself and everything before the call
1497 with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT
1498 has to be kept in the right order since we will move the call to the first position)
1500 For calls we don't have to be quite as conservative as we are with an assignment
1501 since the call won't be modifying any non-address taken LclVars.
1504 if (argx->gtFlags & GTF_CALL)
1506 if (argCount > 1) // If this is not the only argument
1508 curArgTabEntry->needTmp = true;
1510 else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
1512 // Spill all arguments that are floating point calls
1513 curArgTabEntry->needTmp = true;
1516 // All previous arguments may need to be evaluated into temps
1517 for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
1519 fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
1520 assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
1521 assert(prevArgTabEntry->node);
1523 // For all previous arguments, if they have any GTF_ALL_EFFECT
1524 // we require that they be evaluated into a temp
1525 if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
1527 prevArgTabEntry->needTmp = true;
1529 #if FEATURE_FIXED_OUT_ARGS
1530 // Or, if they are stored into the FIXED_OUT_ARG area
1531 // we require that they be moved to the gtCallLateArgs
1532 // and replaced with a placeholder node
1533 else if (prevArgTabEntry->regNum == REG_STK)
1535 prevArgTabEntry->needPlace = true;
1541 #ifndef LEGACY_BACKEND
1542 #if FEATURE_MULTIREG_ARGS
1543 // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST
1544 // with multiple indirections, so here we consider spilling it into a tmp LclVar.
1546 // Note that Arm32 is a LEGACY_BACKEND and it defines FEATURE_MULTIREG_ARGS
1547 // so we skip this for ARM32 until it is ported to use RyuJIT backend
1550 bool isMultiRegArg = (curArgTabEntry->numRegs > 1);
1552 if ((argx->TypeGet() == TYP_STRUCT) && (curArgTabEntry->needTmp == false))
1554 if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
1556 // Spill multireg struct arguments that have Assignments or Calls embedded in them
1557 curArgTabEntry->needTmp = true;
1561 // We call gtPrepareCost to measure the cost of evaluating this tree
1562 compiler->gtPrepareCost(argx);
1564 if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX)))
1566 // Spill multireg struct arguments that are expensive to evaluate twice
1567 curArgTabEntry->needTmp = true;
1569 else if (argx->OperGet() == GT_OBJ)
1571 GenTreeObj* argObj = argx->AsObj();
1572 CORINFO_CLASS_HANDLE objClass = argObj->gtClass;
1573 unsigned structSize = compiler->info.compCompHnd->getClassSize(objClass);
1580 // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
1582 if (argObj->gtObj.gtOp1->IsVarAddr() == false) // Is the source not a LclVar?
1584 // If we don't have a LclVar we need to read exactly 3,5,6 or 7 bytes
1585 // For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
1587 curArgTabEntry->needTmp = true;
1595 // Spill any GT_OBJ multireg structs that are difficult to extract
1597 // When we have a GT_OBJ of a struct with the above sizes we would need
1598 // to use 3 or 4 load instructions to load the exact size of this struct.
1599 // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
1600 // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
1601 // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
1604 curArgTabEntry->needTmp = true;
1613 #endif // FEATURE_MULTIREG_ARGS
1614 #endif // LEGACY_BACKEND
1617 // We only care because we can't spill structs and qmarks involve a lot of spilling, but
1618 // if we don't have qmarks, then it doesn't matter.
1619 // So check for Qmark's globally once here, instead of inside the loop.
1621 const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);
1623 #if FEATURE_FIXED_OUT_ARGS
1625 // For Arm/x64 we only care because we can't reorder a register
1626 // argument that uses GT_LCLHEAP. This is an optimization to
1627 // save a check inside the below loop.
1629 const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);
1633 const bool hasStackArgsWeCareAbout = hasStackArgs;
1635 #endif // FEATURE_FIXED_OUT_ARGS
1637 // If we have any stack args we have to force the evaluation
1638 // of any arguments passed in registers that might throw an exception
1640 // Technically we are only required to handle the following two cases:
1641 // a GT_IND with GTF_IND_RNGCHK (only on x86) or
1642 // a GT_LCLHEAP node that allocates stuff on the stack
1644 if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
// Second pass: force temps for register args that could throw or use qmarks.
1646 for (unsigned curInx = 0; curInx < argCount; curInx++)
1648 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
1649 assert(curArgTabEntry != nullptr);
1650 GenTreePtr argx = curArgTabEntry->node;
1652 // Examine the register args that are currently not marked needTmp
1654 if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
1656 if (hasStackArgsWeCareAbout)
1658 #if !FEATURE_FIXED_OUT_ARGS
1659 // On x86 we previously recorded a stack depth of zero when
1660 // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag
1661 // Thus we can not reorder the argument after any stack based argument
1662 // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
1663 // check for it explicitly
1665 if (argx->gtFlags & GTF_EXCEPT)
1667 curArgTabEntry->needTmp = true;
1671 // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
1673 if (argx->gtFlags & GTF_EXCEPT)
1675 assert(compiler->compLocallocUsed);
1677 // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
1679 if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
1681 curArgTabEntry->needTmp = true;
1687 if (hasStructRegArgWeCareAbout)
1689 // Returns true if a GT_QMARK node is encountered in the argx tree
1691 if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
1693 curArgTabEntry->needTmp = true;
// Table analysis is done; SortArgs/EvalArgsToTemps may now run.
1701 argsComplete = true;
// SortArgs: reorder argTable into evaluation order so complex arguments are
// evaluated first and cheap ones (constants, locals) last, reducing register
// spills. Processes in waves — constants, then calls, then temps, then
// locals, then remaining args by descending gtCostEx — using in-place swaps
// guarded by the per-entry 'processed' flag. Requires ArgsComplete() first.
1704 void fgArgInfo::SortArgs()
1706 assert(argsComplete == true);
1709 if (compiler->verbose)
1711 printf("\nSorting the arguments:\n");
1715 /* Shuffle the arguments around before we build the gtCallLateArgs list.
1716 The idea is to move all "simple" arguments like constants and local vars
1717 to the end of the table, and move the complex arguments towards the beginning
1718 of the table. This will help prevent registers from being spilled by
1719 allowing us to evaluate the more complex arguments before the simpler arguments.
1720 The argTable ends up looking like:
1721 +------------------------------------+ <--- argTable[argCount - 1]
1723 +------------------------------------+
1724 | local var / local field |
1725 +------------------------------------+
1726 | remaining arguments sorted by cost |
1727 +------------------------------------+
1728 | temps (argTable[].needTmp = true) |
1729 +------------------------------------+
1730 | args with calls (GTF_CALL) |
1731 +------------------------------------+ <--- argTable[0]
1734 /* Set the beginning and end for the new argument table */
1737 unsigned begTab = 0;
1738 unsigned endTab = argCount - 1;
1739 unsigned argsRemaining = argCount;
1741 // First take care of arguments that are constants.
1742 // [We use a backward iterator pattern]
1749 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
1751 if (curArgTabEntry->regNum != REG_STK)
1756 // Skip any already processed args
1758 if (!curArgTabEntry->processed)
1760 GenTreePtr argx = curArgTabEntry->node;
1762 // put constants at the end of the table
1764 if (argx->gtOper == GT_CNS_INT)
1766 noway_assert(curInx <= endTab);
1768 curArgTabEntry->processed = true;
1770 // place curArgTabEntry at the endTab position by performing a swap
1772 if (curInx != endTab)
1774 argTable[curInx] = argTable[endTab];
1775 argTable[endTab] = curArgTabEntry;
1782 } while (curInx > 0);
1784 if (argsRemaining > 0)
1786 // Next take care of arguments that are calls.
1787 // [We use a forward iterator pattern]
1789 for (curInx = begTab; curInx <= endTab; curInx++)
1791 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
1793 // Skip any already processed args
1795 if (!curArgTabEntry->processed)
1797 GenTreePtr argx = curArgTabEntry->node;
1799 // put calls at the beginning of the table
1801 if (argx->gtFlags & GTF_CALL)
1803 curArgTabEntry->processed = true;
1805 // place curArgTabEntry at the begTab position by performing a swap
1807 if (curInx != begTab)
1809 argTable[curInx] = argTable[begTab];
1810 argTable[begTab] = curArgTabEntry;
1820 if (argsRemaining > 0)
1822 // Next take care of arguments that are temps.
1823 // These temps come before the arguments that are
1824 // ordinary local vars or local fields
1825 // since this will give them a better chance to become
1826 // enregistered into their actual argument register.
1827 // [We use a forward iterator pattern]
1829 for (curInx = begTab; curInx <= endTab; curInx++)
1831 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
1833 // Skip any already processed args
1835 if (!curArgTabEntry->processed)
1837 if (curArgTabEntry->needTmp)
1839 curArgTabEntry->processed = true;
1841 // place curArgTabEntry at the begTab position by performing a swap
1843 if (curInx != begTab)
1845 argTable[curInx] = argTable[begTab];
1846 argTable[begTab] = curArgTabEntry;
1856 if (argsRemaining > 0)
1858 // Next take care of local var and local field arguments.
1859 // These are moved towards the end of the argument evaluation.
1860 // [We use a backward iterator pattern]
1862 curInx = endTab + 1;
1867 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
1869 // Skip any already processed args
1871 if (!curArgTabEntry->processed)
1873 GenTreePtr argx = curArgTabEntry->node;
1875 if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
1877 noway_assert(curInx <= endTab);
1879 curArgTabEntry->processed = true;
1881 // place curArgTabEntry at the endTab position by performing a swap
1883 if (curInx != endTab)
1885 argTable[curInx] = argTable[endTab];
1886 argTable[endTab] = curArgTabEntry;
1893 } while (curInx > begTab);
1896 // Finally, take care of all the remaining arguments.
1897 // Note that we fill in one arg at a time using a while loop.
1898 bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
1899 while (argsRemaining > 0)
1901 /* Find the most expensive arg remaining and evaluate it next */
1903 fgArgTabEntryPtr expensiveArgTabEntry = nullptr;
1904 unsigned expensiveArg = UINT_MAX;
1905 unsigned expensiveArgCost = 0;
1907 // [We use a forward iterator pattern]
1909 for (curInx = begTab; curInx <= endTab; curInx++)
1911 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
1913 // Skip any already processed args
1915 if (!curArgTabEntry->processed)
1917 GenTreePtr argx = curArgTabEntry->node;
1919 // We should have already handled these kinds of args
1920 assert(argx->gtOper != GT_LCL_VAR);
1921 assert(argx->gtOper != GT_LCL_FLD);
1922 assert(argx->gtOper != GT_CNS_INT);
1924 // This arg should either have no persistent side effects or be the last one in our table
1925 // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));
1927 if (argsRemaining == 1)
1929 // This is the last arg to place
1930 expensiveArg = curInx;
1931 expensiveArgTabEntry = curArgTabEntry;
1932 assert(begTab == endTab);
1939 /* We call gtPrepareCost to measure the cost of evaluating this tree */
1940 compiler->gtPrepareCost(argx);
1943 if (argx->gtCostEx > expensiveArgCost)
1945 // Remember this arg as the most expensive one that we have yet seen
1946 expensiveArgCost = argx->gtCostEx;
1947 expensiveArg = curInx;
1948 expensiveArgTabEntry = curArgTabEntry;
1954 noway_assert(expensiveArg != UINT_MAX);
1956 // put the most expensive arg towards the beginning of the table
1958 expensiveArgTabEntry->processed = true;
1960 // place expensiveArgTabEntry at the begTab position by performing a swap
1962 if (expensiveArg != begTab)
1964 argTable[expensiveArg] = argTable[begTab];
1965 argTable[begTab] = expensiveArgTabEntry;
1971 costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop
1974 // The table should now be completely filled and thus begTab should now be adjacent to endTab
1975 // and regArgsRemaining should be zero
1976 assert(begTab == (endTab + 1));
1977 assert(argsRemaining == 0);
1979 #if !FEATURE_FIXED_OUT_ARGS
1980 // Finally build the regArgList
1982 callTree->gtCall.regArgList = NULL;
1983 callTree->gtCall.regArgListCount = regCount;
1985 unsigned regInx = 0;
1986 for (curInx = 0; curInx < argCount; curInx++)
1988 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
1990 if (curArgTabEntry->regNum != REG_STK)
1992 // Encode the argument register in the register mask
1994 callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
1998 #endif // !FEATURE_FIXED_OUT_ARGS
2003 //------------------------------------------------------------------------------
2004 // fgMakeTmpArgNode : This function creates a tmp var only if needed.
2005 // We need this to be done in order to enforce ordering
2006 // of the evaluation of arguments.
//
// Arguments:
2009 // tmpVarNum - the var num which we clone into the newly created temp var.
//
// Return Value:
2012 // the newly created temp var tree.
//
// Notes:
// For struct temps the shape of the returned tree is target-dependent:
// it may be retyped as a primitive, turned into a GT_LCL_FLD, or wrapped
// in GT_ADDR / GT_OBJ, and the temp is marked address-exposed when its
// address is taken.
2014 GenTreePtr Compiler::fgMakeTmpArgNode(
2015 unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters))
2017 LclVarDsc* varDsc = &lvaTable[tmpVarNum];
2018 assert(varDsc->lvIsTemp);
2019 var_types type = varDsc->TypeGet();
2021 // Create a copy of the temp to go into the late argument list
2022 GenTreePtr arg = gtNewLclvNode(tmpVarNum, type);
2023 GenTreePtr addrNode = nullptr;
2025 if (varTypeIsStruct(type))
2028 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
2030 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2032 arg->gtFlags |= GTF_DONT_CSE;
2034 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
2035 // Can this type be passed in a single register?
2036 // If so, the following call will return the corresponding primitive type.
2037 // Otherwise, it will return TYP_UNKNOWN and we will pass by reference.
2039 bool passedInRegisters = false;
2040 structPassingKind kind;
2041 CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle();
2042 var_types structBaseType = getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd);
2044 if (structBaseType != TYP_UNKNOWN)
2046 passedInRegisters = true;
2047 type = structBaseType;
2049 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
2051 // If it is passed in registers, don't get the address of the var. Make it a
2052 // field instead. It will be loaded in registers with putarg_reg tree in lower.
2053 if (passedInRegisters)
2055 arg->ChangeOper(GT_LCL_FLD);
2060 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2061 // TODO-Cleanup: Fix this - we should never have an address that is TYP_STRUCT.
2062 var_types addrType = type;
2064 var_types addrType = TYP_BYREF;
2066 arg = gtNewOperNode(GT_ADDR, addrType, arg);
2069 #if FEATURE_MULTIREG_ARGS
2070 #ifdef _TARGET_ARM64_
2071 assert(varTypeIsStruct(type));
2072 if (lvaIsMultiregStruct(varDsc))
2074 // ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD);
2075 // as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works.
2076 // We will create a GT_OBJ for the argument below.
2077 // This will be passed by value in two registers.
2078 assert(addrNode != nullptr);
2080 // Create an Obj of the temp to use it as a call argument.
2081 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
2083 // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here;
2084 // this is only to preserve former behavior (though some CSE'ing of struct
2085 // values can be pessimizing, so enabling this may require some additional tuning).
2086 arg->gtFlags |= GTF_DONT_CSE;
2088 #endif // _TARGET_ARM64_
2089 #endif // FEATURE_MULTIREG_ARGS
2092 #else // not (_TARGET_AMD64_ or _TARGET_ARM64_)
2094 // other targets, we pass the struct by value
2095 assert(varTypeIsStruct(type));
2097 addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);
2099 // Get a new Obj node temp to use it as a call argument.
2100 // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object.
2101 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode);
2103 #endif // not (_TARGET_AMD64_ or _TARGET_ARM64_)
2105 } // (varTypeIsStruct(type))
2107 if (addrNode != nullptr)
2109 assert(addrNode->gtOper == GT_ADDR);
2111 // This will prevent this LclVar from being optimized away
2112 lvaSetVarAddrExposed(tmpVarNum);
2114 // the child of a GT_ADDR is required to have this flag set
2115 addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE;
// EvalArgsToTemps: walk the (already sorted) argument table and, for each
// argument, either evaluate it into a temp (needTmp), defer it whole into
// gtCallLateArgs, or replace it in the early gtCallArgs list with a
// placeholder node whose real value is computed in the late arg list.
// Builds gtCallLateArgs in table order and records each entry's lateArgInx.
// Requires SortArgs() to have run first.
2121 void fgArgInfo::EvalArgsToTemps()
2123 assert(argsSorted == true);
2125 unsigned regArgInx = 0;
2126 // Now go through the argument table and perform the necessary evaluation into temps
2127 GenTreeArgList* tmpRegArgNext = nullptr;
2128 for (unsigned curInx = 0; curInx < argCount; curInx++)
2130 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
// setupArg: tree placed in the early arg list; defArg: tree for the late list.
2132 GenTreePtr argx = curArgTabEntry->node;
2133 GenTreePtr setupArg = nullptr;
2136 #if !FEATURE_FIXED_OUT_ARGS
2137 // Only ever set for FEATURE_FIXED_OUT_ARGS
2138 assert(curArgTabEntry->needPlace == false);
2140 // On x86 and other archs that use push instructions to pass arguments:
2141 // Only the register arguments need to be replaced with placeholder nodes.
2142 // Stacked arguments are evaluated and pushed (or stored into the stack) in order.
2144 if (curArgTabEntry->regNum == REG_STK)
2148 if (curArgTabEntry->needTmp)
2152 if (curArgTabEntry->isTmp == true)
2154 // Create a copy of the temp to go into the late argument list
2155 tmpVarNum = curArgTabEntry->tmpNum;
2156 defArg = compiler->fgMakeTmpArgNode(tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
2157 argTable[curInx]->structDesc.passedInRegisters));
2159 // mark the original node as a late argument
2160 argx->gtFlags |= GTF_LATE_ARG;
2164 // Create a temp assignment for the argument
2165 // Put the temp in the gtCallLateArgs list
2166 CLANG_FORMAT_COMMENT_ANCHOR;
2169 if (compiler->verbose)
2171 printf("Argument with 'side effect'...\n");
2172 compiler->gtDispTree(argx);
2176 #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2177 noway_assert(argx->gtType != TYP_STRUCT);
2180 tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
2181 if (argx->gtOper == GT_MKREFANY)
2183 // For GT_MKREFANY, typically the actual struct copying does
2184 // not have any side-effects and can be delayed. So instead
2185 // of using a temp for the whole struct, we can just use a temp
2186 // for the operand that has a side-effect
2188 if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
2190 operand = argx->gtOp.gtOp1;
2192 // In the early argument evaluation, place an assignment to the temp
2193 // from the source operand of the mkrefany
2194 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2196 // Replace the operand for the mkrefany with the new temp.
2197 argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2199 else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
2201 operand = argx->gtOp.gtOp2;
2203 // In the early argument evaluation, place an assignment to the temp
2204 // from the source operand of the mkrefany
2205 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2207 // Replace the operand for the mkrefany with the new temp.
2208 argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2212 if (setupArg != nullptr)
2214 // Now keep the mkrefany for the late argument list
2217 // Clear the side-effect flags because now both op1 and op2 have no side-effects
2218 defArg->gtFlags &= ~GTF_ALL_EFFECT;
2222 setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);
2224 LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;
2226 #ifndef LEGACY_BACKEND
2227 if (compiler->fgOrder == Compiler::FGOrderLinear)
2229 // We'll reference this temporary variable just once
2230 // when we perform the function call after
2231 // setting up this argument.
2232 varDsc->lvRefCnt = 1;
2234 #endif // !LEGACY_BACKEND
2236 var_types lclVarType = genActualType(argx->gtType);
2237 var_types scalarType = TYP_UNKNOWN;
2239 if (setupArg->OperIsCopyBlkOp())
2241 setupArg = compiler->fgMorphCopyBlock(setupArg);
2242 #ifdef _TARGET_ARM64_
2243 // This scalar LclVar widening step is only performed for ARM64
2245 CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum);
2246 unsigned structSize = varDsc->lvExactSize;
2248 scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd);
2249 #endif // _TARGET_ARM64_
2252 // scalarType can be set to a wider type for ARM64: (3 => 4) or (5,6,7 => 8)
2253 if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
2255 // Create a GT_LCL_FLD using the wider type to go to the late argument list
2256 defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0);
2260 // Create a copy of the temp to go to the late argument list
2261 defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType);
2264 curArgTabEntry->isTmp = true;
2265 curArgTabEntry->tmpNum = tmpVarNum;
2268 // Previously we might have thought the local was promoted, and thus the 'COPYBLK'
2269 // might have left holes in the used registers (see
2270 // fgAddSkippedRegsInPromotedStructArg).
2271 // Too bad we're not that smart for these intermediate temps...
2272 if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
2274 regNumber argReg = curArgTabEntry->regNum;
2275 regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
2276 for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
2278 argReg = genRegArgNext(argReg);
2279 allUsedRegs |= genRegMask(argReg);
2281 #ifdef LEGACY_BACKEND
2282 callTree->gtCall.gtCallRegUsedMask |= allUsedRegs;
2283 #endif // LEGACY_BACKEND
2285 #endif // _TARGET_ARM_
2288 /* mark the assignment as a late argument */
2289 setupArg->gtFlags |= GTF_LATE_ARG;
2292 if (compiler->verbose)
2294 printf("\n Evaluate to a temp:\n");
2295 compiler->gtDispTree(setupArg);
2300 else // curArgTabEntry->needTmp == false
2303 // Only register args are replaced with placeholder nodes
2304 // and the stack based arguments are evaluated and pushed in order.
2306 // On Arm/x64 - When needTmp is false and needPlace is false,
2307 // the non-register arguments are evaluated and stored in order.
2308 // When needPlace is true we have a nested call that comes after
2309 // this argument so we have to replace it in the gtCallArgs list
2310 // (the initial argument evaluation list) with a placeholder.
2312 if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
2317 /* No temp needed - move the whole node to the gtCallLateArgs list */
2319 /* The argument is deferred and put in the late argument list */
2323 // Create a placeholder node to put in its place in gtCallLateArgs.
2325 // For a struct type we also need to record the class handle of the arg.
2326 CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
2328 #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2330 // All structs are either passed (and retyped) as integral types, OR they
2331 // are passed by reference.
2332 noway_assert(argx->gtType != TYP_STRUCT);
2334 #else // !defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2336 if (varTypeIsStruct(defArg))
2338 // Need a temp to walk any GT_COMMA nodes when searching for the clsHnd
2339 GenTreePtr defArgTmp = defArg;
2341 // The GT_OBJ may be a child of a GT_COMMA.
2342 while (defArgTmp->gtOper == GT_COMMA)
2344 defArgTmp = defArgTmp->gtOp.gtOp2;
2346 assert(varTypeIsStruct(defArgTmp));
2348 // We handle two opcodes: GT_MKREFANY and GT_OBJ.
2349 if (defArgTmp->gtOper == GT_MKREFANY)
2351 clsHnd = compiler->impGetRefAnyClass();
2353 else if (defArgTmp->gtOper == GT_OBJ)
2355 clsHnd = defArgTmp->AsObj()->gtClass;
2359 BADCODE("Unhandled struct argument tree in fgMorphArgs");
2363 #endif // !(defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
2365 setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);
2367 /* mark the placeholder node as a late argument */
2368 setupArg->gtFlags |= GTF_LATE_ARG;
2371 if (compiler->verbose)
2373 if (curArgTabEntry->regNum == REG_STK)
2375 printf("Deferred stack argument :\n");
2379 printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
2382 compiler->gtDispTree(argx);
2383 printf("Replaced with placeholder node:\n");
2384 compiler->gtDispTree(setupArg);
// Splice the setup tree into the early list, replacing the original argx.
2389 if (setupArg != nullptr)
2391 if (curArgTabEntry->parent)
2393 GenTreePtr parent = curArgTabEntry->parent;
2394 /* a normal argument from the list */
2395 noway_assert(parent->OperIsList());
2396 noway_assert(parent->gtOp.gtOp1 == argx);
2398 parent->gtOp.gtOp1 = setupArg;
2402 /* must be the gtCallObjp */
2403 noway_assert(callTree->gtCall.gtCallObjp == argx);
2405 callTree->gtCall.gtCallObjp = setupArg;
2409 /* deferred arg goes into the late argument list */
2411 if (tmpRegArgNext == nullptr)
2413 tmpRegArgNext = compiler->gtNewArgList(defArg);
2414 callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
2418 noway_assert(tmpRegArgNext->OperIsList());
2419 noway_assert(tmpRegArgNext->Current());
2420 tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
2421 tmpRegArgNext = tmpRegArgNext->Rest();
// The table entry now points at the late-arg tree and its index in that list.
2424 curArgTabEntry->node = defArg;
2425 curArgTabEntry->lateArgInx = regArgInx++;
2429 if (compiler->verbose)
2431 printf("\nShuffled argument table: ");
2432 for (unsigned curInx = 0; curInx < argCount; curInx++)
2434 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
2436 if (curArgTabEntry->regNum != REG_STK)
2438 printf("%s ", getRegName(curArgTabEntry->regNum));
2446 // Get the late arg for arg at position argIndex.
2447 // argIndex - 0-based position to get late arg for.
2448 // Caller must ensure this position has a late arg.
2449 GenTreePtr fgArgInfo::GetLateArg(unsigned argIndex)
2451 for (unsigned j = 0; j < this->ArgCount(); j++)
2453 if (this->ArgTable()[j]->argNum == argIndex)
2455 return this->ArgTable()[j]->node;
2458 // Caller must ensure late arg exists.
2462 void fgArgInfo::RecordStkLevel(unsigned stkLvl)
2464 assert(!IsUninitialized(stkLvl));
2465 this->stkLevel = stkLvl;
2468 unsigned fgArgInfo::RetrieveStkLevel()
2470 assert(!IsUninitialized(stkLevel));
2474 // Return a conservative estimate of the stack size in bytes.
2475 // It will be used only on the intercepted-for-host code path to copy the arguments.
2476 int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
2480 for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
2486 if (numArgs > MAX_REG_ARG)
2488 numStkArgs = numArgs - MAX_REG_ARG;
2495 return numStkArgs * REGSIZE_BYTES;
2498 //------------------------------------------------------------------------------
2499 // fgMakeMultiUse : If the node is a local, clone it and increase the ref count
2500 // otherwise insert a comma form temp
2503 // ppTree - a pointer to the child node we will be replacing with the comma expression that
2504 // evaluates ppTree to a temp and returns the result
2507 // A fresh GT_LCL_VAR node referencing the temp which has not been used
2510 // The result tree MUST be added to the tree structure since the ref counts are
2511 // already incremented.
2513 GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
2515 GenTree* tree = *pOp;
2516 if (tree->IsLocal())
2518 auto result = gtClone(tree);
2519 if (lvaLocalVarRefCounted)
2521 lvaTable[tree->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2527 GenTree* result = fgInsertCommaFormTemp(pOp);
2529 // At this point, *pOp is GT_COMMA(GT_ASG(V01, *pOp), V01) and result = V01
2530 // Therefore, the ref count has to be incremented 3 times for *pOp and result, if result will
2531 // be added by the caller.
2532 if (lvaLocalVarRefCounted)
2534 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2535 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2536 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2543 //------------------------------------------------------------------------------
2544 // fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
2545 // and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl)
2548 // ppTree - a pointer to the child node we will be replacing with the comma expression that
2549 // evaluates ppTree to a temp and returns the result
2551 // structType - value type handle if the temp created is of TYP_STRUCT.
2554 // A fresh GT_LCL_VAR node referencing the temp which has not been used
2557 GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
2559 GenTree* subTree = *ppTree;
2561 unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));
2563 if (varTypeIsStruct(subTree))
2565 assert(structType != nullptr);
2566 lvaSetStruct(lclNum, structType, false);
2569 // If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
2570 // The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() for
2571 // setting type of lcl vars created.
2572 GenTree* asg = gtNewTempAssign(lclNum, subTree);
2574 GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
2576 GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load);
2580 return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
2583 //------------------------------------------------------------------------
2584 // fgMorphArgs: Walk and transform (morph) the arguments of a call
2587 // callNode - the call for which we are doing the argument morphing
2590 // Like most morph methods, this method returns the morphed node,
2591 // though in this case there are currently no scenarios where the
2592 // node itself is re-created.
2595 // This method is even less idempotent than most morph methods.
2596 // That is, it makes changes that should not be redone. It uses the existence
2597 // of gtCallLateArgs (the late arguments list) to determine if it has
2598 // already done that work.
2600 // The first time it is called (i.e. during global morphing), this method
2601 // computes the "late arguments". This is when it determines which arguments
2602 // need to be evaluated to temps prior to the main argument setup, and which
2603 // can be directly evaluated into the argument location. It also creates a
2604 // second argument list (gtCallLateArgs) that does the final placement of the
2605 // arguments, e.g. into registers or onto the stack.
2607 // The "non-late arguments", aka the gtCallArgs, are doing the in-order
2608 // evaluation of the arguments that might have side-effects, such as embedded
2609 // assignments, calls or possible throws. In these cases, it and earlier
2610 // arguments must be evaluated to temps.
2612 // On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
2613 // if we have any nested calls, we need to defer the copying of the argument
2614 // into the fixed argument area until after the call. If the argument did not
2615 // otherwise need to be computed into a temp, it is moved to gtCallLateArgs and
2616 // replaced in the "early" arg list (gtCallArgs) with a placeholder node.
2619 #pragma warning(push)
2620 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2622 GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
2627 unsigned flagsSummary = 0;
2628 unsigned genPtrArgCntSav = fgPtrArgCntCur;
2630 unsigned argIndex = 0;
2632 unsigned intArgRegNum = 0;
2633 unsigned fltArgRegNum = 0;
2636 regMaskTP argSkippedRegMask = RBM_NONE;
2637 regMaskTP fltArgSkippedRegMask = RBM_NONE;
2638 #endif // _TARGET_ARM_
2640 #if defined(_TARGET_X86_)
2641 unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated
2643 const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number
2646 unsigned argSlots = 0;
2647 unsigned nonRegPassedStructSlots = 0;
2648 bool reMorphing = call->AreArgsComplete();
2649 bool callHasRetBuffArg = call->HasRetBufArg();
2651 #ifndef _TARGET_X86_ // i.e. _TARGET_AMD64_ or _TARGET_ARM_
2652 bool callIsVararg = call->IsVarargs();
2655 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2656 // If fgMakeOutgoingStructArgCopy is called and copies are generated, hasStackArgCopy is set
2657 // to make sure to call EvalArgsToTemp. fgMakeOutgoingStructArgCopy just marks the argument
2658 // to need a temp variable, and EvalArgsToTemp actually creates the temp variable node.
2659 bool hasStackArgCopy = false;
2662 #ifndef LEGACY_BACKEND
2663 // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
2664 // following the normal calling convention or in the normal argument registers. We either mark existing
2665 // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
2666 // non-standard arguments into the argument list, below.
2667 class NonStandardArgs
2669 struct NonStandardArg
2671 regNumber reg; // The register to be assigned to this non-standard argument.
2672 GenTree* node; // The tree node representing this non-standard argument.
2673 // Note that this must be updated if the tree node changes due to morphing!
2676 ArrayStack<NonStandardArg> args;
2679 NonStandardArgs(Compiler* compiler) : args(compiler, 3) // We will have at most 3 non-standard arguments
2683 //-----------------------------------------------------------------------------
2684 // Add: add a non-standard argument to the table of non-standard arguments
2687 // node - a GenTree node that has a non-standard argument.
2688 // reg - the register to assign to this node.
2693 void Add(GenTree* node, regNumber reg)
2695 NonStandardArg nsa = {reg, node};
2699 //-----------------------------------------------------------------------------
2700 // Find: Look for a GenTree* in the set of non-standard args.
2703 // node - a GenTree node to look for
2706 // The index of the non-standard argument (a non-negative, unique, stable number).
2707 // If the node is not a non-standard argument, return -1.
2709 int Find(GenTree* node)
2711 for (int i = 0; i < args.Height(); i++)
2713 if (node == args.Index(i).node)
2721 //-----------------------------------------------------------------------------
2722 // FindReg: Look for a GenTree node in the non-standard arguments set. If found,
2723 // set the register to use for the node.
2726 // node - a GenTree node to look for
2727 // pReg - an OUT argument. *pReg is set to the non-standard register to use if
2728 // 'node' is found in the non-standard argument set.
2731 // 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the
2733 // 'false' otherwise (in this case, *pReg is unmodified).
2735 bool FindReg(GenTree* node, regNumber* pReg)
2737 for (int i = 0; i < args.Height(); i++)
2739 NonStandardArg& nsa = args.IndexRef(i);
2740 if (node == nsa.node)
2749 //-----------------------------------------------------------------------------
2750 // Replace: Replace the non-standard argument node at a given index. This is done when
2751 // the original node was replaced via morphing, but we need to continue to assign a
2752 // particular non-standard arg to it.
2755 // index - the index of the non-standard arg. It must exist.
2756 // node - the new GenTree node.
2761 void Replace(int index, GenTree* node)
2763 args.IndexRef(index).node = node;
2766 } nonStandardArgs(this);
2767 #endif // !LEGACY_BACKEND
2769 // Count of args. On first morph, this is counted before we've filled in the arg table.
2770 // On remorph, we grab it from the arg table.
2771 unsigned numArgs = 0;
2773 // Process the late arguments (which were determined by a previous caller).
2774 // Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
2775 // may need to refer to it.
2778 // We need to reMorph the gtCallLateArgs early since that is what triggers
2779 // the expression folding and we need to have the final folded gtCallLateArgs
2780 // available when we call RemorphRegArg so that we correctly update the fgArgInfo
2781 // with the folded tree that represents the final optimized argument nodes.
2783 // However if a range-check needs to be generated for any of these late
2784 // arguments we also need to "know" what the stack depth will be when we generate
2785 // code to branch to the throw range check failure block as that is part of the
2786 // GC information contract for that block.
2788 // Since the late arguments are evaluated last we have pushed all of the
2789 // other arguments on the stack before we evaluate these late arguments,
2790 // so we record the stack depth on the first morph call when reMorphing
2791 // was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
2793 if (call->gtCallLateArgs != nullptr)
2795 unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
2796 fgPtrArgCntCur += callStkLevel;
2797 call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
2798 flagsSummary |= call->gtCallLateArgs->gtFlags;
2799 fgPtrArgCntCur -= callStkLevel;
2801 assert(call->fgArgInfo != nullptr);
2802 call->fgArgInfo->RemorphReset();
2804 numArgs = call->fgArgInfo->ArgCount();
2808 // First we need to count the args
2809 if (call->gtCallObjp)
2813 for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
2818 // Insert or mark non-standard args. These are either outside the normal calling convention, or
2819 // arguments registers that don't follow the normal progression of argument registers in the calling
2820 // convention (such as for the ARM64 fixed return buffer argument x8).
2822 // *********** NOTE *************
2823 // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
2824 // in the implementation of fast tail call.
2825 // *********** END NOTE *********
2826 CLANG_FORMAT_COMMENT_ANCHOR;
2828 #if !defined(LEGACY_BACKEND)
2829 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
2830 // The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers has a custom calling convention.
2831 // Set the argument registers correctly here.
2832 if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
2834 GenTreeArgList* args = call->gtCallArgs;
2835 GenTree* arg1 = args->Current();
2836 assert(arg1 != nullptr);
2837 nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
2839 #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
2840 #if defined(_TARGET_X86_)
2841 // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
2842 // hi part to be in EDX. This sets the argument registers up correctly.
2843 else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
2844 call->IsHelperCall(this, CORINFO_HELP_LRSZ))
2846 GenTreeArgList* args = call->gtCallArgs;
2847 GenTree* arg1 = args->Current();
2848 assert(arg1 != nullptr);
2849 nonStandardArgs.Add(arg1, REG_LNGARG_LO);
2851 args = args->Rest();
2852 GenTree* arg2 = args->Current();
2853 assert(arg2 != nullptr);
2854 nonStandardArgs.Add(arg2, REG_LNGARG_HI);
2856 #else // !defined(_TARGET_X86_)
2857 // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
2858 // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
2859 // convention for x86/SSE.
2861 // If we have a Fixed Return Buffer argument register then we setup a non-standard argument for it
2863 if (hasFixedRetBuffReg() && call->HasRetBufArg())
2865 args = call->gtCallArgs;
2866 assert(args != nullptr);
2867 assert(args->OperIsList());
2869 argx = call->gtCallArgs->Current();
2871 // We don't increment numArgs here, since we already counted this argument above.
2873 nonStandardArgs.Add(argx, theFixedRetBuffReg());
2876 // We are allowed to have a Fixed Return Buffer argument combined
2877 // with any of the remaining non-standard arguments
2879 if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
2881 assert(!call->gtCallCookie);
2882 // Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
2883 // It will be used only on the intercepted-for-host code path to copy the arguments.
2885 GenTree* cns = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
2886 call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
2889 nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM);
2891 else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT) && !call->IsTailCallViaHelper())
2893 // indirect VSD stubs need the base of the indirection cell to be
2894 // passed in addition. At this point that is the value in gtCallAddr.
2895 // The actual call target will be derived from gtCallAddr in call
2898 // If it is a VSD call getting dispatched via tail call helper,
2899 // fgMorphTailCall() would materialize stub addr as an additional
2900 // parameter added to the original arg list and hence no need to
2901 // add as a non-standard arg.
2903 GenTree* arg = call->gtCallAddr;
2904 if (arg->OperIsLocal())
2906 arg = gtClone(arg, true);
2910 call->gtCallAddr = fgInsertCommaFormTemp(&arg);
2911 call->gtFlags |= GTF_ASG;
2913 noway_assert(arg != nullptr);
2915 // And push the stub address onto the list of arguments
2916 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2919 nonStandardArgs.Add(arg, REG_VIRTUAL_STUB_PARAM);
2922 #endif // defined(_TARGET_X86_)
2923 if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr))
2925 assert(!call->IsUnmanaged());
2927 GenTree* arg = call->gtCallCookie;
2928 noway_assert(arg != nullptr);
2929 call->gtCallCookie = nullptr;
2931 #if defined(_TARGET_X86_)
2932 // x86 passes the cookie on the stack as the final argument to the call.
2933 GenTreeArgList** insertionPoint = &call->gtCallArgs;
2934 for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
2937 *insertionPoint = gtNewListNode(arg, nullptr);
2938 #else // !defined(_TARGET_X86_)
2939 // All other architectures pass the cookie in a register.
2940 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2941 #endif // defined(_TARGET_X86_)
2943 nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
2946 // put destination into R10/EAX
2947 arg = gtClone(call->gtCallAddr, true);
2948 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2951 nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM);
2953 // finally change this call to a helper call
2954 call->gtCallType = CT_HELPER;
2955 call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
2957 #endif // !defined(LEGACY_BACKEND)
2959 // Allocate the fgArgInfo for the call node;
2961 call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
2964 if (varTypeIsStruct(call))
2966 fgFixupStructReturn(call);
2969 /* First we morph the argument subtrees ('this' pointer, arguments, etc.).
2970 * During the first call to fgMorphArgs we also record the
2971 * information about late arguments we have in 'fgArgInfo'.
2972 * This information is used later to construct the gtCallLateArgs */
2974 /* Process the 'this' argument value, if present */
2976 argx = call->gtCallObjp;
2980 argx = fgMorphTree(argx);
2981 call->gtCallObjp = argx;
2982 flagsSummary |= argx->gtFlags;
2984 assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT);
2986 assert(argIndex == 0);
2988 /* We must fill in or update the argInfo table */
2992 /* this is a register argument - possibly update it in the table */
2993 call->fgArgInfo->RemorphRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
2997 assert(varTypeIsGC(call->gtCallObjp->gtType) || (call->gtCallObjp->gtType == TYP_I_IMPL));
2999 /* this is a register argument - put it in the table */
3000 call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1
3001 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3003 false, REG_STK, nullptr
3004 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3007 // this can't be a struct.
3008 assert(argx->gtType != TYP_STRUCT);
3010 /* Increment the argument register count and argument index */
3011 if (!varTypeIsFloating(argx->gtType) || opts.compUseSoftFP)
3014 #ifdef WINDOWS_AMD64_ABI
3015 // Whenever we pass an integer register argument
3016 // we skip the corresponding floating point register argument
3018 #endif // WINDOWS_AMD64_ABI
3022 noway_assert(!"the 'this' pointer can not be a floating point type");
3029 // Compute the maximum number of arguments that can be passed in registers.
3030 // For X86 we handle the varargs and unmanaged calling conventions
3032 if (call->gtFlags & GTF_CALL_POP_ARGS)
3034 noway_assert(intArgRegNum < MAX_REG_ARG);
3035 // No more register arguments for varargs (CALL_POP_ARGS)
3036 maxRegArgs = intArgRegNum;
3038 // Add in the ret buff arg
3039 if (callHasRetBuffArg)
3043 if (call->IsUnmanaged())
3045 noway_assert(intArgRegNum == 0);
3047 if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
3049 noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
3050 call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
3051 call->gtCallArgs->gtOp.gtOp1->gtOper ==
3052 GT_NOP); // the arg was already morphed to a register (fgMorph called twice)
3060 // Add in the ret buff arg
3061 if (callHasRetBuffArg)
3064 #endif // _TARGET_X86_
3066 /* Morph the user arguments */
3067 CLANG_FORMAT_COMMENT_ANCHOR;
3069 #if defined(_TARGET_ARM_)
3071 // The ARM ABI has a concept of back-filling of floating-point argument registers, according
3072 // to the "Procedure Call Standard for the ARM Architecture" document, especially
3073 // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
3074 // appear in a lower-numbered register than floating point argument N. That is, argument
3075 // register allocation is not strictly increasing. To support this, we need to keep track of unused
3076 // floating-point argument registers that we can back-fill. We only support 4-byte float and
3077 // 8-byte double types, and one to four element HFAs composed of these types. With this, we will
3078 // only back-fill single registers, since there is no way with these types to create
3079 // an alignment hole greater than one register. However, there can be up to 3 back-fill slots
3080 // available (with 16 FP argument registers). Consider this code:
3082 // struct HFA { float x, y, z; }; // a three element HFA
3083 // void bar(float a1, // passed in f0
3084 // double a2, // passed in f2/f3; skip f1 for alignment
3085 // HFA a3, // passed in f4/f5/f6
3086 // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
3087 // HFA a5, // passed in f10/f11/f12
3088 // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill
3090 // float a7, // passed in f1 (back-filled)
3091 // float a8, // passed in f7 (back-filled)
3092 // float a9, // passed in f13 (back-filled)
3093 // float a10) // passed on the stack in [OutArg+0]
3095 // Note that if we ever support FP types with larger alignment requirements, then there could
3096 // be more than single register back-fills.
3098 // Once we assign a floating-pointer register to the stack, they all must be on the stack.
3099 // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
3100 // continues only so long as no VFP CPRC has been allocated to a slot on the stack."
3101 // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
3102 // and prevent any additional floating-point arguments from going in registers.
3104 bool anyFloatStackArgs = false;
3106 #endif // _TARGET_ARM_
3108 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3109 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3110 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3112 bool hasStructArgument = false; // @TODO-ARM64-UNIX: Remove this bool during a future refactoring
3113 bool hasMultiregStructArgs = false;
3114 for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
3116 GenTreePtr* parentArgx = &args->gtOp.gtOp1;
3118 #if FEATURE_MULTIREG_ARGS
3119 if (!hasStructArgument)
3121 hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1);
3123 #endif // FEATURE_MULTIREG_ARGS
3125 #ifndef LEGACY_BACKEND
3126 // Record the index of any nonStandard arg that we may be processing here, as we are
3127 // about to call fgMorphTree on it and fgMorphTree may replace it with a new tree.
3128 GenTreePtr orig_argx = *parentArgx;
3129 int nonStandard_index = nonStandardArgs.Find(orig_argx);
3130 #endif // !LEGACY_BACKEND
3132 argx = fgMorphTree(*parentArgx);
3134 flagsSummary |= argx->gtFlags;
3136 assert(args->OperIsList());
3137 assert(argx == args->Current());
3139 #ifndef LEGACY_BACKEND
3140 if ((nonStandard_index != -1) && (argx != orig_argx))
3142 // We need to update the node field for this nonStandard arg here
3143 // as it was changed by the call to fgMorphTree
3144 nonStandardArgs.Replace(nonStandard_index, argx);
3146 #endif // !LEGACY_BACKEND
3148 /* Change the node to TYP_I_IMPL so we don't report GC info
3149 * NOTE: We deferred this from the importer because of the inliner */
3151 if (argx->IsVarAddr())
3153 argx->gtType = TYP_I_IMPL;
3156 bool passUsingFloatRegs;
3157 unsigned argAlign = 1;
3158 // Setup any HFA information about 'argx'
3159 var_types hfaType = GetHfaType(argx);
3160 bool isHfaArg = varTypeIsFloating(hfaType);
3161 unsigned hfaSlots = 0;
3165 hfaSlots = GetHfaCount(argx);
3167 // If we have a HFA struct it's possible we transition from a method that originally
3168 // only had integer types to now start having FP types. We have to communicate this
3169 // through this flag since LSRA later on will use this flag to determine whether
3170 // or not to track the FP register set.
3172 compFloatingPointUsed = true;
3176 CORINFO_CLASS_HANDLE copyBlkClass = nullptr;
3177 bool isRegArg = false;
3178 bool isNonStandard = false;
3179 regNumber nonStdRegNum = REG_NA;
3181 fgArgTabEntryPtr argEntry = nullptr;
3185 argEntry = gtArgEntryByArgNum(call, argIndex);
3190 bool passUsingIntRegs;
3193 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3194 passUsingIntRegs = isValidIntArgReg(argEntry->regNum);
3198 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP;
3199 passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
3202 GenTreePtr curArg = argx;
3203 // If late args have already been computed, use the node in the argument table.
3204 if (argEntry != NULL && argEntry->isTmp)
3206 curArg = argEntry->node;
3209 // We don't use the "size" return value from InferOpSizeAlign().
3210 codeGen->InferOpSizeAlign(curArg, &argAlign);
3212 argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
3213 argAlign /= TARGET_POINTER_SIZE;
3217 if (passUsingFloatRegs)
3219 if (fltArgRegNum % 2 == 1)
3221 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3225 else if (passUsingIntRegs)
3227 if (intArgRegNum % 2 == 1)
3229 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3234 if (argSlots % 2 == 1)
3240 #elif defined(_TARGET_ARM64_)
3244 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3248 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
3251 #elif defined(_TARGET_AMD64_)
3254 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3258 passUsingFloatRegs = varTypeIsFloating(argx);
3260 #elif defined(_TARGET_X86_)
3262 passUsingFloatRegs = false;
3265 #error Unsupported or unset target architecture
3268 bool isBackFilled = false;
3269 unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
3270 var_types structBaseType = TYP_STRUCT;
3271 unsigned structSize = 0;
3273 bool isStructArg = varTypeIsStruct(argx);
3277 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3278 // Get the struct description for the already completed struct argument.
3279 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, argx);
3280 assert(fgEntryPtr != nullptr);
3282 // As described in few other places, this can happen when the argx was morphed
3283 // into an arg setup node - COPYBLK. The COPYBLK has always a type of void.
3284 // In such case the fgArgTabEntry keeps track of whether the original node (before morphing)
3285 // was a struct and the struct classification.
3286 isStructArg = fgEntryPtr->isStruct;
3290 structDesc.CopyFrom(fgEntryPtr->structDesc);
3292 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3294 assert(argEntry != nullptr);
3295 if (argEntry->IsBackFilled())
3298 size = argEntry->numRegs;
3299 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(argEntry->regNum);
3301 isBackFilled = true;
3303 else if (argEntry->regNum == REG_STK)
3306 assert(argEntry->numRegs == 0);
3307 size = argEntry->numSlots;
3312 assert(argEntry->numRegs > 0);
3313 size = argEntry->numRegs + argEntry->numSlots;
3316 // This size has now been computed
3322 // Figure out the size of the argument. This is either in number of registers, or number of
3323 // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and
3326 if (argx->IsArgPlaceHolderNode() || (!isStructArg))
3328 #if defined(_TARGET_AMD64_)
3329 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3332 size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3336 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3337 TARGET_POINTER_SIZE)) /
3338 TARGET_POINTER_SIZE;
3339 eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
3342 hasMultiregStructArgs = true;
3345 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3346 size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3347 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3348 #elif defined(_TARGET_ARM64_)
3353 size = GetHfaCount(argx);
3354 // HFA structs are passed by value in multiple registers
3355 hasMultiregStructArgs = true;
3359 // Structs are either passed in 1 or 2 (64-bit) slots
3360 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3361 TARGET_POINTER_SIZE)) /
3362 TARGET_POINTER_SIZE;
3366 // Structs that are the size of 2 pointers are passed by value in multiple registers
3367 hasMultiregStructArgs = true;
3371 size = 1; // Structs that are larger that 2 pointers (except for HFAs) are passed by
3372 // reference (to a copy)
3375 // Note that there are some additional rules for multireg structs.
3376 // (i.e they cannot be split between registers and the stack)
3380 size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot'
3382 #elif defined(_TARGET_ARM_)
3385 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3386 TARGET_POINTER_SIZE)) /
3387 TARGET_POINTER_SIZE;
3392 size = genTypeStSz(argx->gtType);
3394 #elif defined(_TARGET_X86_)
3395 size = genTypeStSz(argx->gtType);
3397 #error Unsupported or unset target architecture
3398 #endif // _TARGET_XXX_
3403 size = GetHfaCount(argx);
3405 #endif // _TARGET_ARM_
3408 // We handle two opcodes: GT_MKREFANY and GT_OBJ
3409 if (argx->gtOper == GT_MKREFANY)
3411 if (varTypeIsStruct(argx))
3415 #ifdef _TARGET_AMD64_
3416 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3417 if (varTypeIsStruct(argx))
3419 size = info.compCompHnd->getClassSize(impGetRefAnyClass());
3420 unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE);
3421 size = roundupSize / TARGET_POINTER_SIZE;
3422 eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc);
3425 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3433 else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be be a child of a GT_COMMA
3435 GenTreePtr argObj = argx;
3436 GenTreePtr* parentOfArgObj = parentArgx;
3438 assert(args->OperIsList());
3439 assert(argx == args->Current());
3441 /* The GT_OBJ may be a child of a GT_COMMA */
3442 while (argObj->gtOper == GT_COMMA)
3444 parentOfArgObj = &argObj->gtOp.gtOp2;
3445 argObj = argObj->gtOp.gtOp2;
3448 // TODO-1stClassStructs: An OBJ node should not be required for lclVars.
3449 if (argObj->gtOper != GT_OBJ)
3451 BADCODE("illegal argument tree in fgMorphArgs");
3454 CORINFO_CLASS_HANDLE objClass = argObj->gtObj.gtClass;
3455 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3456 eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
3457 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3459 unsigned originalSize = info.compCompHnd->getClassSize(objClass);
3460 originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
3461 unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
3463 structSize = originalSize;
3465 structPassingKind howToPassStruct;
3466 structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, originalSize);
3468 #ifdef _TARGET_ARM64_
3469 if ((howToPassStruct == SPK_PrimitiveType) && // Passed in a single register
3470 !isPow2(originalSize)) // size is 3,5,6 or 7 bytes
3472 if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
3474 // For ARM64 we pass structs that are 3,5,6,7 bytes in size
3475 // we can read 4 or 8 bytes from the LclVar to pass this arg
3476 originalSize = genTypeSize(structBaseType);
3479 #endif // _TARGET_ARM64_
3481 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3482 // On System V OS-es a struct is never passed by reference.
3483 // It is either passed by value on the stack or in registers.
3484 bool passStructInRegisters = false;
3485 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3486 bool passStructByRef = false;
3487 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3489 // The following if-then-else needs to be carefully refactored.
3490 // Basically the else portion wants to turn a struct load (a GT_OBJ)
3491 // into a GT_IND of the appropriate size.
3492 // It can do this with structs sizes that are 1, 2, 4, or 8 bytes.
3493 // It can't do this when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined (Why?)
3494 // TODO-Cleanup: Remove the #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING below.
3495 // It also can't do this if we have a HFA arg,
3496 // unless we have a 1-elem HFA in which case we want to do the optimization.
3497 CLANG_FORMAT_COMMENT_ANCHOR;
3499 #ifndef _TARGET_X86_
3500 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3501 // Check for struct argument with size 1, 2, 4 or 8 bytes
3502 // As we can optimize these by turning them into a GT_IND of the correct type
3504 // Check for cases that we cannot optimize:
3506 if ((originalSize > TARGET_POINTER_SIZE) || // it is struct that is larger than a pointer
3507 !isPow2(originalSize) || // it is not a power of two (1, 2, 4 or 8)
3508 (isHfaArg && (hfaSlots != 1))) // it is a one element HFA struct
3509 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3511 // Normalize 'size' to the number of pointer sized items
3512 // 'size' is the number of register slots that we will use to pass the argument
3513 size = roundupSize / TARGET_POINTER_SIZE;
3514 #if defined(_TARGET_AMD64_)
3515 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3516 size = 1; // This must be copied to a temp and passed by address
3517 passStructByRef = true;
3518 copyBlkClass = objClass;
3519 #else // FEATURE_UNIX_AMD64_STRUCT_PASSING
3520 if (!structDesc.passedInRegisters)
3522 GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
3523 bool needCpyBlk = false;
3524 if (lclVar != nullptr)
3526 // If the struct is promoted to registers, it has to be materialized
3527 // on stack. We may want to support promoted structures in
3528 // codegen'ing putarg_stk instead of creating a copy here.
3529 LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3530 needCpyBlk = varDsc->lvPromoted;
3534 // If simd16 comes from vector<t>, eeGetSystemVAmd64PassStructInRegisterDescriptor
3535 // sets structDesc.passedInRegisters to be false.
3537 // GT_ADDR(GT_SIMD) is not a rationalized IR form and is not handled
3538 // by rationalizer. For now we will let SIMD struct arg to be copied to
3539 // a local. As part of cpblk rewrite, rationalizer will handle GT_ADDR(GT_SIMD)
3542 // | \--* addr byref
3543 // | | /--* lclVar simd16 V05 loc4
3544 // | \--* simd simd16 int -
3545 // | \--* lclVar simd16 V08 tmp1
3547 // TODO-Amd64-Unix: The rationalizer can be updated to handle this pattern,
3548 // so that we don't need to generate a copy here.
3549 GenTree* addr = argObj->gtOp.gtOp1;
3550 if (addr->OperGet() == GT_ADDR)
3552 GenTree* addrChild = addr->gtOp.gtOp1;
3553 if (addrChild->OperGet() == GT_SIMD)
3559 passStructInRegisters = false;
3562 copyBlkClass = objClass;
3566 copyBlkClass = NO_CLASS_HANDLE;
3571 // The objClass is used to materialize the struct on stack.
3572 // For SystemV, the code below generates copies for struct arguments classified
3573 // as register argument.
3574 // TODO-Amd64-Unix: We don't always need copies for this case. Struct arguments
3575 // can be passed on registers or can be copied directly to outgoing area.
3576 passStructInRegisters = true;
3577 copyBlkClass = objClass;
3580 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3581 #elif defined(_TARGET_ARM64_)
3582 if ((size > 2) && !isHfaArg)
3584 size = 1; // This must be copied to a temp and passed by address
3585 passStructByRef = true;
3586 copyBlkClass = objClass;
3591 // If we're passing a promoted struct local var,
3592 // we may need to skip some registers due to alignment; record those.
3593 GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
3596 LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3597 if (varDsc->lvPromoted)
3599 assert(argObj->OperGet() == GT_OBJ);
3600 if (lvaGetPromotionType(varDsc) == PROMOTION_TYPE_INDEPENDENT)
3602 fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3606 #endif // _TARGET_ARM_
3608 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3609 // TODO-Amd64-Unix: Since the else part below is disabled for UNIX_AMD64, copies are always
3610 // generated for struct 1, 2, 4, or 8.
3611 else // We have a struct argument with size 1, 2, 4 or 8 bytes
3613 // change our GT_OBJ into a GT_IND of the correct type.
3614 // We've already ensured above that size is a power of 2, and less than or equal to pointer
3617 assert(howToPassStruct == SPK_PrimitiveType);
3619 // ToDo: remove this block as getArgTypeForStruct properly handles turning one element HFAs into
3623 // If we reach here with an HFA arg it has to be a one element HFA
3624 assert(hfaSlots == 1);
3625 structBaseType = hfaType; // change the indirection type to a floating point type
3628 noway_assert(structBaseType != TYP_UNKNOWN);
3630 argObj->ChangeOper(GT_IND);
3632 // Now see if we can fold *(&X) into X
3633 if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
3635 GenTreePtr temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
3637 // Keep the DONT_CSE flag in sync
3638 // (as the addr always marks it for its op1)
3639 temp->gtFlags &= ~GTF_DONT_CSE;
3640 temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
3641 DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
3642 DEBUG_DESTROY_NODE(argObj); // GT_IND
3645 *parentOfArgObj = temp;
3647 // If the OBJ had been the top level node, we've now changed argx.
3648 if (parentOfArgObj == parentArgx)
3653 if (argObj->gtOper == GT_LCL_VAR)
3655 unsigned lclNum = argObj->gtLclVarCommon.gtLclNum;
3656 LclVarDsc* varDsc = &lvaTable[lclNum];
3658 if (varDsc->lvPromoted)
3660 if (varDsc->lvFieldCnt == 1)
3662 // get the first and only promoted field
3663 LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
3664 if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
3666 // we will use the first and only promoted field
3667 argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);
3669 if (varTypeCanReg(fieldVarDsc->TypeGet()) &&
3670 (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
3672 // Just use the existing field's type
3673 argObj->gtType = fieldVarDsc->TypeGet();
3677 // Can't use the existing field's type, so use GT_LCL_FLD to swizzle
3679 argObj->ChangeOper(GT_LCL_FLD);
3680 argObj->gtType = structBaseType;
3682 assert(varTypeCanReg(argObj->TypeGet()));
3683 assert(copyBlkClass == NO_CLASS_HANDLE);
3687 // use GT_LCL_FLD to swizzle the single field struct to a new type
3688 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
3689 argObj->ChangeOper(GT_LCL_FLD);
3690 argObj->gtType = structBaseType;
3695 // The struct fits into a single register, but it has been promoted into its
3696 // constituent fields, and so we have to re-assemble it
3697 copyBlkClass = objClass;
3699 // Alignment constraints may cause us not to use (to "skip") some argument
3700 // registers. Add those, if any, to the skipped (int) arg reg mask.
3701 fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3702 #endif // _TARGET_ARM_
3705 else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
3707 // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
3708 argObj->ChangeOper(GT_LCL_FLD);
3709 argObj->gtType = structBaseType;
3714 // Not a GT_LCL_VAR, so we can just change the type on the node
3715 argObj->gtType = structBaseType;
3717 assert(varTypeCanReg(argObj->TypeGet()) ||
3718 ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsIntegral(structBaseType)));
3722 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3724 #endif // not _TARGET_X86_
3725 // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
3726 if ((structBaseType == TYP_STRUCT) &&
3727 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3728 !passStructInRegisters
3729 #else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3731 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3734 if (isHfaArg && passUsingFloatRegs)
3736 size = GetHfaCount(argx); // GetHfaCount returns number of elements in the HFA
3740 // If the valuetype size is not a multiple of sizeof(void*),
3741 // we must copyblk to a temp before doing the obj to avoid
3742 // the obj reading memory past the end of the valuetype
3743 CLANG_FORMAT_COMMENT_ANCHOR;
3745 if (roundupSize > originalSize)
3747 copyBlkClass = objClass;
3749 // There are a few special cases where we can omit using a CopyBlk
3750 // where we normally would need to use one.
3752 if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
3754 copyBlkClass = NO_CLASS_HANDLE;
3758 size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
3763 #ifndef _TARGET_X86_
3764 // TODO-Arm: Does this apply for _TARGET_ARM_, where structs passed by value can be split between
3765 // registers and stack?
3768 hasMultiregStructArgs = true;
3770 #endif // !_TARGET_X86_
3773 // The 'size' value must have been set by now. (the original value of zero is an invalid value)
3777 // Figure out if the argument will be passed in a register.
3780 if (isRegParamType(genActualType(argx->TypeGet()))
3781 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3782 && (!isStructArg || structDesc.passedInRegisters)
3787 if (passUsingFloatRegs)
3789 // First, see if it can be back-filled
3790 if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
3791 (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot?
3792 (size == 1)) // The size to back-fill is one float register
3794 // Back-fill the register.
3795 isBackFilled = true;
3796 regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
3797 fltArgSkippedRegMask &=
3798 ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
3799 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
3800 assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG);
3803 // Does the entire float, double, or HFA fit in the FP arg registers?
3804 // Check if the last register needed is still in the argument register range.
3805 isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG;
3809 anyFloatStackArgs = true;
3814 isRegArg = intArgRegNum < MAX_REG_ARG;
3816 #elif defined(_TARGET_ARM64_)
3817 if (passUsingFloatRegs)
3819 // Check if the last register needed is still in the fp argument register range.
3820 isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG;
3822 // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers?
3823 if (isHfaArg && !isRegArg)
3825 // recompute the 'size' so that it represents the number of stack slots rather than the number of
3828 unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
3829 size = roundupSize / TARGET_POINTER_SIZE;
3831 // We also must update fltArgRegNum so that we no longer try to
3832 // allocate any new floating point registers for args
3833 // This prevents us from backfilling a subsequent arg into d7
3835 fltArgRegNum = MAX_FLOAT_REG_ARG;
3840 // Check if the last register needed is still in the int argument register range.
3841 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
3843 // Did we run out of registers when we had a 16-byte struct (size == 2) ?
3844 // (i.e we only have one register remaining but we needed two registers to pass this arg)
3845 // This prevents us from backfilling a subsequent arg into x7
3847 if (!isRegArg && (size > 1))
3849 // We also must update intArgRegNum so that we no longer try to
3850 // allocate any new general purpose registers for args
3852 intArgRegNum = maxRegArgs;
3855 #else // not _TARGET_ARM_ or _TARGET_ARM64_
3857 #if defined(UNIX_AMD64_ABI)
3859 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3860 // Here a struct can be passed in register following the classifications of its members and size.
3861 // Now make sure there are actually enough registers to do so.
3864 unsigned int structFloatRegs = 0;
3865 unsigned int structIntRegs = 0;
3866 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
3868 if (structDesc.IsIntegralSlot(i))
3872 else if (structDesc.IsSseSlot(i))
3878 isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) &&
3879 ((intArgRegNum + structIntRegs) <= MAX_REG_ARG);
3882 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3884 if (passUsingFloatRegs)
3886 isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
3890 isRegArg = intArgRegNum < MAX_REG_ARG;
3893 #else // !defined(UNIX_AMD64_ABI)
3894 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
3895 #endif // !defined(UNIX_AMD64_ABI)
3896 #endif // _TARGET_ARM_
3903 #ifndef LEGACY_BACKEND
3904 // If there are nonstandard args (outside the calling convention) they were inserted above
3905 // and noted in a table so we can recognize them here and build their argInfo.
3907 // They should not affect the placement of any other args or stack space required.
3908 // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
3909 isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum);
3910 if (isNonStandard && (nonStdRegNum == REG_STK))
3914 #if defined(_TARGET_X86_)
3915 else if (call->IsTailCallViaHelper())
3917 // We have already (before calling fgMorphArgs()) appended the 4 special args
3918 // required by the x86 tailcall helper. These args are required to go on the
3919 // stack. Force them to the stack here.
3920 assert(numArgs >= 4);
3921 if (argIndex >= numArgs - 4)
3926 #endif // defined(_TARGET_X86_)
3927 #endif // !LEGACY_BACKEND
3928 } // end !reMorphing
3931 // Now we know if the argument goes in registers or not and how big it is,
3932 // whether we had to just compute it or this is a re-morph call and we looked it up.
3934 CLANG_FORMAT_COMMENT_ANCHOR;
3937 // If we ever allocate a floating point argument to the stack, then all
3938 // subsequent HFA/float/double arguments go on the stack.
3939 if (!isRegArg && passUsingFloatRegs)
3941 for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum)
3943 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3947 // If we think we're going to split a struct between integer registers and the stack, check to
3948 // see if we've already assigned a floating-point arg to the stack.
3949 if (isRegArg && // We decided above to use a register for the argument
3950 !passUsingFloatRegs && // We're using integer registers
3951 (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack
3952 anyFloatStackArgs) // We've already used the stack for a floating-point argument
3954 isRegArg = false; // Change our mind; don't pass this struct partially in registers
3956 // Skip the rest of the integer argument registers
3957 for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum)
3959 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3963 #endif // _TARGET_ARM_
3967 regNumber nextRegNum = REG_STK;
3968 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3969 regNumber nextOtherRegNum = REG_STK;
3970 unsigned int structFloatRegs = 0;
3971 unsigned int structIntRegs = 0;
3973 if (isStructArg && structDesc.passedInRegisters)
3975 // It is a struct passed in registers. Assign the next available register.
3976 assert((structDesc.eightByteCount <= 2) && "Too many eightbytes.");
3977 regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum};
3978 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
3980 if (structDesc.IsIntegralSlot(i))
3982 *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs);
3985 else if (structDesc.IsSseSlot(i))
3987 *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs);
3993 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3995 // fill in or update the argInfo table
3996 nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum)
3997 : genMapIntRegArgNumToRegNum(intArgRegNum);
4000 #ifdef _TARGET_AMD64_
4001 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
4006 fgArgTabEntryPtr newArgEntry;
4009 // This is a register argument - possibly update it in the table
4010 newArgEntry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
4016 nextRegNum = nonStdRegNum;
4019 // This is a register argument - put it in the table
4020 newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign
4021 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4023 isStructArg, nextOtherRegNum, &structDesc
4024 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4027 newArgEntry->SetIsHfaRegArg(passUsingFloatRegs &&
4028 isHfaArg); // Note on Arm32 a HFA is passed in int regs for varargs
4029 newArgEntry->SetIsBackFilled(isBackFilled);
4030 newArgEntry->isNonStandard = isNonStandard;
4033 if (newArgEntry->isNonStandard)
4038 // Set up the next intArgRegNum and fltArgRegNum values.
4041 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4044 intArgRegNum += structIntRegs;
4045 fltArgRegNum += structFloatRegs;
4048 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4050 if (passUsingFloatRegs)
4052 fltArgRegNum += size;
4054 #ifdef WINDOWS_AMD64_ABI
4055 // Whenever we pass an integer register argument
4056 // we skip the corresponding floating point register argument
4057 intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
4058 #endif // WINDOWS_AMD64_ABI
4060 if (fltArgRegNum > MAX_FLOAT_REG_ARG)
4062 // This indicates a partial enregistration of a struct type
4063 assert(varTypeIsStruct(argx));
4064 unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
4065 assert((unsigned char)numRegsPartial == numRegsPartial);
4066 call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
4067 fltArgRegNum = MAX_FLOAT_REG_ARG;
4069 #endif // _TARGET_ARM_
4073 if (hasFixedRetBuffReg() && (nextRegNum == theFixedRetBuffReg()))
4075 // we are setting up the fixed return buffer register argument
4076 // so don't increment intArgRegNum
4081 // Increment intArgRegNum by 'size' registers
4082 intArgRegNum += size;
4085 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
4086 fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
4087 #endif // _TARGET_AMD64_
4089 if (intArgRegNum > MAX_REG_ARG)
4091 // This indicates a partial enregistration of a struct type
4092 assert((isStructArg) || argx->OperIsCopyBlkOp() ||
4093 (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
4094 unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
4095 assert((unsigned char)numRegsPartial == numRegsPartial);
4096 call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
4097 intArgRegNum = MAX_REG_ARG;
4098 fgPtrArgCntCur += size - numRegsPartial;
4100 #endif // _TARGET_ARM_
4105 else // We have an argument that is not passed in a register
4107 fgPtrArgCntCur += size;
4109 // If the register arguments have not been determined then we must fill in the argInfo
4113 // This is a stack argument - possibly update it in the table
4114 call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
4118 // This is a stack argument - put it in the table
4119 call->fgArgInfo->AddStkArg(argIndex, argx, args, size,
4120 argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg));
4124 if (copyBlkClass != NO_CLASS_HANDLE)
4126 noway_assert(!reMorphing);
4127 fgMakeOutgoingStructArgCopy(call, args, argIndex,
4128 copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
4130 // This can cause a GTF_EXCEPT flag to be set.
4131 // TODO-CQ: Fix the cases where this happens. We shouldn't be adding any new flags.
4132 // This currently occurs in the case where we are re-morphing the args on x86/RyuJIT, and
4133 // there are no register arguments. Then reMorphing is never true, so we keep re-copying
4134 // any struct arguments.
4135 // i.e. assert(((call->gtFlags & GTF_EXCEPT) != 0) || ((args->Current()->gtFlags & GTF_EXCEPT) == 0)
4136 flagsSummary |= (args->Current()->gtFlags & GTF_EXCEPT);
4138 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4139 hasStackArgCopy = true;
4143 #ifndef LEGACY_BACKEND
4144 if (argx->gtOper == GT_MKREFANY)
4146 // 'Lower' the MKREFANY tree and insert it.
4147 noway_assert(!reMorphing);
4151 // Build the mkrefany as a GT_FIELD_LIST
4152 GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST)
4153 GenTreeFieldList(argx->gtOp.gtOp1, offsetof(CORINFO_RefAny, dataPtr), TYP_BYREF, nullptr);
4154 (void)new (this, GT_FIELD_LIST)
4155 GenTreeFieldList(argx->gtOp.gtOp2, offsetof(CORINFO_RefAny, type), TYP_I_IMPL, fieldList);
4156 fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
4157 fp->node = fieldList;
4158 args->gtOp.gtOp1 = fieldList;
4160 #else // !_TARGET_X86_
4163 // Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany
4164 unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
4165 lvaSetStruct(tmp, impGetRefAnyClass(), false);
4167 // Build the mkrefany as a comma node:
4168 // (tmp.ptr=argx),(tmp.type=handle)
4169 GenTreeLclFld* destPtrSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, dataPtr));
4170 GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, type));
4171 destPtrSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField());
4172 destPtrSlot->gtFlags |= GTF_VAR_DEF;
4173 destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
4174 destTypeSlot->gtFlags |= GTF_VAR_DEF;
4176 GenTreePtr asgPtrSlot = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1);
4177 GenTreePtr asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2);
4178 GenTreePtr asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot);
4180 // Change the expression to "(tmp=val)"
4181 args->gtOp.gtOp1 = asg;
4183 // EvalArgsToTemps will cause tmp to actually get loaded as the argument
4184 call->fgArgInfo->EvalToTmp(argIndex, tmp, asg);
4185 lvaSetVarAddrExposed(tmp);
4186 #endif // !_TARGET_X86_
4188 #endif // !LEGACY_BACKEND
4190 #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
4193 GenTree* lclNode = fgIsIndirOfAddrOfLocal(argx);
4194 if ((lclNode != nullptr) &&
4195 (lvaGetPromotionType(lclNode->AsLclVarCommon()->gtLclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT))
4197 // Make a GT_FIELD_LIST of the field lclVars.
4198 GenTreeLclVarCommon* lcl = lclNode->AsLclVarCommon();
4199 LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]);
4200 GenTreeFieldList* fieldList = nullptr;
4201 for (unsigned fieldLclNum = varDsc->lvFieldLclStart;
4202 fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum)
4204 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
4205 if (fieldList == nullptr)
4207 lcl->SetLclNum(fieldLclNum);
4208 lcl->ChangeOper(GT_LCL_VAR);
4209 lcl->gtType = fieldVarDsc->lvType;
4210 fieldList = new (this, GT_FIELD_LIST)
4211 GenTreeFieldList(lcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, nullptr);
4212 fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
4213 fp->node = fieldList;
4214 args->gtOp.gtOp1 = fieldList;
4218 GenTree* fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType);
4219 fieldList = new (this, GT_FIELD_LIST)
4220 GenTreeFieldList(fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, fieldList);
4225 #endif // defined (_TARGET_X86_) && !defined(LEGACY_BACKEND)
4227 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4228 if (isStructArg && !isRegArg)
4230 nonRegPassedStructSlots += size;
4233 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4237 } // end foreach argument loop
4241 call->fgArgInfo->ArgsComplete();
4243 #ifdef LEGACY_BACKEND
4244 call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
4245 #if defined(_TARGET_ARM_)
4246 call->gtCallRegUsedMask &= ~argSkippedRegMask;
4248 if (fltArgRegNum > 0)
4250 #if defined(_TARGET_ARM_)
4251 call->gtCallRegUsedMask |= genFltAllRegArgMask(fltArgRegNum) & ~fltArgSkippedRegMask;
4254 #endif // LEGACY_BACKEND
4257 if (call->gtCallArgs)
4259 UpdateGT_LISTFlags(call->gtCallArgs);
4262 /* Process the function address, if indirect call */
4264 if (call->gtCallType == CT_INDIRECT)
4266 call->gtCallAddr = fgMorphTree(call->gtCallAddr);
4269 call->fgArgInfo->RecordStkLevel(fgPtrArgCntCur);
4271 if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr))
4276 /* Remember the maximum value we ever see */
4278 if (fgPtrArgCntMax < fgPtrArgCntCur)
4280 JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
4281 fgPtrArgCntMax = fgPtrArgCntCur;
4284 assert(fgPtrArgCntCur >= genPtrArgCntSav);
4285 call->fgArgInfo->SetStkSizeBytes((fgPtrArgCntCur - genPtrArgCntSav) * TARGET_POINTER_SIZE);
4287 /* The call will pop all the arguments we pushed */
4289 fgPtrArgCntCur = genPtrArgCntSav;
4291 #if FEATURE_FIXED_OUT_ARGS
4293 // Record the outgoing argument size. If the call is a fast tail
4294 // call, it will setup its arguments in incoming arg area instead
4295 // of the out-going arg area, so we don't need to track the
4296 // outgoing arg size.
4297 if (!call->IsFastTailCall())
4299 unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
4301 #if defined(UNIX_AMD64_ABI)
4302 opts.compNeedToAlignFrame = true; // this is currently required for the UNIX ABI to work correctly
4304 // ToDo: Remove this re-calculation preallocatedArgCount and use the value assigned above.
4306 // First slots go in registers only, no stack needed.
4307 // TODO-Amd64-Unix-CQ This calculation is only accurate for integer arguments,
4308 // and ignores floating point args (it is overly conservative in that case).
4309 preallocatedArgCount = nonRegPassedStructSlots;
4310 if (argSlots > MAX_REG_ARG)
4312 preallocatedArgCount += argSlots - MAX_REG_ARG;
4314 #endif // UNIX_AMD64_ABI
4316 const unsigned outgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
4317 call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL));
4322 printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, outgoingArgSpaceSize=%d\n", argSlots,
4323 preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), outgoingArgSpaceSize);
4327 #endif // FEATURE_FIXED_OUT_ARGS
4329 /* Update the 'side effect' flags value for the call */
4331 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4333 // If the register arguments have already been determined
4334 // or we have no register arguments then we don't need to
4335 // call SortArgs() and EvalArgsToTemps()
4337 // For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
4338 // all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
4339 // is added to make sure to call EvalArgsToTemp.
4340 if (!reMorphing && (call->fgArgInfo->HasRegArgs()
4341 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4343 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4346 // This is the first time that we morph this call AND it has register arguments.
4347 // Follow into the code below and do the 'defer or eval to temp' analysis.
4349 call->fgArgInfo->SortArgs();
4351 call->fgArgInfo->EvalArgsToTemps();
4353 // We may have updated the arguments
4354 if (call->gtCallArgs)
4356 UpdateGT_LISTFlags(call->gtCallArgs);
4360 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4362 // Rewrite the struct args to be passed by value on stack or in registers.
4363 fgMorphSystemVStructArgs(call, hasStructArgument);
4365 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4367 #ifndef LEGACY_BACKEND
4368 // In the future we can migrate UNIX_AMD64 to use this
4369 // method instead of fgMorphSystemVStructArgs
4371 // We only build GT_FIELD_LISTs for MultiReg structs for the RyuJIT backend
4372 if (hasMultiregStructArgs)
4374 fgMorphMultiregStructArgs(call);
4376 #endif // LEGACY_BACKEND
4378 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4383 fgArgInfoPtr argInfo = call->fgArgInfo;
4384 for (unsigned curInx = 0; curInx < argInfo->ArgCount(); curInx++)
4386 fgArgTabEntryPtr curArgEntry = argInfo->ArgTable()[curInx];
4387 curArgEntry->Dump();
4395 #pragma warning(pop)
4398 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4399 // fgMorphSystemVStructArgs:
4400 // Rewrite the struct args to be passed by value on stack or in registers.
4403 // call: The call whose arguments need to be morphed.
4404 // hasStructArgument: Whether this call has struct arguments.
4406 void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
// Accumulates the GTF_ALL_EFFECT flags of any args we rewrite; OR'ed back into the call at the end.
4408 unsigned flagsSummary = 0;
4412 if (hasStructArgument)
4414 fgArgInfoPtr allArgInfo = call->fgArgInfo;
// Walk the early argument list; each list node's gtOp1 is either the arg itself or, for a
// late arg, its setup node whose real payload lives in gtCallLateArgs (see comment below).
4416 for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4418 // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
4419 // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
4420 // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
4421 // between the nodes in both lists. If the arg is not a late arg, the fgArgEntry->node points to itself,
4422 // otherwise points to the list in the late args list.
4423 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4424 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4425 assert(fgEntryPtr != nullptr);
4426 GenTreePtr argx = fgEntryPtr->node;
4427 GenTreePtr lateList = nullptr;
4428 GenTreePtr lateNode = nullptr;
// For a late arg, find the gtCallLateArgs list node that carries the actual arg tree.
4432 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4434 assert(list->OperIsList());
4436 GenTreePtr argNode = list->Current();
4437 if (argx == argNode)
4444 assert(lateList != nullptr && lateNode != nullptr);
4446 GenTreePtr arg = argx;
4447 bool argListCreated = false;
4449 var_types type = arg->TypeGet();
4451 if (varTypeIsStruct(type))
4453 var_types originalType = type;
4454 // If we have already processed the arg...
4455 if (arg->OperGet() == GT_FIELD_LIST && varTypeIsStruct(arg))
4460 // If already OBJ it is set properly already.
4461 if (arg->OperGet() == GT_OBJ)
4463 assert(!fgEntryPtr->structDesc.passedInRegisters);
// From here on the arg must be a local (or the address of one).
4467 assert(arg->OperGet() == GT_LCL_VAR || arg->OperGet() == GT_LCL_FLD ||
4468 (arg->OperGet() == GT_ADDR &&
4469 (arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD || arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR)));
4471 GenTreeLclVarCommon* lclCommon =
4472 arg->OperGet() == GT_ADDR ? arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
4473 if (fgEntryPtr->structDesc.passedInRegisters)
4475 if (fgEntryPtr->structDesc.eightByteCount == 1)
4477 // Change the type and below the code will change the LclVar to a LCL_FLD
4478 type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
4479 fgEntryPtr->structDesc.eightByteSizes[0]);
4481 else if (fgEntryPtr->structDesc.eightByteCount == 2)
4483 // Create LCL_FLD for each eightbyte.
4484 argListCreated = true;
// First eightbyte: retype the existing local reference as a LCL_FLD of the eightbyte's type.
4487 arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
4489 GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
4490 fgEntryPtr->structDesc.eightByteSizes[0]);
4491 GenTreeFieldList* fieldList =
4492 new (this, GT_FIELD_LIST) GenTreeFieldList(arg, 0, originalType, nullptr);
4493 fieldList->gtType = originalType; // Preserve the type. It is a special case.
4496 // Second eightbyte.
4497 GenTreeLclFld* newLclField = new (this, GT_LCL_FLD)
4498 GenTreeLclFld(GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc
4499 .eightByteClassifications[1],
4500 fgEntryPtr->structDesc.eightByteSizes[1]),
4501 lclCommon->gtLclNum, fgEntryPtr->structDesc.eightByteOffsets[1]);
4503 fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(newLclField, 0, originalType, fieldList);
4504 fieldList->gtType = originalType; // Preserve the type. It is a special case.
4505 newLclField->gtFieldSeq = FieldSeqStore::NotAField();
// The SysV AMD64 classification never produces more than two eightbytes for a by-value struct.
4509 assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes
4514 // If we didn't change the type of the struct, it means
4515 // its classification doesn't support to be passed directly through a
4516 // register, so we need to pass a pointer to the destination
4517 // where we copied the struct to.
4518 if (!argListCreated)
4520 if (fgEntryPtr->structDesc.passedInRegisters)
4526 // Make sure this is an addr node.
4527 if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
4529 arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
4532 assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);
4534 // Create an Obj of the temp to use it as a call argument.
4535 arg = gtNewObjNode(lvaGetStruct(lclCommon->gtLclNum), arg);
// The rewrite may have changed which tree is the arg, so re-resolve the arg table
// entry and (for late args) the late-arg list node before writing the result back.
4542 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4543 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4544 assert(fgEntryPtr != nullptr);
4545 GenTreePtr argx = fgEntryPtr->node;
4546 GenTreePtr lateList = nullptr;
4547 GenTreePtr lateNode = nullptr;
4550 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4552 assert(list->OperIsList());
4554 GenTreePtr argNode = list->Current();
4555 if (argx == argNode)
4562 assert(lateList != nullptr && lateNode != nullptr);
// Record the (possibly new) arg tree in the arg table and back into the proper list.
4565 fgEntryPtr->node = arg;
4568 lateList->gtOp.gtOp1 = arg;
4572 args->gtOp.gtOp1 = arg;
// Propagate any side-effect flags from the rewritten args up to the call node.
4579 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4581 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4583 //-----------------------------------------------------------------------------
4584 // fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and
4585 // call fgMorphMultiregStructArg on each of them.
4588 // call: a GenTreeCall node that has one or more TYP_STRUCT arguments
4591 // We only call fgMorphMultiregStructArg for the register passed TYP_STRUCT arguments.
4592 // The call to fgMorphMultiregStructArg will mutate the argument into the GT_FIELD_LIST form
4593 // which is only used for struct arguments.
4594 // If this method fails to find any TYP_STRUCT arguments it will assert.
4596 void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
4600 bool foundStructArg = false;
4601 unsigned initialFlags = call->gtFlags;
// Accumulates GTF_ALL_EFFECT flags of rewritten args; OR'ed back into the call at the end.
4602 unsigned flagsSummary = 0;
4603 fgArgInfoPtr allArgInfo = call->fgArgInfo;
4605 // Currently only ARM64 is using this method to morph the MultiReg struct args
4606 // in the future AMD64_UNIX and for HFAs ARM32, will also use this method
4608 CLANG_FORMAT_COMMENT_ANCHOR;
// Guard against targets that must never reach this code path.
4611 NYI_ARM("fgMorphMultiregStructArgs");
4614 assert(!"Logic error: no MultiregStructArgs for X86");
4616 #ifdef _TARGET_AMD64_
4617 #if defined(UNIX_AMD64_ABI)
4618 NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)");
4619 #else // WINDOWS_AMD64_ABI
4620 assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
4621 #endif // !UNIX_AMD64_ABI
// Walk the early argument list looking for TYP_STRUCT args to morph.
4624 for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4626 // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
4627 // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
4628 // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
4629 // between the nodes in both lists. If the arg is not a late arg, the fgArgEntry->node points to itself,
4630 // otherwise points to the list in the late args list.
4631 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4632 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4633 assert(fgEntryPtr != nullptr);
4634 GenTreePtr argx = fgEntryPtr->node;
4635 GenTreePtr lateList = nullptr;
4636 GenTreePtr lateNode = nullptr;
// For a late arg, locate the gtCallLateArgs list node that holds the actual arg tree.
4640 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4642 assert(list->OperIsList());
4644 GenTreePtr argNode = list->Current();
4645 if (argx == argNode)
4652 assert(lateList != nullptr && lateNode != nullptr);
4655 GenTreePtr arg = argx;
4657 if (arg->TypeGet() == TYP_STRUCT)
4659 foundStructArg = true;
// Mutate the struct arg into GT_FIELD_LIST form (one entry per register).
4661 arg = fgMorphMultiregStructArg(arg, fgEntryPtr);
4663 // Did we replace 'argx' with a new tree?
4666 fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node
4668 // link the new arg node into either the late arg list or the gtCallArgs list
4671 lateList->gtOp.gtOp1 = arg;
4675 args->gtOp.gtOp1 = arg;
4681 // We should only call this method when we actually have one or more multireg struct args
4682 assert(foundStructArg);
4685 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4688 //-----------------------------------------------------------------------------
4689 // fgMorphMultiregStructArg: Given a multireg TYP_STRUCT arg from a call argument list
4690 // Morph the argument into a set of GT_FIELD_LIST nodes.
4693 // arg - A GenTree node containing a TYP_STRUCT arg that
4694 // is to be passed in multiple registers
4695 // fgEntryPtr - the fgArgTabEntry information for the current 'arg'
4698 // arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT that is suitable
4699 // for passing in multiple registers.
4700 // If arg is a LclVar we check if it is struct promoted and has the right number of fields
4701 and if they are at the appropriate offsets we will use the struct promoted fields
4702 // in the GT_FIELD_LIST nodes that we create.
4703 // If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements
4704 // we will use a set of GT_LCL_FLDs nodes to access the various portions of the struct
4705 // this also forces the struct to be stack allocated into the local frame.
4706 For the GT_OBJ case we will clone the address expression and generate two (or more)
4708 // Currently the implementation only handles ARM64 and will NYI for other architectures.
4710 GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr fgEntryPtr)
4712 assert(arg->TypeGet() == TYP_STRUCT);
4714 #ifndef _TARGET_ARM64_
4715 NYI("fgMorphMultiregStructArg requires implementation for this target");
4718 #if FEATURE_MULTIREG_ARGS
4719 // Examine 'arg' and setup argValue objClass and structSize
4721 CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE;
4722 GenTreePtr argValue = arg; // normally argValue will be arg, but see right below
4723 unsigned structSize = 0;
4725 if (arg->OperGet() == GT_OBJ)
4727 GenTreeObj* argObj = arg->AsObj();
4728 objClass = argObj->gtClass;
4729 structSize = info.compCompHnd->getClassSize(objClass);
4731 // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR
4733 if (argObj->gtOp1->OperGet() == GT_ADDR)
4735 argValue = argObj->gtOp1->gtOp.gtOp1;
4738 else if (arg->OperGet() == GT_LCL_VAR)
4740 GenTreeLclVarCommon* varNode = arg->AsLclVarCommon();
4741 unsigned varNum = varNode->gtLclNum;
4742 assert(varNum < lvaCount);
4743 LclVarDsc* varDsc = &lvaTable[varNum];
4745 objClass = lvaGetStruct(varNum);
4746 structSize = varDsc->lvExactSize;
4748 noway_assert(objClass != nullptr);
// Per-register element description: either elemCount copies of the HFA element type,
// or (non-HFA) up to two pointer-sized slots whose GC-ness comes from the class layout.
4750 var_types hfaType = TYP_UNDEF;
4751 var_types elemType = TYP_UNDEF;
4752 unsigned elemCount = 0;
4753 unsigned elemSize = 0;
4754 var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0
4756 hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
4757 if (varTypeIsFloating(hfaType))
4760 elemSize = genTypeSize(elemType);
4761 elemCount = structSize / elemSize;
4762 assert(elemSize * elemCount == structSize);
4763 for (unsigned inx = 0; inx < elemCount; inx++)
4765 type[inx] = elemType;
// Non-HFA case: at most two pointer-sized slots; query the GC layout of each slot.
4770 assert(structSize <= 2 * TARGET_POINTER_SIZE);
4771 BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
4772 info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
4774 type[0] = getJitGCType(gcPtrs[0]);
4775 type[1] = getJitGCType(gcPtrs[1]);
4777 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
4779 // We can safely widen this to 16 bytes since we are loading from
4780 // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and
4781 // lives in the stack frame or will be a promoted field.
4783 elemSize = TARGET_POINTER_SIZE;
4784 structSize = 2 * TARGET_POINTER_SIZE;
4786 else // we must have a GT_OBJ
4788 assert(argValue->OperGet() == GT_OBJ);
4790 // We need to load the struct from an arbitrary address
4791 // and we can't read past the end of the structSize
4792 // We adjust the second load type here
4794 if (structSize < 2 * TARGET_POINTER_SIZE)
4796 switch (structSize - TARGET_POINTER_SIZE)
4802 type[1] = TYP_SHORT;
4808 noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
4814 // We should still have a TYP_STRUCT
4815 assert(argValue->TypeGet() == TYP_STRUCT);
// The result: a GT_FIELD_LIST chain, or nullptr if no transformation applied.
4817 GenTreeFieldList* newArg = nullptr;
4819 // Are we passing a struct LclVar?
4821 if (argValue->OperGet() == GT_LCL_VAR)
4823 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
4824 unsigned varNum = varNode->gtLclNum;
4825 assert(varNum < lvaCount);
4826 LclVarDsc* varDsc = &lvaTable[varNum];
4828 // At this point any TYP_STRUCT LclVar must be a 16-byte struct
4829 // or an HFA struct, both which are passed by value.
4831 assert((varDsc->lvSize() == 2 * TARGET_POINTER_SIZE) || varDsc->lvIsHfa());
4833 varDsc->lvIsMultiRegArg = true;
4838 JITDUMP("Multireg struct argument V%02u : ");
4843 // This local variable must match the layout of the 'objClass' type exactly
4844 if (varDsc->lvIsHfa())
4846 // We have a HFA struct
// Sanity-check that the local's HFA description agrees with the objClass-derived one.
4847 noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE));
4848 noway_assert(elemSize == genTypeSize(elemType));
4849 noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
4850 noway_assert(elemSize * elemCount == varDsc->lvExactSize);
4852 for (unsigned inx = 0; (inx < elemCount); inx++)
4854 noway_assert(type[inx] == elemType);
4859 // We must have a 16-byte struct (non-HFA)
4860 noway_assert(elemCount == 2);
4862 for (unsigned inx = 0; inx < elemCount; inx++)
4864 CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx];
4866 // We setup the type[inx] value above using the GC info from 'objClass'
4867 // This GT_LCL_VAR must have the same GC layout info
4869 if (currentGcLayoutType != TYPE_GC_NONE)
4871 noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType));
4875 // We may have used a small type when we setup the type[inx] values above
4876 // We can safely widen this to TYP_I_IMPL
4877 type[inx] = TYP_I_IMPL;
4882 // Is this LclVar a promoted struct with exactly 2 fields?
4883 // TODO-ARM64-CQ: Support struct promoted HFA types here
4884 if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa())
4886 // See if we have two promoted fields that start at offset 0 and 8?
4887 unsigned loVarNum = lvaGetFieldLocal(varDsc, 0);
4888 unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE);
4890 // Did we find the promoted fields at the necessary offsets?
4891 if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM))
4893 LclVarDsc* loVarDsc = &lvaTable[loVarNum];
4894 LclVarDsc* hiVarDsc = &lvaTable[hiVarNum];
4896 var_types loType = loVarDsc->lvType;
4897 var_types hiType = hiVarDsc->lvType;
4899 if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
4901 // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer
4902 // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
4904 JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
4907 // we call lvaSetVarDoNotEnregister and do the proper transformation below.
4912 // We can use the struct promoted field as the two arguments
4914 GenTreePtr loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum);
4915 GenTreePtr hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum);
4917 // Create a new tree for 'arg'
4918 // replace the existing LDOBJ(ADDR(LCLVAR))
4919 // with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr))
4921 newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(loLclVar, 0, loType, nullptr);
4922 (void)new (this, GT_FIELD_LIST) GenTreeFieldList(hiLclVar, TARGET_POINTER_SIZE, hiType, newArg);
4929 // We will create a list of GT_LCL_FLDs nodes to pass this struct
// Accessing the struct via LCL_FLD forces it onto the stack frame.
4931 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
4935 // If we didn't set newarg to a new List Node tree
4937 if (newArg == nullptr)
4939 if (fgEntryPtr->regNum == REG_STK)
4941 // We leave this stack passed argument alone
4945 // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted )
4946 // A GT_LCL_FLD could also contain a 16-byte struct or HFA struct inside it?
4948 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
4950 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
4951 unsigned varNum = varNode->gtLclNum;
4952 assert(varNum < lvaCount);
4953 LclVarDsc* varDsc = &lvaTable[varNum];
4955 unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
4956 unsigned lastOffset = baseOffset + (elemCount * elemSize);
4958 // The allocated size of our LocalVar must be at least as big as lastOffset
4959 assert(varDsc->lvSize() >= lastOffset);
4961 if (varDsc->lvStructGcCount > 0)
4963 // alignment of the baseOffset is required
4964 noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
4965 noway_assert(elemSize == TARGET_POINTER_SIZE);
4966 unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE;
4967 const BYTE* gcPtrs = varDsc->lvGcLayout; // Get the GC layout for the local variable
4968 for (unsigned inx = 0; (inx < elemCount); inx++)
4970 // The GC information must match what we setup using 'objClass'
4971 noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
4974 else // this varDsc contains no GC pointers
4976 for (unsigned inx = 0; inx < elemCount; inx++)
4978 // The GC information must match what we setup using 'objClass'
4979 noway_assert(!varTypeIsGC(type[inx]));
4984 // We create a list of GT_LCL_FLDs nodes to pass this struct
4986 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
4988 // Create a new tree for 'arg'
4989 // replace the existing LDOBJ(ADDR(LCLVAR))
4990 // with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...)
4992 unsigned offset = baseOffset;
4993 GenTreeFieldList* listEntry = nullptr;
4994 for (unsigned inx = 0; inx < elemCount; inx++)
4996 elemSize = genTypeSize(type[inx]);
4997 GenTreePtr nextLclFld = gtNewLclFldNode(varNum, type[inx], offset);
4998 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(nextLclFld, offset, type[inx], listEntry);
4999 if (newArg == nullptr)
5006 // Are we passing a GT_OBJ struct?
5008 else if (argValue->OperGet() == GT_OBJ)
5010 GenTreeObj* argObj = argValue->AsObj();
5011 GenTreePtr baseAddr = argObj->gtOp1;
5012 var_types addrType = baseAddr->TypeGet();
5014 // Create a new tree for 'arg'
5015 // replace the existing LDOBJ(EXPR)
5016 // with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...)
5019 unsigned offset = 0;
5020 GenTreeFieldList* listEntry = nullptr;
5021 for (unsigned inx = 0; inx < elemCount; inx++)
5023 elemSize = genTypeSize(type[inx]);
5024 GenTreePtr curAddr = baseAddr;
// For elements past the first, clone the base address and add the running offset.
// NOTE(review): cloning re-evaluates the address expression per element — presumably
// safe because complex addresses were spilled earlier; confirm against callers.
5027 GenTreePtr baseAddrDup = gtCloneExpr(baseAddr);
5028 noway_assert(baseAddrDup != nullptr);
5029 curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL));
5035 GenTreePtr curItem = gtNewOperNode(GT_IND, type[inx], curAddr);
5037 // For safety all GT_IND should have at least GT_GLOB_REF set.
5038 curItem->gtFlags |= GTF_GLOB_REF;
5039 if (fgAddrCouldBeNull(curItem))
5041 // This indirection can cause a GPF if the address could be null.
5042 curItem->gtFlags |= GTF_EXCEPT;
5045 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(curItem, offset, type[inx], listEntry);
5046 if (newArg == nullptr)
5056 // If we reach here we should have set newArg to something
5057 if (newArg == nullptr)
5059 gtDispTree(argValue);
5060 assert(!"Missing case in fgMorphMultiregStructArg");
5065 printf("fgMorphMultiregStructArg created tree:\n");
5070 arg = newArg; // consider calling fgMorphTree(newArg);
5072 #endif // FEATURE_MULTIREG_ARGS
5077 // Make a copy of a struct variable if necessary, to pass to a callee.
5078 // returns: tree that computes address of the outgoing arg
5079 void Compiler::fgMakeOutgoingStructArgCopy(
5083 CORINFO_CLASS_HANDLE copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
5084 const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
5086 GenTree* argx = args->Current();
5087 noway_assert(argx->gtOper != GT_MKREFANY);
5088 // See if we need to insert a copy at all
5089 // Case 1: don't need a copy if it is the last use of a local. We can't determine that all of the time
5090 // but if there is only one use and no loops, the use must be last.
5091 GenTreeLclVarCommon* lcl = nullptr;
5092 if (argx->OperIsLocal())
5094 lcl = argx->AsLclVarCommon();
5096 else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal())
5098 lcl = argx->AsObj()->Addr()->AsLclVarCommon();
5102 unsigned varNum = lcl->AsLclVarCommon()->GetLclNum();
5103 if (lvaIsImplicitByRefLocal(varNum))
5105 LclVarDsc* varDsc = &lvaTable[varNum];
5106 // JIT_TailCall helper has an implicit assumption that all tail call arguments live
5107 // on the caller's frame. If an argument lives on the caller's caller's frame, it may get
5108 // overwritten if that frame is reused for the tail call. Therefore, we should always copy
5109 // struct parameters if they are passed as arguments to a tail call.
5110 if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt == 1) && !fgMightHaveLoop())
// Single use, no loops, not a helper-based tail call: the local can be passed directly.
5112 varDsc->lvRefCnt = 0;
5113 args->gtOp.gtOp1 = lcl;
5114 fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
5117 JITDUMP("did not have to make outgoing copy for V%2d", varNum);
// A copy is required. Lazily create the set tracking temps used for outgoing struct args.
5123 if (fgOutgoingArgTemps == nullptr)
5125 fgOutgoingArgTemps = hashBv::Create(this);
5131 // Attempt to find a local we have already used for an outgoing struct and reuse it.
5132 // We do not reuse within a statement.
5133 if (!opts.MinOpts())
5136 FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps)
5138 LclVarDsc* varDsc = &lvaTable[lclNum];
// Reuse only a temp of the exact same struct class that is not already in use in this statement.
5139 if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) &&
5140 !fgCurrentlyInUseArgTemps->testBit(lclNum))
5142 tmp = (unsigned)lclNum;
5144 JITDUMP("reusing outgoing struct arg");
5151 // Create the CopyBlk tree and insert it.
5155 // Here We don't need unsafe value cls check, since the addr of this temp is used only in copyblk.
5156 tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
5157 lvaSetStruct(tmp, copyBlkClass, false);
5158 fgOutgoingArgTemps->setBit(tmp);
// Mark the temp in use so it isn't reused for another arg in this same statement.
5161 fgCurrentlyInUseArgTemps->setBit(tmp);
5163 // TYP_SIMD structs should not be enregistered, since ABI requires it to be
5164 // allocated on stack and address of it needs to be passed.
5165 if (lclVarIsSIMDType(tmp))
5167 lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct));
5170 // Create a reference to the temp
5171 GenTreePtr dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType);
5172 dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction.
5174 // TODO-Cleanup: This probably shouldn't be done here because arg morphing is done prior
5175 // to ref counting of the lclVars.
5176 lvaTable[tmp].incRefCnts(compCurBB->getBBWeight(this), this);
5179 if (argx->gtOper == GT_OBJ)
// Keep only the side-effect flags contributed by the address expression of the OBJ.
5181 argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT);
5185 argx->gtFlags |= GTF_DONT_CSE;
5188 // Copy the valuetype to the temp
5189 unsigned size = info.compCompHnd->getClassSize(copyBlkClass);
5190 GenTreePtr copyBlk = gtNewBlkOpNode(dest, argx, size, false /* not volatile */, true /* copyBlock */);
5191 copyBlk = fgMorphCopyBlock(copyBlk);
5193 #if FEATURE_FIXED_OUT_ARGS
5195 // Do the copy early, and evaluate the temp later (see EvalArgsToTemps)
5196 // When on Unix create LCL_FLD for structs passed in more than one registers. See fgMakeTmpArgNode
5197 GenTreePtr arg = copyBlk;
5199 #else // FEATURE_FIXED_OUT_ARGS
5201 // Structs are always on the stack, and thus never need temps
5202 // so we have to put the copy and temp all into one expression
5203 GenTreePtr arg = fgMakeTmpArgNode(tmp FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters));
5205 // Change the expression to "(tmp=val),tmp"
5206 arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
5208 #endif // FEATURE_FIXED_OUT_ARGS
5210 args->gtOp.gtOp1 = arg;
5211 call->fgArgInfo->EvalToTmp(argIndex, tmp, arg);
5217 // See declaration for specification comment.
// Computes which argument registers are skipped (left unused) between the promoted
// fields of a struct argument, and ORs their masks into *pArgSkippedRegMask.
// ARM-only (see the trailing #endif // _TARGET_ARM_).
5218 void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
5219 unsigned firstArgRegNum,
5220 regMaskTP* pArgSkippedRegMask)
5222 assert(varDsc->lvPromoted);
5223 // There's no way to do these calculations without breaking abstraction and assuming that
5224 // integer register arguments are consecutive ints. They are on ARM.
5226 // To start, figure out what register contains the last byte of the first argument.
5227 LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
5228 unsigned lastFldRegOfLastByte =
5229 (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5232 // Now we're keeping track of the register that the last field ended in; see what registers
5233 // subsequent fields start in, and whether any are skipped.
5234 // (We assume here the invariant that the fields are sorted in offset order.)
5235 for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++)
5237 unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset;
5238 LclVarDsc* fldVarDsc = &lvaTable[fldVarNum];
5239 unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
5240 assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields.
5241 // This loop should enumerate the offsets of any registers skipped.
5242 // Find what reg contains the last byte:
5243 // And start at the first register after that. If that isn't the first reg of the current
5244 for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset;
5245 skippedRegOffsets++)
5247 // If the register number would not be an arg reg, we're done.
5248 if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG)
5250 *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets));
// Advance the high-water mark to the register holding this field's last byte.
5252 lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5256 #endif // _TARGET_ARM_
5258 //****************************************************************************
5259 // fgFixupStructReturn:
5260 // The companion to impFixupCallStructReturn. Now that the importer is done
5261 // change the gtType to the precomputed native return type
5262 // requires that callNode currently has a struct type
5264 void Compiler::fgFixupStructReturn(GenTreePtr callNode)
5266 assert(varTypeIsStruct(callNode));
5268 GenTreeCall* call = callNode->AsCall();
5269 bool callHasRetBuffArg = call->HasRetBufArg();
5270 bool isHelperCall = call->IsHelperCall();
5272 // Decide on the proper return type for this call that currently returns a struct
5274 CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd;
5275 Compiler::structPassingKind howToReturnStruct;
5276 var_types returnType;
5278 // There are a couple of Helper Calls that say they return a TYP_STRUCT but they
5279 // expect this method to re-type this to a TYP_REF (what is in call->gtReturnType)
5281 // CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD
5282 // CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD
5283 // CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL
// Special-cased helper calls: no retbuf, no class handle — take the type recorded at import time.
5287 assert(!callHasRetBuffArg);
5288 assert(retClsHnd == NO_CLASS_HANDLE);
5290 // Now that we are past the importer, re-type this node
5291 howToReturnStruct = SPK_PrimitiveType;
5292 returnType = (var_types)call->gtReturnType;
// Normal case: ask the ABI classifier how this struct class is returned.
5296 returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
5299 if (howToReturnStruct == SPK_ByReference)
5301 assert(returnType == TYP_UNKNOWN);
5302 assert(callHasRetBuffArg);
5306 assert(returnType != TYP_UNKNOWN);
5308 if (returnType != TYP_STRUCT)
5310 // Widen the primitive type if necessary
5311 returnType = genActualType(returnType);
5313 call->gtType = returnType;
5316 #if FEATURE_MULTIREG_RET
5317 // Either we don't have a struct now or if struct, then it is a struct returned in regs or in return buffer.
5318 assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg);
5319 #else // !FEATURE_MULTIREG_RET
5320 // No more struct returns
5321 assert(call->TypeGet() != TYP_STRUCT);
5324 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5325 // If it was a struct return, it has been transformed into a call
5326 // with a return buffer (that returns TYP_VOID) or into a return
5327 // of a primitive/enregisterable type
5328 assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
5332 /*****************************************************************************
5334 * A little helper used to rearrange nested commutative operations. The
5335 * effect is that nested associative, commutative operations are transformed
5336 * into a 'left-deep' tree, i.e. into something like this:
5338 * (((a op b) op c) op d) op...
5343 void Compiler::fgMoveOpsLeft(GenTreePtr tree)
5351 op1 = tree->gtOp.gtOp1;
5352 op2 = tree->gtOp.gtOp2;
5353 oper = tree->OperGet();
// Only rotate when the tree is "x op (y op z)" with the same commutative operator on both levels.
5355 noway_assert(GenTree::OperIsCommutative(oper));
5356 noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL);
5357 noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
5358 noway_assert(oper == op2->gtOper);
5360 // Commutativity doesn't hold if overflow checks are needed
5362 if (tree->gtOverflowEx() || op2->gtOverflowEx())
5367 if (gtIsActiveCSE_Candidate(op2))
5369 // If we have marked op2 as a CSE candidate,
5370 // we can't perform a commutative reordering
5371 // because any value numbers that we computed for op2
5372 // will be incorrect after performing a commutative reordering
5377 if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
5382 // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
5383 if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
5388 if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN)
5390 // We could deal with this, but we were always broken and just hit the assert
5391 // below regarding flags, which means it's not frequent, so will just bail out.
5396 noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());
5398 GenTreePtr ad1 = op2->gtOp.gtOp1;
5399 GenTreePtr ad2 = op2->gtOp.gtOp2;
5401 // Compiler::optOptimizeBools() can create GT_OR of two GC pointers yielding a GT_INT
5402 // We can not reorder such GT_OR trees
5404 if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
5409 /* Change "(x op (y op z))" to "(x op y) op z" */
5410 /* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */
// Reuse the op2 node as the new left-hand subtree to avoid allocating a node.
5412 GenTreePtr new_op1 = op2;
5414 new_op1->gtOp.gtOp1 = op1;
5415 new_op1->gtOp.gtOp2 = ad1;
5417 /* Change the flags. */
5419 // Make sure we aren't throwing away any flags
5420 noway_assert((new_op1->gtFlags &
5421 ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
5422 GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated
5423 GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0);
// Rebuild new_op1's flags from its new operands' side effects.
5426 (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
5427 (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT);
5429 /* Retype new_op1 if it has not/become a GC ptr. */
5431 if (varTypeIsGC(op1->TypeGet()))
5433 noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5434 oper == GT_ADD) || // byref(ref + (int+int))
5435 (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5436 oper == GT_OR)); // int(gcref | int(gcref|intval))
5438 new_op1->gtType = tree->gtType;
5440 else if (varTypeIsGC(ad2->TypeGet()))
5442 // Neither ad1 nor op1 are GC. So new_op1 isn't either
5443 noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
5444 new_op1->gtType = TYP_I_IMPL;
5447 // If new_op1 is a new expression. Assign it a new unique value number.
5448 // vnStore is null before the ValueNumber phase has run
5449 if (vnStore != nullptr)
5451 // We can only keep the old value number on new_op1 if both op1 and ad2
5452 // have the same non-NoVN value numbers. Since op is commutative, comparing
5453 // only ad2 and op1 is enough.
5454 if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5455 (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5456 (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal()))
5458 new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet()));
// Install the rotated operands back into the tree.
5462 tree->gtOp.gtOp1 = new_op1;
5463 tree->gtOp.gtOp2 = ad2;
5465 /* If 'new_op1' is now the same nested op, process it recursively */
5467 if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
5469 fgMoveOpsLeft(new_op1);
5472 /* If 'ad2' is now the same nested op, process it
5473 * Instead of recursion, we set up op1 and op2 for the next loop.
5478 } while ((op2->gtOper == oper) && !op2->gtOverflowEx());
5485 /*****************************************************************************/
5487 void Compiler::fgSetRngChkTarget(GenTreePtr tree, bool delay)
5489 GenTreeBoundsChk* bndsChk = nullptr;
5490 SpecialCodeKind kind = SCK_RNGCHK_FAIL;
5493 if ((tree->gtOper == GT_ARR_BOUNDS_CHECK) || (tree->gtOper == GT_SIMD_CHK))
5494 #else // FEATURE_SIMD
5495 if (tree->gtOper == GT_ARR_BOUNDS_CHECK)
5496 #endif // FEATURE_SIMD
5498 bndsChk = tree->AsBoundsChk();
5499 kind = tree->gtBoundsChk.gtThrowKind;
// Other node kinds that carry an implicit range check.
5503 noway_assert((tree->gtOper == GT_ARR_ELEM) || (tree->gtOper == GT_ARR_INDEX));
5507 unsigned callStkDepth = fgPtrArgCntCur;
5509 // only x86 pushes args
5510 const unsigned callStkDepth = 0;
5517 // we need to initialize this field
5518 if (fgGlobalMorph && bndsChk != nullptr)
5520 bndsChk->gtStkDepth = callStkDepth;
// Under debuggable codegen we don't create shared range-fail blocks here.
5524 if (!opts.compDbgCode)
5526 if (delay || compIsForInlining())
5528 /* We delay this until after loop-oriented range check
5529 analysis. For now we merely store the current stack
5530 level in the tree node.
5532 if (bndsChk != nullptr)
5534 noway_assert(!bndsChk->gtIndRngFailBB || previousCompletedPhase >= PHASE_OPTIMIZE_LOOPS);
5535 bndsChk->gtStkDepth = callStkDepth;
5540 /* Create/find the appropriate "range-fail" label */
5542 // fgPtrArgCntCur is only valid for global morph or if we walk full stmt.
5543 noway_assert((bndsChk != nullptr) || fgGlobalMorph);
5545 unsigned stkDepth = (bndsChk != nullptr) ? bndsChk->gtStkDepth : callStkDepth;
5547 BasicBlock* rngErrBlk = fgRngChkTarget(compCurBB, stkDepth, kind);
5549 /* Add the label to the indirection node */
5551 if (bndsChk != nullptr)
5553 bndsChk->gtIndRngFailBB = gtNewCodeRef(rngErrBlk);
5559 /*****************************************************************************
5561 * Expand a GT_INDEX node and fully morph the child operands
 *  The original GT_INDEX node is bashed into the GT_IND node that accesses
5564 * the array element. We expand the GT_INDEX node into a larger tree that
5565 * evaluates the array base and index. The simplest expansion is a GT_COMMA
5566 * with a GT_ARR_BOUND_CHK and a GT_IND with a GTF_INX_RNGCHK flag.
5567 * For complex array or index expressions one or more GT_COMMA assignments
5568 * are inserted so that we only evaluate the array or index expressions once.
5570 * The fully expanded tree is then morphed. This causes gtFoldExpr to
5571 * perform local constant prop and reorder the constants in the tree and
5574 * We then parse the resulting array element expression in order to locate
5575 * and label the constants and variables that occur in the tree.
// Complexity thresholds for gtComplexityExceeds: when the array or index
// expression is more complex than this, fgMorphArrayIndex spills it into a
// temp so it is evaluated exactly once (shared by the bounds check and the
// actual element address computation).
const int MAX_ARR_COMPLEXITY   = 4;
const int MAX_INDEX_COMPLEXITY = 4;
5581 GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
5583 noway_assert(tree->gtOper == GT_INDEX);
5584 GenTreeIndex* asIndex = tree->AsIndex();
5586 var_types elemTyp = tree->TypeGet();
5587 unsigned elemSize = tree->gtIndex.gtIndElemSize;
5588 CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass;
5590 noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);
5593 if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= getSIMDVectorRegisterByteLength())
5595 // If this is a SIMD type, this is the point at which we lose the type information,
5596 // so we need to set the correct type on the GT_IND.
5597 // (We don't care about the base type here, so we only check, but don't retain, the return value).
5598 unsigned simdElemSize = 0;
5599 if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN)
5601 assert(simdElemSize == elemSize);
5602 elemTyp = getSIMDTypeForSize(elemSize);
5603 // This is the new type of the node.
5604 tree->gtType = elemTyp;
5605 // Now set elemStructType to null so that we don't confuse value numbering.
5606 elemStructType = nullptr;
5609 #endif // FEATURE_SIMD
5611 GenTreePtr arrRef = asIndex->Arr();
5612 GenTreePtr index = asIndex->Index();
5614 // Set up the the array length's offset into lenOffs
5615 // And the the first element's offset into elemOffs
5618 if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
5620 lenOffs = offsetof(CORINFO_String, stringLen);
5621 elemOffs = offsetof(CORINFO_String, chars);
5622 tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE
5624 else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
5626 lenOffs = offsetof(CORINFO_RefArray, length);
5627 elemOffs = eeGetEEInfo()->offsetOfObjArrayData;
5629 else // We have a standard array
5631 lenOffs = offsetof(CORINFO_Array, length);
5632 elemOffs = offsetof(CORINFO_Array, u1Elems);
5635 bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
5636 bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0);
5638 GenTreePtr arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
5639 GenTreePtr indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression
5640 GenTreePtr bndsChk = nullptr;
5642 // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
5645 GenTreePtr arrRef2 = nullptr; // The second copy will be used in array address expression
5646 GenTreePtr index2 = nullptr;
5648 // If the arrRef expression involves an assignment, a call or reads from global memory,
5649 // then we *must* allocate a temporary in which to "localize" those values,
5650 // to ensure that the same values are used in the bounds check and the actual
5652 // Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
5653 // Note that if 'arrRef' is a GT_FIELD, it has not yet been morphed so its true
5654 // complexity is not exposed. (Without that condition there are cases of local struct
5655 // fields that were previously, needlessly, marked as GTF_GLOB_REF, and when that was
5656 // fixed, there were some regressions that were mostly ameliorated by adding this condition.)
5658 if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
5659 gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD))
5661 unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5662 arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef);
5663 arrRef = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5664 arrRef2 = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5668 arrRef2 = gtCloneExpr(arrRef);
5669 noway_assert(arrRef2 != nullptr);
5672 // If the index expression involves an assignment, a call or reads from global memory,
5673 // we *must* allocate a temporary in which to "localize" those values,
5674 // to ensure that the same values are used in the bounds check and the actual
5676 // Also we allocate the temporary when the index is sufficiently complex/expensive.
5678 if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) ||
5679 (arrRef->OperGet() == GT_FIELD))
5681 unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5682 indexDefn = gtNewTempAssign(indexTmpNum, index);
5683 index = gtNewLclvNode(indexTmpNum, index->TypeGet());
5684 index2 = gtNewLclvNode(indexTmpNum, index->TypeGet());
5688 index2 = gtCloneExpr(index);
5689 noway_assert(index2 != nullptr);
5692 // Next introduce a GT_ARR_BOUNDS_CHECK node
5693 var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.
5695 #ifdef _TARGET_64BIT_
5696 // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case
5697 // of a 64 bit architecture this means the array index can potentially be a TYP_LONG, so for this case,
5698 // the comparison will have to be widen to 64 bits.
5699 if (index->TypeGet() == TYP_I_IMPL)
5701 bndsChkType = TYP_I_IMPL;
5703 #endif // _TARGET_64BIT_
5705 GenTree* arrLen = new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arrRef, (int)lenOffs);
5707 if (bndsChkType != TYP_INT)
5709 arrLen = gtNewCastNode(bndsChkType, arrLen, bndsChkType);
5712 GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
5713 GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL);
5715 bndsChk = arrBndsChk;
5717 // Make sure to increment ref-counts if already ref-counted.
5718 if (lvaLocalVarRefCounted)
5720 lvaRecursiveIncRefCounts(index);
5721 lvaRecursiveIncRefCounts(arrRef);
5724 // Now we'll switch to using the second copies for arrRef and index
5725 // to compute the address expression
5731 // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"
5735 #ifdef _TARGET_64BIT_
5736 // Widen 'index' on 64-bit targets
5737 if (index->TypeGet() != TYP_I_IMPL)
5739 if (index->OperGet() == GT_CNS_INT)
5741 index->gtType = TYP_I_IMPL;
5745 index = gtNewCastNode(TYP_I_IMPL, index, TYP_I_IMPL);
5748 #endif // _TARGET_64BIT_
5750 /* Scale the index value if necessary */
5753 GenTreePtr size = gtNewIconNode(elemSize, TYP_I_IMPL);
5755 // Fix 392756 WP7 Crossgen
5757 // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
5758 // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
5759 // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE.
5761 size->gtFlags |= GTF_DONT_CSE;
5763 /* Multiply by the array element size */
5764 addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
5771 /* Add the object ref to the element's offset */
5773 addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
5775 /* Add the first element's offset */
5777 GenTreePtr cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
5779 addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, cns);
5781 #if SMALL_TREE_NODES
5782 assert((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL);
5785 // Change the orginal GT_INDEX node into a GT_IND node
5786 tree->SetOper(GT_IND);
5788 // If the index node is a floating-point type, notify the compiler
5789 // we'll potentially use floating point registers at the time of codegen.
5790 if (varTypeIsFloating(tree->gtType))
5792 this->compFloatingPointUsed = true;
5795 // We've now consumed the GTF_INX_RNGCHK, and the node
5796 // is no longer a GT_INDEX node.
5797 tree->gtFlags &= ~GTF_INX_RNGCHK;
5799 tree->gtOp.gtOp1 = addr;
5801 // This is an array index expression.
5802 tree->gtFlags |= GTF_IND_ARR_INDEX;
5804 /* An indirection will cause a GPF if the address is null */
5805 tree->gtFlags |= GTF_EXCEPT;
5809 tree->gtFlags |= GTF_DONT_CSE;
5812 // Store information about it.
5813 GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType));
5815 // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it.
5817 GenTreePtr indTree = tree;
5819 // Did we create a bndsChk tree?
5822 // Use a GT_COMMA node to prepend the array bound check
5824 tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree);
5826 /* Mark the indirection node as needing a range check */
5827 fgSetRngChkTarget(bndsChk);
5830 if (indexDefn != nullptr)
5832 // Use a GT_COMMA node to prepend the index assignment
5834 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
5836 if (arrRefDefn != nullptr)
5838 // Use a GT_COMMA node to prepend the arRef assignment
5840 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
5843 // Currently we morph the tree to perform some folding operations prior
5844 // to attaching fieldSeq info and labeling constant array index contributions
5848 // Ideally we just want to proceed to attaching fieldSeq info and labeling the
5849 // constant array index contributions, but the morphing operation may have changed
5850 // the 'tree' into something that now unconditionally throws an exception.
5852 // In such case the gtEffectiveVal could be a new tree or it's gtOper could be modified
5853 // or it could be left unchanged. If it is unchanged then we should not return,
5854 // instead we should proceed to attaching fieldSeq info, etc...
5856 GenTreePtr arrElem = tree->gtEffectiveVal();
5858 if (fgIsCommaThrow(tree))
5860 if ((arrElem != indTree) || // A new tree node may have been created
5861 (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
5863 return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
5867 assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
5869 addr = arrElem->gtOp.gtOp1;
5871 assert(addr->TypeGet() == TYP_BYREF);
5873 GenTreePtr cnsOff = nullptr;
5874 if (addr->OperGet() == GT_ADD)
5876 if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
5878 cnsOff = addr->gtOp.gtOp2;
5879 addr = addr->gtOp.gtOp1;
5882 while ((addr->OperGet() == GT_ADD) || (addr->OperGet() == GT_SUB))
5884 assert(addr->TypeGet() == TYP_BYREF);
5885 GenTreePtr index = addr->gtOp.gtOp2;
5887 // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
5888 index->LabelIndex(this);
5890 addr = addr->gtOp.gtOp1;
5892 assert(addr->TypeGet() == TYP_REF);
5894 else if (addr->OperGet() == GT_CNS_INT)
5899 FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
5901 if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs))
5903 // Assign it the [#FirstElem] field sequence
5905 cnsOff->gtIntCon.gtFieldSeq = firstElemFseq;
5907 else // We have folded the first element's offset with the index expression
5909 // Build the [#ConstantIndex, #FirstElem] field sequence
5911 FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
5912 FieldSeqNode* fieldSeq = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq);
5914 if (cnsOff == nullptr) // It must have folded into a zero offset
5916 // Record in the general zero-offset map.
5917 GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
5921 cnsOff->gtIntCon.gtFieldSeq = fieldSeq;
5929 /*****************************************************************************
5931 * Wrap fixed stack arguments for varargs functions to go through varargs
5932 * cookie to access them, except for the cookie itself.
5934 * Non-x86 platforms are allowed to access all arguments directly
5935 * so we don't need this code.
// Wraps access to a fixed stack argument of a varargs function so that it
// goes through the varargs cookie; returns nullptr (elided in this excerpt)
// when 'lclNum' does not need wrapping (register arg, non-parameter, or the
// cookie itself).
//
// Arguments:
//    lclNum  - the local/parameter being accessed
//    varType - the type of the access
//    lclOffs - additional byte offset into the argument
GenTreePtr Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs)
    /* For the fixed stack arguments of a varargs function, we need to go
        through the varargs cookies to access them, except for the
        cookie itself. */

    LclVarDsc* varDsc = &lvaTable[lclNum];

    if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg)
        // Create a node representing the local pointing to the base of the args
        // NOTE(review): the assignment target ('ptrArg') and the tail of the
        // offset expression for this GT_SUB are elided in this excerpt.
        gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL),
        gtNewIconNode(varDsc->lvStkOffs - codeGen->intRegState.rsCalleeRegArgCount * sizeof(void*) +

        // Access the argument through the local

        if (varType == TYP_STRUCT)
            // Struct arguments are read as a block of lvExactSize bytes.
            tree = gtNewBlockVal(ptrArg, varDsc->lvExactSize);

            tree = gtNewOperNode(GT_IND, varType, ptrArg);

        // The computed pointer may address anywhere on the stack.
        tree->gtFlags |= GTF_IND_TGTANYWHERE;

        if (varDsc->lvAddrExposed)
            // Address-exposed locals may alias global state.
            tree->gtFlags |= GTF_GLOB_REF;

        return fgMorphTree(tree);
5978 /*****************************************************************************
5980 * Transform the given GT_LCL_VAR tree for code generation.
// Transforms a GT_LCL_VAR node for code generation: flags address-exposed
// locals with GTF_GLOB_REF, routes varargs stack arguments through the
// varargs cookie (x86), and inserts a narrowing cast for small-typed locals
// that are normalized on load.
GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree)
    noway_assert(tree->gtOper == GT_LCL_VAR);

    unsigned   lclNum  = tree->gtLclVarCommon.gtLclNum;
    var_types  varType = lvaGetRealType(lclNum);
    LclVarDsc* varDsc  = &lvaTable[lclNum];

    if (varDsc->lvAddrExposed)
        // The local may be read/written through aliases.
        tree->gtFlags |= GTF_GLOB_REF;

    // NOTE(review): the '#ifdef _TARGET_X86_' pairing with the '#endif' below
    // is elided in this excerpt.
    if (info.compIsVarArgs)
        // A non-null result means the access was rewrapped through the
        // varargs cookie.
        GenTreePtr newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0);
        if (newTree != nullptr)
            // A block node on a use (not a def) is represented as an
            // indirection of the access type instead.
            if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
                fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
#endif // _TARGET_X86_

    /* If not during the global morphing phase bail */

    bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0;

    noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr

    if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad())
#if LOCAL_ASSERTION_PROP
        /* Assertion prop can tell us to omit adding a cast here */
        if (optLocalAssertionProp && optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX)

        /* Small-typed arguments and aliased locals are normalized on load.
           Other small-typed locals are normalized on store.
           Also, under the debugger as the debugger could write to the variable.
           If this is one of the former, insert a narrowing cast on the load.
           ie. Convert: var-short --> cast-short(var-int) */

        // Read the local as a full TYP_INT, then narrow it back to its
        // declared small type with an explicit cast.
        tree->gtType = TYP_INT;
        fgMorphTreeDone(tree);
        tree = gtNewCastNode(TYP_INT, tree, varType);
        fgMorphTreeDone(tree);
6047 /*****************************************************************************
6048 Grab a temp for big offset morphing.
6049 This method will grab a new temp if no temp of this "type" has been created.
6050 Or it will return the same cached one if it has been created.
// Returns the shared "big offset morphing" temp local for the given type,
// creating (and caching in fgBigOffsetMorphingTemps) a new temp the first
// time a given type is requested.
unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type)
    unsigned lclNum = fgBigOffsetMorphingTemps[type];

    if (lclNum == BAD_VAR_NUM)
        // We haven't created a temp for this kind of type. Create one now.
        lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing"));
        fgBigOffsetMorphingTemps[type] = lclNum;

        // Cache hit: the cached temp must match the requested type.
        // We better get the right type.
        noway_assert(lvaTable[lclNum].TypeGet() == type);

    noway_assert(lclNum != BAD_VAR_NUM);
    // NOTE(review): the 'return lclNum;' statement is elided in this excerpt.
6072 /*****************************************************************************
6074 * Transform the given GT_FIELD tree for code generation.
// Transforms a GT_FIELD node for code generation.  Instance fields become a
// (possibly explicitly null-checked) GT_IND off the object reference; static
// fields become a TLS access, a GT_CLS_VAR, or a GT_IND of the field's
// address, depending on how the field can be addressed.
//
// Arguments:
//    tree - the GT_FIELD node to morph
//    mac  - address context of the enclosing expression; nullptr denotes a
//           direct dereference of the field (equivalent to MACK_Ind, zero
//           offset)
GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
    assert(tree->gtOper == GT_FIELD);

    CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd;
    unsigned fldOffset = tree->gtField.gtFldOffset;
    GenTreePtr objRef = tree->gtField.gtFldObj;
    bool fieldMayOverlap = false;
    bool objIsLocal = false;

    // Either the field is rooted at a local address, or the tree must already
    // be flagged as touching global memory.
    noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) ||
    ((tree->gtFlags & GTF_GLOB_REF) != 0));

    if (tree->gtField.gtFldMayOverlap)
        fieldMayOverlap = true;
        // Reset the flag because we may reuse the node.
        tree->gtField.gtFldMayOverlap = false;

    // If this field belongs to a SIMD struct, translate it to a SIMD intrinsic.
    // NOTE(review): the '#ifdef FEATURE_SIMD' guard and the condition that
    // selects this path are elided in this excerpt.
        GenTreePtr newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
        if (newTree != tree)
            newTree = fgMorphSmpOp(newTree);
    else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
        // Taking a field of a SIMD local through its address keeps that local
        // out of a register.
        GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr();
            lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUGARG(DNER_LocalField));

    /* Is this an instance data member? */

        objIsLocal = objRef->IsLocal();

        if (tree->gtFlags & GTF_IND_TLS_REF)
            NO_WAY("instance field can not be a TLS ref.");

        /* We'll create the expression "*(objRef + mem_offs)" */

        noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL);

        // An optimization for Contextful classes:
        // we unwrap the proxy when we have a 'this reference'
        if (info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef))
            objRef = fgUnwrapProxy(objRef);

        /* (diagram; partially garbled in this excerpt)
           Now we have a tree like this:

                         +--------------------+
                         +----------+---------+
              +--------------+-------------+
              |   tree->gtField.gtFldObj   |
              +--------------+-------------+

           We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):

                         +--------------------+
                         |   GT_IND/GT_OBJ    |  tree
                         +---------+----------+
                         +---------+----------+
                         +---------+----------+
               +-------------------+  +----------------------+
               |       objRef      |  |       fldOffset      |
               |                   |  | (when fldOffset !=0) |
               +-------------------+  +----------------------+

           or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):

                         +--------------------+
                         |   GT_IND/GT_OBJ    |  tree
                         +----------+---------+
                         +----------+---------+
                         +----------+---------+
               +---------+----------+   +---------+----------+
         comma |      GT_COMMA      |   | "+" (i.e. GT_ADD)  | addr
               +---------+----------+   +---------+----------+
             +-----+-----+  +-----+-----+  +---------+  +-----------+
         asg |  GT_ASG   |  |  GT_IND   |  |  tmpLcl |  | fldOffset |
             +-----+-----+  +-----+-----+  +---------+  +-----------+
             +-----+-----+  +-----+-----+               +-----------+
             |   tmpLcl  |  |   objRef  |               |   tmpLcl  |
             +-----------+  +-----------+               +-----------+
        */

        var_types objRefType = objRef->TypeGet();

        GenTreePtr comma = nullptr;

        bool addedExplicitNullCheck = false;

        // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field,
        // and thus is equivalent to a MACK_Ind with zero offset.
        MorphAddrContext defMAC(MACK_Ind);

        // This flag is set to enable the "conservative" style of explicit null-check insertion.
        // This means that we insert an explicit null check whenever we create byref by adding a
        // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately
        // dereferenced). The alternative is "aggressive", which would not insert such checks (for
        // small offsets); in this plan, we would transfer some null-checking responsibility to
        // callee's of methods taking byref parameters. They would have to add explicit null checks
        // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in
        // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
        // large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
        // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
        // This is left here to point out how to implement it.
        CLANG_FORMAT_COMMENT_ANCHOR;

#define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1

        // If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression
        // whose address is being taken is either a local or static variable, whose address is necessarily
        // non-null, or else it is a field dereference, which will do its own bounds checking if necessary.
        // NOTE(review): parts of this multi-line condition are elided in this excerpt.
        if (objRef->gtOper != GT_ADDR && ((mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind) &&
        (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset)
#if CONSERVATIVE_NULL_CHECK_BYREF_CREATION
        || (mac->m_kind == MACK_Addr && (mac->m_totalOffset + fldOffset > 0))
        || (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr &&
        (mac->m_totalOffset + fldOffset > 0))

                // Debug-only dump of the tree before rewriting.
                printf("Before explicit null check morphing:\n");

            // Create the "comma" subtree
            //
            GenTreePtr asg = nullptr;

            if (objRef->gtOper != GT_LCL_VAR)
                // objRef must be evaluated into a temp so the null check and
                // the address computation see the same value.
                lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet()));

                // Create the "asg" node
                asg = gtNewTempAssign(lclNum, objRef);

                lclNum = objRef->gtLclVarCommon.gtLclNum;

            // Create the "nullchk" node.
            // Make it TYP_BYTE so we only deference it for 1 byte.
            GenTreePtr lclVar = gtNewLclvNode(lclNum, objRefType);
            nullchk = new (this, GT_NULLCHECK) GenTreeIndir(GT_NULLCHECK, TYP_BYTE, lclVar, nullptr);

            nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections

            // An indirection will cause a GPF if the address is null.
            nullchk->gtFlags |= GTF_EXCEPT;

            compCurBB->bbFlags |= BBF_HAS_NULLCHECK;
            optMethodFlags |= OMF_HAS_NULLCHECK;

            // Create the "comma" node.
            comma = gtNewOperNode(GT_COMMA,
            TYP_VOID, // We don't want to return anything from this "comma" node.
            // Set the type to TYP_VOID, so we can select "cmp" instruction
            // instead of "mov" instruction later on.

            addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node.

            addedExplicitNullCheck = true;
        else if (fldOffset == 0)
            // Generate the "addr" node.
            // A zero field offset needs its field sequence recorded in the
            // zero-offset map since there is no constant node to carry it.
            FieldSeqNode* fieldSeq =
            fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
            GetZeroOffsetFieldMap()->Set(addr, fieldSeq);

#ifdef FEATURE_READYTORUN_COMPILER
        if (tree->gtField.gtFieldLookup.addr != nullptr)
            // R2R: the field offset is resolved through an indirection cell.
            GenTreePtr baseOffset = gtNewIconEmbHndNode(tree->gtField.gtFieldLookup.addr, nullptr, GTF_ICON_FIELD_HDL);

            if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE)
                baseOffset = gtNewOperNode(GT_IND, TYP_I_IMPL, baseOffset);

            gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr, baseOffset);

            // Generate the "addr" node.
            /* Add the member offset to the object's address */
            FieldSeqNode* fieldSeq =
            fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
            addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr,
            gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq));

        // Now let's set the "tree" as a GT_IND tree.

        tree->SetOper(GT_IND);
        tree->gtOp.gtOp1 = addr;

        if (fgAddrCouldBeNull(addr))
            // This indirection can cause a GPF if the address could be null.
            tree->gtFlags |= GTF_EXCEPT;

        if (addedExplicitNullCheck)
            // Create "comma2" node and link it to "tree".
            //
            comma2 = gtNewOperNode(GT_COMMA,
            addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node.

            tree->gtOp.gtOp1 = comma2;

        if (addedExplicitNullCheck)
            printf("After adding explicit null check:\n");

    else /* This is a static data member */
        if (tree->gtFlags & GTF_IND_TLS_REF)
            // Thread Local Storage static field reference
            //
            // Field ref is a TLS 'Thread-Local-Storage' reference
            //
            // Build this tree:  IND(*) #
            //
            //  (diagram; partially garbled in this excerpt)
            //          IND(I_IMPL) == [Base of this DLL's TLS]
            //                  / CNS(IdValue*4) or MUL
            //          IND(I_IMPL)  / CNS(4)
            //          CNS(TLS_HDL,0x2C)  IND
            //
            //  # Denotes the original node
            //
            void** pIdAddr = nullptr;
            unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr);

            //
            // If we can we access the TLS DLL index ID value directly
            // then pIdAddr will be NULL and
            //      IdValue will be the actual TLS DLL index ID
            //
            GenTreePtr dllRef = nullptr;
            if (pIdAddr == nullptr)
                // Direct ID: each TLS slot is 4 bytes.
                dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL);

                // Indirect ID: load it through the handle, which never changes.
                dllRef = gtNewIconHandleNode((size_t)pIdAddr, GTF_ICON_STATIC_HDL);
                dllRef = gtNewOperNode(GT_IND, TYP_I_IMPL, dllRef);
                dllRef->gtFlags |= GTF_IND_INVARIANT;

                // Scale the loaded ID by the 4-byte slot size.
                dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL));

#define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides

            // Mark this ICON as a TLS_HDL, codegen will use FS:[cns]
            GenTreePtr tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);

            tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);

            if (dllRef != nullptr)
                /* Add the dllRef */
                tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);

            /* indirect to have tlsRef point at the base of the DLLs Thread Local Storage */
            tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);

            // Attach the field's sequence to the constant offset node.
            FieldSeqNode* fieldSeq =
            fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
            GenTreePtr fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq);

            /* Add the TLS static field offset to the address */

            tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode);

            // Final indirect to get to actual value of TLS static field

            tree->SetOper(GT_IND);
            tree->gtOp.gtOp1 = tlsRef;

            noway_assert(tree->gtFlags & GTF_IND_TLS_REF);

            // Normal static field reference

            //
            // If we can we access the static's address directly
            // then pFldAddr will be NULL and
            //      fldAddr will be the actual address of the static field
            //
            void** pFldAddr = nullptr;
            void* fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr);

            if (pFldAddr == nullptr)
#ifdef _TARGET_64BIT_
                if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))
                    // The address is not directly addressible, so force it into a
                    // constant, so we handle it properly

                    GenTreePtr addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL);
                    addr->gtType = TYP_I_IMPL;
                    FieldSeqNode* fieldSeq =
                    fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
                    addr->gtIntCon.gtFieldSeq = fieldSeq;

                    tree->SetOper(GT_IND);
                    // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
                    // We must clear it when we transform the node.
                    // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
                    // that the logic above does its own checking to determine whether a nullcheck is needed.
                    tree->gtFlags &= ~GTF_IND_ARR_LEN;
                    tree->gtOp.gtOp1 = addr;

                    return fgMorphSmpOp(tree);
#endif // _TARGET_64BIT_

                // Directly addressable static: represent it as a GT_CLS_VAR.
                // Only volatile could be set, and it maps over
                noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_COMMON_MASK)) == 0);
                noway_assert(GTF_FLD_VOLATILE == GTF_IND_VOLATILE);
                tree->SetOper(GT_CLS_VAR);
                tree->gtClsVar.gtClsVarHnd = symHnd;
                FieldSeqNode* fieldSeq =
                fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
                tree->gtClsVar.gtFieldSeq = fieldSeq;

                // Indirectly addressable static: load the address cell, then
                // indirect through it.
                GenTreePtr addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL);

                // There are two cases here, either the static is RVA based,
                // in which case the type of the FIELD node is not a GC type
                // and the handle to the RVA is a TYP_I_IMPL. Or the FIELD node is
                // a GC type and the handle to it is a TYP_BYREF in the GC heap
                // because handles to statics now go into the large object heap

                var_types handleTyp = (var_types)(varTypeIsGC(tree->TypeGet()) ? TYP_BYREF : TYP_I_IMPL);
                GenTreePtr op1 = gtNewOperNode(GT_IND, handleTyp, addr);
                op1->gtFlags |= GTF_IND_INVARIANT;

                tree->SetOper(GT_IND);
                tree->gtOp.gtOp1 = op1;

    noway_assert(tree->gtOper == GT_IND);
    // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
    // We must clear it when we transform the node.
    // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
    // that the logic above does its own checking to determine whether a nullcheck is needed.
    tree->gtFlags &= ~GTF_IND_ARR_LEN;

    GenTreePtr res = fgMorphSmpOp(tree);

    // If we have a struct type, this node would previously have been under a GT_ADDR,
    // and therefore would have been marked GTF_DONT_CSE.
    // TODO-1stClassStructs: revisit this.
    if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal)
        res->gtFlags |= GTF_DONT_CSE;

    if (fldOffset == 0 && res->OperGet() == GT_IND)
        GenTreePtr addr = res->gtOp.gtOp1;
        // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node.
        FieldSeqNode* fieldSeq =
        fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
        fgAddFieldSeqForZeroOffset(addr, fieldSeq);
6572 //------------------------------------------------------------------------------
6573 // fgMorphCallInline: attempt to inline a call
6576 // call - call expression to inline, inline candidate
6577 // inlineResult - result tracking and reporting
6580 // Attempts to inline the call.
6582 // If successful, callee's IR is inserted in place of the call, and
6583 // is marked with an InlineContext.
// If unsuccessful, the transformations done in anticipation of a
6586 // possible inline are undone, and the candidate flag on the call
// Attempts to inline 'call' (which must be marked as an inline candidate).
// On failure, records a failing InlineContext, detaches the call from its
// statement when it returns a value, and clears the candidate flag.
void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
    // The call must be a candidate for inlining.
    assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0);

    // Attempt the inline; the helper records success/failure in 'inlineResult'.
    fgMorphCallInlineHelper(call, inlineResult);

    // We should have made up our minds one way or another....
    assert(inlineResult->IsDecided());

    // If we failed to inline, we have a bit of work to do to cleanup
    if (inlineResult->IsFailure())

        // Before we do any cleanup, create a failing InlineContext to
        // capture details of the inlining attempt.
        m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult);

        // It was an inline candidate, but we haven't expanded it.
        if (call->gtCall.gtReturnType != TYP_VOID)
            // Detach the GT_CALL tree from the original statement by
            // hanging a "nothing" node to it. Later the "nothing" node will be removed
            // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.

            noway_assert(fgMorphStmt->gtStmtExpr == call);
            fgMorphStmt->gtStmtExpr = gtNewNothingNode();

        // Clear the Inline Candidate flag so we can ensure later we tried
        // inlining all candidates.
        //
        call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE;
6630 /*****************************************************************************
6631 * Helper to attempt to inline a call
6632 * Sets success/failure in inline result
6633 * If success, modifies current method's IR with inlinee's IR
6634 * If failed, undoes any speculative modifications to current method
//------------------------------------------------------------------------------
// fgMorphCallInlineHelper: attempt to inline a candidate call.
//
// Runs the late (morph-time) vetoes that importation could not rule out —
// local-count budget, virtual calls, caller security checks — and, if none
// fire, invokes the inlinee compiler. On failure, rolls back the locals the
// speculative compile added (re-zeroing and re-constructing the lvaTable
// slots, then restoring lvaCount).
//
// Arguments:
//    call   - inline candidate call (asserted to be a candidate)
//    result - receives the success/failure decision and observation
6637 void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
6639 // Don't expect any surprises here.
6640 assert(result->IsCandidate());
6642 if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING)
6644 // For now, attributing this to call site, though it's really
6645 // more of a budget issue (lvaCount currently includes all
6646 // caller and prospective callee locals). We still might be
6647 // able to inline other callees into this caller, or inline
6648 // this callee in other callers.
6649 result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
6653 if (call->IsVirtual())
6655 result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL);
6659 // impMarkInlineCandidate() is expected not to mark tail prefixed calls
6660 // and recursive tail calls as inline candidates.
6661 noway_assert(!call->IsTailPrefixedCall());
6662 noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call));
6664 /* If the caller's stack frame is marked, then we can't do any inlining. Period.
6665 Although we have checked this in impCanInline, it is possible that later IL instructions
6666 might cause compNeedSecurityCheck to be set. Therefore we need to check it here again.
6669 if (opts.compNeedSecurityCheck)
6671 result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
6676 // Calling inlinee's compiler to inline the method.
// Remember the local count so a failed inline can be rolled back below.
6679 unsigned startVars = lvaCount;
// Dump the candidate statement being expanded (debug diagnostics).
6684 printf("Expanding INLINE_CANDIDATE in statement ");
6685 printTreeID(fgMorphStmt);
6686 printf(" in BB%02u:\n", compCurBB->bbNum);
6687 gtDispTree(fgMorphStmt);
6688 if (call->IsImplicitTailCall())
6690 printf("Note: candidate is implicit tail call\n");
6695 impInlineRoot()->m_inlineStrategy->NoteAttempt(result);
6698 // Invoke the compiler to inline the call.
6701 fgInvokeInlineeCompiler(call, result);
6703 if (result->IsFailure())
6705 // Undo some changes made in anticipation of inlining...
6707 // Zero out the used locals
6708 memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable));
6709 for (unsigned i = startVars; i < lvaCount; i++)
6711 new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
// Restore the local count to its pre-inline value.
6714 lvaCount = startVars;
6719 // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount);
6729 // printf("After inlining lvaCount=%d.\n", lvaCount);
6734 /*****************************************************************************
6736 * Performs checks to see if this tail call can be optimized as epilog+jmp.
//------------------------------------------------------------------------------
// fgCanFastTailCall: decide whether a tail call can be emitted as epilog+jmp
// (a "fast" tail call) instead of going through the tail call helper.
//
// The core test is that the callee's outgoing argument area must fit within
// the caller's fixed incoming argument area: counts implicit/hidden args
// (this, RetBuf), then user args, rejecting any multi-byte struct arg that
// cannot be passed in a register.
//
// Arguments:
//    callee - the tail call site being evaluated
//
// Returns:
//    true if the epilog+jmp transformation is legal for this call.
6738 bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
6740 #if FEATURE_FASTTAILCALL
6741 // Reached here means that return types of caller and callee are tail call compatible.
6742 // In case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
6744 // In an implicit tail call case callSig may not be available but it is guaranteed to be available
6745 // for explicit tail call cases. The reason implicit tail case callSig may not be available is that
6746 // a call node might be marked as an in-line candidate and could fail to be in-lined. In which case
6747 // fgInline() will replace return value place holder with call node using gtCloneExpr() which is
6748 // currently not copying/setting callSig.
6749 CLANG_FORMAT_COMMENT_ANCHOR;
6752 if (callee->IsTailPrefixedCall())
6754 assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass,
6755 (var_types)callee->gtReturnType, callee->callSig->retTypeClass));
6759 // Note on vararg methods:
6760 // If the caller is vararg method, we don't know the number of arguments passed by caller's caller.
6761 // But we can be sure that in-coming arg area of vararg caller would be sufficient to hold its
6762 // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as
6763 // out-going area required for callee is bounded by caller's fixed argument space.
6765 // Note that callee being a vararg method is not a problem since we can account the params being passed.
6767 // Count of caller args including implicit and hidden (i.e. thisPtr, RetBuf, GenericContext, VarargCookie)
6768 unsigned nCallerArgs = info.compArgsCount;
6770 // Count the callee args including implicit and hidden.
6771 // Note that GenericContext and VarargCookie are added by importer while
6772 // importing the call to gtCallArgs list along with explicit user args.
6773 unsigned nCalleeArgs = 0;
6774 if (callee->gtCallObjp) // thisPtr
6779 if (callee->HasRetBufArg()) // RetBuf
6783 // If callee has RetBuf param, caller too must have it.
6784 // Otherwise go the slow route.
6785 if (info.compRetBuffArg == BAD_VAR_NUM)
6791 // Count user args while tracking whether any of them is a multi-byte params
6792 // that cannot be passed in a register. Note that we don't need to count
6793 // non-standard and secret params passed in registers (e.g. R10, R11) since
6794 // these won't contribute to out-going arg size.
6795 bool hasMultiByteArgs = false;
6796 for (GenTreePtr args = callee->gtCallArgs; (args != nullptr) && !hasMultiByteArgs; args = args->gtOp.gtOp2)
6800 assert(args->OperIsList());
6801 GenTreePtr argx = args->gtOp.gtOp1;
6803 if (varTypeIsStruct(argx))
6805 // Actual arg may be a child of a GT_COMMA. Skip over comma opers.
6806 while (argx->gtOper == GT_COMMA)
6808 argx = argx->gtOp.gtOp2;
6811 // Get the size of the struct and see if it is register passable.
6812 CORINFO_CLASS_HANDLE objClass = nullptr;
6814 if (argx->OperGet() == GT_OBJ)
6816 objClass = argx->AsObj()->gtClass;
6818 else if (argx->IsLocal())
6820 objClass = lvaTable[argx->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
// If the class handle could not be recovered, the struct is treated as
// a multi-byte arg (slow route) below.
6822 if (objClass != nullptr)
6824 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
6826 unsigned typeSize = 0;
6827 hasMultiByteArgs = !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false);
6829 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
6830 // On System V/arm64 the args could be a 2 eightbyte struct that is passed in two registers.
6831 // Account for the second eightbyte in the nCalleeArgs.
6832 // https://github.com/dotnet/coreclr/issues/2666
6833 // TODO-CQ-Amd64-Unix/arm64: Structs of size between 9 to 16 bytes are conservatively estimated
6834 // as two args, since they need two registers whereas nCallerArgs is
6835 // counting such an arg as one. This would mean we will not be optimizing
6836 // certain calls though technically possible.
6838 if (typeSize > TARGET_POINTER_SIZE)
6840 unsigned extraArgRegsToAdd = (typeSize / TARGET_POINTER_SIZE);
6841 nCalleeArgs += extraArgRegsToAdd;
6843 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_
6846 assert(!"Target platform ABI rules regarding passing struct type args in registers");
6848 #endif //_TARGET_AMD64_ || _TARGET_ARM64_
6852 hasMultiByteArgs = true;
6857 // Go the slow route, if it has multi-byte params
6858 if (hasMultiByteArgs)
6863 // If we reached here means that callee has only those argument types which can be passed in
6864 // a register and if passed on stack will occupy exactly one stack slot in out-going arg area.
6865 // If we are passing args on stack for callee and it has more args passed on stack than
6866 // caller, then fast tail call cannot be performed.
6868 // Note that the GC'ness of on stack args need not match since the arg setup area is marked
6869 // as non-interruptible for fast tail calls.
6870 if ((nCalleeArgs > MAX_REG_ARG) && (nCallerArgs < nCalleeArgs))
6881 /*****************************************************************************
6883 * Transform the given GT_CALL tree for tail call code generation.
//------------------------------------------------------------------------------
// fgMorphTailCall: transform a GT_CALL for helper-assisted tail call codegen.
//
// Rewrites the call so it can be dispatched through the runtime's tail call
// helper: moves the 'this' pointer (with any required null check) onto the
// regular argument list, materializes VSD/vtable call targets explicitly,
// and injects the helper's special arguments (copy routine, call target
// placeholder, and — on x86 — the old/new stack arg counts and flags).
// The transformation is target-specific (ARM vs. xarch paths below).
6885 void Compiler::fgMorphTailCall(GenTreeCall* call)
6887 JITDUMP("fgMorphTailCall (before):\n");
6890 #if defined(_TARGET_ARM_)
6891 // For the helper-assisted tail calls, we need to push all the arguments
6892 // into a single list, and then add a few extra at the beginning
6894 // Check for PInvoke call types that we don't handle in codegen yet.
6895 assert(!call->IsUnmanaged());
6896 assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == NULL));
6898 // First move the this pointer (if any) onto the regular arg list
6899 GenTreePtr thisPtr = NULL;
6900 if (call->gtCallObjp)
6902 GenTreePtr objp = call->gtCallObjp;
6903 call->gtCallObjp = NULL;
// A null check (or vtable dispatch, which dereferences 'this') requires an
// explicit indirection now, since helper-call codegen won't add one.
6905 if ((call->gtFlags & GTF_CALL_NULLCHECK) || call->IsVirtualVtable())
6907 thisPtr = gtClone(objp, true);
6908 var_types vt = objp->TypeGet();
6909 if (thisPtr == NULL)
6911 // Too complex, so use a temp
6912 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
6913 GenTreePtr asg = gtNewTempAssign(lclNum, objp);
6914 if (!call->IsVirtualVtable())
6916 // Add an indirection to get the nullcheck
6917 GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
6918 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
6919 asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
6921 objp = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
6922 thisPtr = gtNewLclvNode(lclNum, vt);
6924 else if (!call->IsVirtualVtable())
// 'this' was cloneable: deref the clone for the null check, keep the
// original as the argument.
6926 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
6927 objp = gtNewOperNode(GT_COMMA, vt, ind, objp);
6928 thisPtr = gtClone(thisPtr, true);
// The explicit indirection above now performs the null check.
6931 call->gtFlags &= ~GTF_CALL_NULLCHECK;
6934 call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs);
6937 // Add the extra VSD parameter if needed
6938 CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
6939 if (call->IsVirtualStub())
6941 flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
6944 if (call->gtCallType == CT_INDIRECT)
6946 arg = gtClone(call->gtCallAddr, true);
6947 noway_assert(arg != NULL);
6951 noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
6952 ssize_t addr = ssize_t(call->gtStubCallStubAddr);
6953 arg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
6955 // Change the call type, so we can add the extra indirection here, rather than in codegen
6956 call->gtCallAddr = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
6957 call->gtStubCallStubAddr = NULL;
6958 call->gtCallType = CT_INDIRECT;
6960 // Add the extra indirection to generate the real target
6961 call->gtCallAddr = gtNewOperNode(GT_IND, TYP_I_IMPL, call->gtCallAddr);
6962 call->gtFlags |= GTF_EXCEPT;
6964 // And push the stub address onto the list of arguments
6965 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
6967 else if (call->IsVirtualVtable())
6969 // TODO-ARM-NYI: for x64 handle CORINFO_TAILCALL_THIS_IN_SECRET_REGISTER
6971 noway_assert(thisPtr != NULL);
// Fetch the vtable pointer: [this + VPTR_OFFS].
6973 GenTreePtr add = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL));
6974 GenTreePtr vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
6975 vtbl->gtFlags |= GTF_EXCEPT;
6977 unsigned vtabOffsOfIndirection;
6978 unsigned vtabOffsAfterIndirection;
6979 info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection);
6981 /* Get the appropriate vtable chunk */
6983 add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
6984 vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
6986 /* Now the appropriate vtable slot */
6988 add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL));
6989 vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
6991 // Switch this to a plain indirect call
6992 call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
6993 assert(!call->IsVirtual());
6994 call->gtCallType = CT_INDIRECT;
6996 call->gtCallAddr = vtbl;
6997 call->gtCallCookie = NULL;
6998 call->gtFlags |= GTF_EXCEPT;
7001 // Now inject a placeholder for the real call target that codegen
7003 GenTreePtr arg = new (this, GT_NOP) GenTreeOp(GT_NOP, TYP_I_IMPL);
7004 codeGen->genMarkTreeInReg(arg, REG_TAILCALL_ADDR);
7005 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7007 // Lastly inject the pointer for the copy routine
7008 noway_assert(call->callSig != NULL);
7009 void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
7010 arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7011 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7013 // It is now a varargs tail call
// NOTE(review): plain assignment clobbers any previously-set gtCallMoreFlags
// bits; the xarch path below uses |= instead — verify this is intentional.
7014 call->gtCallMoreFlags = GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL;
7015 call->gtFlags &= ~GTF_CALL_POP_ARGS;
7017 #elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
7019 // x86 classic codegen doesn't require any morphing
7021 // For the helper-assisted tail calls, we need to push all the arguments
7022 // into a single list, and then add a few extra at the beginning or end.
7024 // For AMD64, the tailcall helper (JIT_TailCall) is defined as:
7026 // JIT_TailCall(void* copyRoutine, void* callTarget, <function args>)
7028 // We need to add "copyRoutine" and "callTarget" extra params at the beginning.
7029 // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg
7030 // for callTarget here which will be replaced later with callTarget in tail call lowering.
7032 // For x86, the tailcall helper is defined as:
7034 // JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
7037 // Note that the special arguments are on the stack, whereas the function arguments follow
7038 // the normal convention: there might be register arguments in ECX and EDX. The stack will
7039 // look like (highest address at the top):
7040 // first normal stack argument
7042 // last normal stack argument
7043 // numberOfOldStackArgs
7044 // numberOfNewStackArgs
7048 // Each special arg is 4 bytes.
7050 // 'flags' is a bitmask where:
7051 // 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all
7052 // callee-saved registers for tailcall functions. Note that the helper assumes
7053 // that the callee-saved registers live immediately below EBP, and must have been
7054 // pushed in this order: EDI, ESI, EBX.
7055 // 2 == call target is a virtual stub dispatch.
7057 // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details
7058 // on the custom calling convention.
7060 // Check for PInvoke call types that we don't handle in codegen yet.
7061 assert(!call->IsUnmanaged());
7062 assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr));
7064 // Don't support tail calling helper methods
7065 assert(call->gtCallType != CT_HELPER);
7067 // We come this route only for tail prefixed calls that cannot be dispatched as
7069 assert(!call->IsImplicitTailCall());
7070 assert(!fgCanFastTailCall(call));
7072 // First move the 'this' pointer (if any) onto the regular arg list. We do this because
7073 // we are going to prepend special arguments onto the argument list (for non-x86 platforms),
7074 // and thus shift where the 'this' pointer will be passed to a later argument slot. In
7075 // addition, for all platforms, we are going to change the call into a helper call. Our code
7076 // generation code for handling calls to helpers does not handle 'this' pointers. So, when we
7077 // do this transformation, we must explicitly create a null 'this' pointer check, if required,
7078 // since special 'this' pointer handling will no longer kick in.
7080 // Some call types, such as virtual vtable calls, require creating a call address expression
7081 // that involves the "this" pointer. Lowering will sometimes create an embedded statement
7082 // to create a temporary that is assigned to the "this" pointer expression, and then use
7083 // that temp to create the call address expression. This temp creation embedded statement
7084 // will occur immediately before the "this" pointer argument, and then will be used for both
7085 // the "this" pointer argument as well as the call address expression. In the normal ordering,
7086 // the embedded statement establishing the "this" pointer temp will execute before both uses
7087 // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the
7088 // normal call argument list, and insert a placeholder which will hold the call address
7089 // expression. For non-x86, things are ok, because the order of execution of these is not
7090 // altered. However, for x86, the call address expression is inserted as the *last* argument
7091 // in the argument list, *after* the "this" pointer. It will be put on the stack, and be
7092 // evaluated first. To ensure we don't end up with out-of-order temp definition and use,
7093 // for those cases where call lowering creates an embedded form temp of "this", we will
7094 // create a temp here, early, that will later get morphed correctly.
7096 if (call->gtCallObjp)
7098 GenTreePtr thisPtr = nullptr;
7099 GenTreePtr objp = call->gtCallObjp;
7100 call->gtCallObjp = nullptr;
7103 if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal())
7106 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7107 GenTreePtr asg = gtNewTempAssign(lclNum, objp);
7109 // COMMA(tmp = "this", tmp)
7110 var_types vt = objp->TypeGet();
7111 GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
7112 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp);
7116 #endif // _TARGET_X86_
7118 #if defined(_TARGET_X86_)
7119 // When targeting x86, the runtime requires that we perform a null check on the `this` argument before tail
7120 // calling to a virtual dispatch stub. This requirement is a consequence of limitations in the runtime's
7121 // ability to map an AV to a NullReferenceException if the AV occurs in a dispatch stub.
7122 if (call->NeedsNullCheck() || call->IsVirtualStub())
7124 if (call->NeedsNullCheck())
7125 #endif // defined(_TARGET_X86_)
7127 // clone "this" if "this" has no side effects.
7128 if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
7130 thisPtr = gtClone(objp, true);
7133 var_types vt = objp->TypeGet();
7134 if (thisPtr == nullptr)
7136 // create a temp if either "this" has side effects or "this" is too complex to clone.
7139 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7140 GenTreePtr asg = gtNewTempAssign(lclNum, objp);
7142 // COMMA(tmp = "this", deref(tmp))
7143 GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
7144 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
7145 asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
7147 // COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
7148 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
7152 // thisPtr = COMMA(deref("this"), "this")
7153 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
7154 thisPtr = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true));
// The explicit deref above now performs the null check.
7157 call->gtFlags &= ~GTF_CALL_NULLCHECK;
7164 // During rationalization tmp="this" and null check will
7165 // materialize as embedded stmts in right execution order.
7166 assert(thisPtr != nullptr);
7167 call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs);
7170 #if defined(_TARGET_AMD64_)
7172 // Add the extra VSD parameter to arg list in case of VSD calls.
7173 // Tail call arg copying thunk will move this extra VSD parameter
7174 // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk()
7175 // in Stublinkerx86.cpp for more details.
7176 CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
7177 if (call->IsVirtualStub())
7179 GenTreePtr stubAddrArg;
7181 flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
7183 if (call->gtCallType == CT_INDIRECT)
7185 stubAddrArg = gtClone(call->gtCallAddr, true);
7186 noway_assert(stubAddrArg != nullptr);
7190 noway_assert((call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0);
7192 ssize_t addr = ssize_t(call->gtStubCallStubAddr);
7193 stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7196 // Push the stub address onto the list of arguments
7197 call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
7200 // Now inject a placeholder for the real call target that Lower phase will generate.
7201 GenTreePtr arg = gtNewIconNode(0, TYP_I_IMPL);
7202 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7204 // Inject the pointer for the copy routine to be used for struct copying
7205 noway_assert(call->callSig != nullptr);
7206 void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
7207 arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7208 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7210 #else // !_TARGET_AMD64_
7212 // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will
7213 // append to the list.
7214 GenTreeArgList** ppArg = &call->gtCallArgs;
7215 for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
7217 ppArg = (GenTreeArgList**)&args->gtOp2;
7219 assert(ppArg != nullptr);
7220 assert(*ppArg == nullptr);
// Caller's fixed stack arg words = total incoming arg size minus what was
// passed in registers, in REGSIZE_BYTES units.
7222 unsigned nOldStkArgsWords =
7223 (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
7224 GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
7225 *ppArg = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs
7226 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7228 // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
7229 // The constant will be replaced.
7230 GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL);
7231 *ppArg = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs
7232 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7234 // Inject a placeholder for the flags.
7235 // The constant will be replaced.
7236 GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL);
7237 *ppArg = gtNewListNode(arg1, nullptr);
7238 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7240 // Inject a placeholder for the real call target that the Lowering phase will generate.
7241 // The constant will be replaced.
7242 GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL);
7243 *ppArg = gtNewListNode(arg0, nullptr);
7245 #endif // !_TARGET_AMD64_
7247 // It is now a varargs tail call dispatched via helper.
7248 call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
7249 call->gtFlags &= ~GTF_CALL_POP_ARGS;
7253 JITDUMP("fgMorphTailCall (after):\n");
7257 //------------------------------------------------------------------------------
7258 // fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop.
7262 // block - basic block ending with a recursive fast tail call
7263 // recursiveTailCall - recursive tail call to transform
7266 // The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop.
//------------------------------------------------------------------------------
// fgMorphRecursiveFastTailCallIntoLoop: replace a recursive fast tail call
// with assignments to the caller's parameters plus a jump back to the start
// of the method, turning the recursion into a loop.
//
// Arguments:
//    block             - basic block ending with the recursive tail call
//    recursiveTailCall - the call, asserted convertible to a loop
//
// Each call argument is first copied into a temp (when it may read a caller
// parameter) and the temps are then assigned to the parameters, so parameter
// re-assignment cannot corrupt argument evaluation.
7268 void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall)
7270 assert(recursiveTailCall->IsTailCallConvertibleToLoop());
7271 GenTreePtr last = block->lastStmt();
7272 assert(recursiveTailCall == last->gtStmt.gtStmtExpr);
7274 // Transform recursive tail call into a loop.
7276 GenTreePtr earlyArgInsertionPoint = last;
7277 IL_OFFSETX callILOffset = last->gtStmt.gtStmtILoffsx;
7279 // Hoist arg setup statement for the 'this' argument.
7280 GenTreePtr thisArg = recursiveTailCall->gtCallObjp;
7281 if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode())
7283 GenTreePtr thisArgStmt = gtNewStmt(thisArg, callILOffset);
7284 fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt);
7287 // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first;
7288 // then the temps need to be assigned to the method parameters. This is done so that the caller
7289 // parameters are not re-assigned before call arguments depending on them are evaluated.
7290 // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of
7291 // where the next temp or parameter assignment should be inserted.
7293 // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first
7294 // while the second call argument (const 1) doesn't.
7295 // Basic block before tail recursion elimination:
7296 // ***** BB04, stmt 1 (top level)
7297 // [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013)
7298 // [000033] --C - G------ - \--* call void RecursiveMethod
7299 // [000030] ------------ | / --* const int - 1
7300 // [000031] ------------arg0 in rcx + --* +int
7301 // [000029] ------------ | \--* lclVar int V00 arg1
7302 // [000032] ------------arg1 in rdx \--* const int 1
7305 // Basic block after tail recursion elimination :
7306 // ***** BB04, stmt 1 (top level)
7307 // [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7308 // [000030] ------------ | / --* const int - 1
7309 // [000031] ------------ | / --* +int
7310 // [000029] ------------ | | \--* lclVar int V00 arg1
7311 // [000050] - A---------- \--* = int
7312 // [000049] D------N---- \--* lclVar int V02 tmp0
7314 // ***** BB04, stmt 2 (top level)
7315 // [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7316 // [000052] ------------ | / --* lclVar int V02 tmp0
7317 // [000054] - A---------- \--* = int
7318 // [000053] D------N---- \--* lclVar int V00 arg0
7320 // ***** BB04, stmt 3 (top level)
7321 // [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7322 // [000032] ------------ | / --* const int 1
7323 // [000057] - A---------- \--* = int
7324 // [000056] D------N---- \--* lclVar int V01 arg1
7326 GenTreePtr tmpAssignmentInsertionPoint = last;
7327 GenTreePtr paramAssignmentInsertionPoint = last;
7329 // Process early args. They may contain both setup statements for late args and actual args.
7330 // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum
7331 // below has the correct second argument.
7332 int earlyArgIndex = (thisArg == nullptr) ? 0 : 1;
7333 for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs; earlyArgs != nullptr;
7334 (earlyArgIndex++, earlyArgs = earlyArgs->Rest()))
7336 GenTreePtr earlyArg = earlyArgs->Current();
7337 if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode())
7339 if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0)
7341 // This is a setup node so we need to hoist it.
7342 GenTreePtr earlyArgStmt = gtNewStmt(earlyArg, callILOffset);
7343 fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt);
7347 // This is an actual argument that needs to be assigned to the corresponding caller parameter.
7348 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex);
7349 GenTreePtr paramAssignStmt =
7350 fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset,
7351 tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
7352 if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
7354 // All temp assignments will happen before the first param assignment.
7355 tmpAssignmentInsertionPoint = paramAssignStmt;
7361 // Process late args.
7362 int lateArgIndex = 0;
7363 for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs; lateArgs != nullptr;
7364 (lateArgIndex++, lateArgs = lateArgs->Rest()))
7366 // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter.
7367 GenTreePtr lateArg = lateArgs->Current();
7368 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex);
7369 GenTreePtr paramAssignStmt =
7370 fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset,
7371 tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
7373 if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
7375 // All temp assignments will happen before the first param assignment.
7376 tmpAssignmentInsertionPoint = paramAssignStmt;
7380 // If the method has starg.s 0 or ldarga.s 0 a special local (lvaArg0Var) is created so that
7381 // compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that
7382 // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here.
7383 if (!info.compIsStatic && (lvaArg0Var != info.compThisArg))
7385 var_types thisType = lvaTable[info.compThisArg].TypeGet();
7386 GenTreePtr arg0 = gtNewLclvNode(lvaArg0Var, thisType);
7387 GenTreePtr arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType));
7388 GenTreePtr arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset);
7389 fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt);
// Remove the call statement itself; the parameter assignments replace it.
7393 fgRemoveStmt(block, last);
7395 // Set the loop edge.
7396 block->bbJumpKind = BBJ_ALWAYS;
7397 block->bbJumpDest = fgFirstBBisScratch() ? fgFirstBB->bbNext : fgFirstBB;
7398 fgAddRefPred(block->bbJumpDest, block);
7399 block->bbFlags &= ~BBF_HAS_JMP;
7402 //------------------------------------------------------------------------------
7403 // fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter.
7407 // arg - argument to assign
7408 // argTabEntry - argument table entry corresponding to arg
7409 // block - basic block the call is in
7410 // callILOffset - IL offset of the call
7411 // tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary)
7412 // paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted
7415 // parameter assignment statement if one was inserted; nullptr otherwise.
//------------------------------------------------------------------------------
// fgAssignRecursiveCallArgToCallerParam: assign one recursive-call argument to
// the corresponding caller parameter, via a temp when the argument tree could
// read caller parameters directly.
//
// Arguments:
//    arg                           - argument tree to assign
//    argTabEntry                   - arg table entry for arg (supplies argNum)
//    block                         - basic block containing the call
//    callILOffset                  - IL offset used for the new statements
//    tmpAssignmentInsertionPoint   - insert temp assignments before this stmt
//    paramAssignmentInsertionPoint - insert param assignments before this stmt
//
// Returns:
//    The parameter assignment statement if one was inserted; nullptr otherwise
//    (i.e. when the arg already is the target parameter).
7417 GenTreePtr Compiler::fgAssignRecursiveCallArgToCallerParam(GenTreePtr arg,
7418 fgArgTabEntryPtr argTabEntry,
7420 IL_OFFSETX callILOffset,
7421 GenTreePtr tmpAssignmentInsertionPoint,
7422 GenTreePtr paramAssignmentInsertionPoint)
7424 // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because
7425 // some argument trees may reference parameters directly.
7427 GenTreePtr argInTemp = nullptr;
7428 unsigned originalArgNum = argTabEntry->argNum;
7429 bool needToAssignParameter = true;
7431 // TODO-CQ: enable calls with struct arguments passed in registers.
7432 noway_assert(!varTypeIsStruct(arg->TypeGet()));
7434 if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl())
7436 // The argument is already assigned to a temp or is a const.
7439 else if (arg->OperGet() == GT_LCL_VAR)
7441 unsigned lclNum = arg->AsLclVar()->gtLclNum;
7442 LclVarDsc* varDsc = &lvaTable[lclNum];
7443 if (!varDsc->lvIsParam)
7445 // The argument is a non-parameter local so it doesn't need to be assigned to a temp.
7448 else if (lclNum == originalArgNum)
7450 // The argument is the same parameter local that we were about to assign so
7451 // we can skip the assignment.
7452 needToAssignParameter = false;
7456 // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve
7457 // any caller parameters. Some common cases are handled above but we may be able to eliminate
7458 // more temp assignments.
7460 GenTreePtr paramAssignStmt = nullptr;
7461 if (needToAssignParameter)
7463 if (argInTemp == nullptr)
7465 // The argument is not assigned to a temp. We need to create a new temp and insert an assignment.
7466 // TODO: we can avoid a temp assignment if we can prove that the argument tree
7467 // doesn't involve any caller parameters.
7468 unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp"));
7469 GenTreePtr tempSrc = arg;
7470 GenTreePtr tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType);
7471 GenTreePtr tmpAssignNode = gtNewAssignNode(tempDest, tempSrc);
7472 GenTreePtr tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset);
7473 fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt);
7474 argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType);
7477 // Now assign the temp to the parameter.
7478 LclVarDsc* paramDsc = lvaTable + originalArgNum;
7479 assert(paramDsc->lvIsParam);
7480 GenTreePtr paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType);
7481 GenTreePtr paramAssignNode = gtNewAssignNode(paramDest, argInTemp);
7482 paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset);
7484 fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt);
7486 return paramAssignStmt;
7489 /*****************************************************************************
7491 * Transform the given GT_CALL tree for code generation.
// Morph a GT_CALL for code generation. Handles the full tail-call decision
// pipeline (screening reasons a tail call must be abandoned, reporting the
// decision to the EE, rewriting the IR around an accepted tail call), then
// the non-tail-call morphing: struct-return fixup, ret-buf invariants,
// intrinsic folding (Type.op_Equality, stelem.ref with null,
// get_ManagedThreadId(get_CurrentThread)), argument morphing, and
// no-return-call block truncation.
//
// NOTE(review): this excerpt is elided (original line numbers jump);
// braces, some `else` arms, `#ifdef DEBUG` guards and `#endif`s are missing
// from this view — consult the full file before editing.
7494 GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
// ---- Phase 1: tail-call legality screening. Each failed check records a
// human-readable reason in szFailReason; a non-null reason later demotes
// the call to a normal call.
7496 if (call->CanTailCall())
7498 // It should either be an explicit (i.e. tail prefixed) or an implicit tail call
7499 assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall());
7501 // It cannot be an inline candidate
7502 assert(!call->IsInlineCandidate());
7504 const char* szFailReason = nullptr;
7505 bool hasStructParam = false;
7506 if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
7508 szFailReason = "Might turn into an intrinsic";
7511 if (opts.compNeedSecurityCheck)
7513 szFailReason = "Needs security check";
7515 else if (compLocallocUsed)
7517 szFailReason = "Localloc used";
7519 #ifdef _TARGET_AMD64_
7520 // Needed for Jit64 compat.
7521 // In future, enabling tail calls from methods that need GS cookie check
7522 // would require codegen side work to emit GS cookie check before a tail
7524 else if (getNeedsGSSecurityCookie())
7526 szFailReason = "GS Security cookie check";
7530 // DDB 99324: Just disable tailcall under compGcChecks stress mode.
7531 else if (opts.compGcChecks)
7533 szFailReason = "GcChecks";
7536 #if FEATURE_TAILCALL_OPT
7539 // We are still not sure whether it can be a tail call. Because, when converting
7540 // a call to an implicit tail call, we must check that there are no locals with
7541 // their address taken. If this is the case, we have to assume that the address
7542 // has been leaked and the current stack frame must live until after the final
7545 // Verify that none of vars has lvHasLdAddrOp or lvAddrExposed bit set. Note
7546 // that lvHasLdAddrOp is much more conservative. We cannot just base it on
7547 // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs
7548 // during morph stage. The reason for also checking lvAddrExposed is that in case
7549 // of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp.
7550 // The combination of lvHasLdAddrOp and lvAddrExposed though conservative allows us
7551 // never to be incorrect.
7553 // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose
7554 // address is taken. Such a flag could be set whenever lvHasLdAddrOp or LvAddrExposed
7555 // is set. This avoids the need for iterating through all lcl vars of the current
7556 // method. Right now throughout the code base we are not consistently using 'set'
7557 // method to set lvHasLdAddrOp and lvAddrExposed flags.
7560 bool hasAddrExposedVars = false;
7561 bool hasStructPromotedParam = false;
7562 bool hasPinnedVars = false;
// Scan every local for tail-call hazards (address exposure, promoted
// struct params, pinned locals) and note struct params for the
// recursive-call-to-loop transformation below.
// NOTE(review): varNum/varDsc declarations are not visible in this
// elided view — presumably declared just above; confirm in full file.
7564 for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
7566 // If the method is marked as an explicit tail call we will skip the
7567 // following three hazard checks.
7568 // We still must check for any struct parameters and set 'hasStructParam'
7569 // so that we won't transform the recursive tail call into a loop.
7571 if (call->IsImplicitTailCall())
7573 if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed)
7575 hasAddrExposedVars = true;
7578 if (varDsc->lvPromoted && varDsc->lvIsParam)
7580 hasStructPromotedParam = true;
7583 if (varDsc->lvPinned)
7585 // A tail call removes the method from the stack, which means the pinning
7586 // goes away for the callee. We can't allow that.
7587 hasPinnedVars = true;
7591 if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam)
7593 hasStructParam = true;
7594 // This prevents transforming a recursive tail call into a loop
7595 // but doesn't prevent tail call optimization so we need to
7596 // look at the rest of parameters.
7601 if (hasAddrExposedVars)
7603 szFailReason = "Local address taken";
7605 if (hasStructPromotedParam)
7607 szFailReason = "Has Struct Promoted Param";
7611 szFailReason = "Has Pinned Vars";
7614 #endif // FEATURE_TAILCALL_OPT
7616 if (varTypeIsStruct(call))
7618 fgFixupStructReturn(call);
7621 var_types callType = call->TypeGet();
7623 // We have to ensure to pass the incoming retValBuf as the
7624 // outgoing one. Using a temp will not do as this function will
7625 // not regain control to do the copy.
7627 if (info.compRetBuffArg != BAD_VAR_NUM)
7629 noway_assert(callType == TYP_VOID);
7630 GenTreePtr retValBuf = call->gtCallArgs->gtOp.gtOp1;
7631 if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg)
7633 szFailReason = "Need to copy return buffer";
7637 // If this is an opportunistic tail call and cannot be dispatched as
7638 // fast tail call, go the non-tail call route. This is done for perf
7641 // Avoid the cost of determining whether can be dispatched as fast tail
7642 // call if we already know that tail call cannot be honored for other
7644 bool canFastTailCall = false;
7645 if (szFailReason == nullptr)
7647 canFastTailCall = fgCanFastTailCall(call);
7648 if (!canFastTailCall)
7650 // Implicit or opportunistic tail calls are always dispatched via fast tail call
7651 // mechanism and never via tail call helper for perf.
7652 if (call->IsImplicitTailCall())
7654 szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp";
7656 #ifndef LEGACY_BACKEND
7657 else if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this))
7659 // If we are here, it means that the call is an explicitly ".tail" prefixed and cannot be
7660 // dispatched as a fast tail call.
7662 // Methods with non-standard args will have indirection cell or cookie param passed
7663 // in callee trash register (e.g. R11). Tail call helper doesn't preserve it before
7664 // tail calling the target method and hence ".tail" prefix on such calls needs to be
7667 // Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require
7668 // extra stub param (e.g. in R11 on Amd64), they can still be called via tail call helper.
7669 // This is done by by adding stubAddr as an additional arg before the original list of
7670 // args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
7671 // in Stublinkerx86.cpp.
7672 szFailReason = "Method with non-standard args passed in callee trash register cannot be tail "
7673 "called via helper";
7675 #ifdef _TARGET_ARM64_
7678 // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER.
7679 // So, bail out if we can't make fast tail call.
7680 szFailReason = "Non-qualified fast tail call";
7683 #endif // LEGACY_BACKEND
7687 // Clear these flags before calling fgMorphCall() to avoid recursion.
7688 bool isTailPrefixed = call->IsTailPrefixedCall();
7689 call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
7691 #if FEATURE_TAILCALL_OPT
7692 call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
// NOTE(review): this branch appears to be inside a FEATURE_PAL guard
// (see the #endif // FEATURE_PAL two lines below); the #if itself is
// elided from this excerpt.
7696 if (!canFastTailCall && szFailReason == nullptr)
7698 szFailReason = "Non fast tail calls disabled for PAL based systems.";
7700 #endif // FEATURE_PAL
// ---- Phase 2: report a failed tail call to the EE and fall through to
// normal call morphing; otherwise commit to the tail call.
7702 if (szFailReason != nullptr)
7707 printf("\nRejecting tail call late for call ");
7709 printf(": %s\n", szFailReason);
7713 // for non user funcs, we have no handles to report
7714 info.compCompHnd->reportTailCallDecision(nullptr,
7715 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
7716 isTailPrefixed, TAILCALL_FAIL, szFailReason);
7721 #if !FEATURE_TAILCALL_OPT_SHARED_RETURN
7722 // We enable shared-ret tail call optimization for recursive calls even if
7723 // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined.
7724 if (gtIsRecursiveCall(call))
7727 // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN,
7728 // but if the call falls through to a ret, and we are doing a tailcall, change it here.
7729 if (compCurBB->bbJumpKind != BBJ_RETURN)
7731 compCurBB->bbJumpKind = BBJ_RETURN;
7735 // Set this flag before calling fgMorphCall() to prevent inlining this call.
7736 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
7738 bool fastTailCallToLoop = false;
7739 #if FEATURE_TAILCALL_OPT
7740 // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register
7741 // or return type is a struct that can be passed in a register.
7743 // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through
7744 // hidden generic context param or through keep alive thisptr), then while transforming a recursive
7745 // call to such a method requires that the generic context stored on stack slot be updated. Right now,
7746 // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming
7747 // a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the
7748 // generic type parameters of both caller and callee generic method are the same.
7749 if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() &&
7750 !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && !varTypeIsStruct(call->TypeGet()))
7752 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
7753 fastTailCallToLoop = true;
7757 // Do some target-specific transformations (before we process the args, etc.)
7758 // This is needed only for tail prefixed calls that cannot be dispatched as
7760 if (!canFastTailCall)
7762 fgMorphTailCall(call);
7765 // Implementation note : If we optimize tailcall to do a direct jump
7766 // to the target function (after stomping on the return address, etc),
7767 // without using CORINFO_HELP_TAILCALL, we have to make certain that
7768 // we don't starve the hijacking logic (by stomping on the hijacked
7769 // return address etc).
7771 // At this point, we are committed to do the tailcall.
7772 compTailCallUsed = true;
7774 CorInfoTailCall tailCallResult;
7776 if (fastTailCallToLoop)
7778 tailCallResult = TAILCALL_RECURSIVE;
7780 else if (canFastTailCall)
7782 tailCallResult = TAILCALL_OPTIMIZED;
7786 tailCallResult = TAILCALL_HELPER;
7789 // for non user funcs, we have no handles to report
7790 info.compCompHnd->reportTailCallDecision(nullptr,
7791 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
7792 isTailPrefixed, tailCallResult, nullptr);
7794 // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID.
7795 // to avoid doing any extra work for the return value.
7796 call->gtType = TYP_VOID;
7801 printf("\nGTF_CALL_M_TAILCALL bit set for call ");
7804 if (fastTailCallToLoop)
7806 printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
// ---- Phase 3: validate that the containing statement has one of the
// accepted tail-call IR shapes, unwrapping nested casts down to the call.
7813 GenTreePtr stmtExpr = fgMorphStmt->gtStmtExpr;
7816 // Tail call needs to be in one of the following IR forms
7817 // Either a call stmt or
7818 // GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..)))
7819 // var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..)))
7820 // GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP)
7822 // GT_CASTS may be nested.
7823 genTreeOps stmtOper = stmtExpr->gtOper;
7824 if (stmtOper == GT_CALL)
7826 noway_assert(stmtExpr == call);
7830 noway_assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA);
7831 GenTreePtr treeWithCall;
7832 if (stmtOper == GT_RETURN)
7834 treeWithCall = stmtExpr->gtGetOp1();
7836 else if (stmtOper == GT_COMMA)
7838 // Second operation must be nop.
7839 noway_assert(stmtExpr->gtGetOp2()->IsNothingNode());
7840 treeWithCall = stmtExpr->gtGetOp1();
7844 treeWithCall = stmtExpr->gtGetOp2();
7848 while (treeWithCall->gtOper == GT_CAST)
7850 noway_assert(!treeWithCall->gtOverflow());
7851 treeWithCall = treeWithCall->gtGetOp1();
7854 noway_assert(treeWithCall == call);
// ---- Phase 4: strip the now-redundant statements that follow the tail
// call (NOPs, a side-effect-free pop, the GT_RETURN), leaving the bare
// call as the last statement in the block.
7858 // For void calls, we would have created a GT_CALL in the stmt list.
7859 // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)).
7860 // For calls returning structs, we would have a void call, followed by a void return.
7861 // For debuggable code, it would be an assignment of the call to a temp
7862 // We want to get rid of any of this extra trees, and just leave
7864 GenTreeStmt* nextMorphStmt = fgMorphStmt->gtNextStmt;
7866 #ifdef _TARGET_AMD64_
7867 // Legacy Jit64 Compat:
7868 // There could be any number of GT_NOPs between tail call and GT_RETURN.
7869 // That is tail call pattern could be one of the following:
7870 // 1) tail.call, nop*, ret
7871 // 2) tail.call, nop*, pop, nop*, ret
7872 // 3) var=tail.call, nop*, ret(var)
7873 // 4) var=tail.call, nop*, pop, ret
7874 // 5) comma(tail.call, nop), nop*, ret
7876 // See impIsTailCallILPattern() for details on tail call IL patterns
7877 // that are supported.
7878 if (stmtExpr->gtOper != GT_RETURN)
7880 // First delete all GT_NOPs after the call
7881 GenTreeStmt* morphStmtToRemove = nullptr;
7882 while (nextMorphStmt != nullptr)
7884 GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr;
7885 if (!nextStmtExpr->IsNothingNode())
7890 morphStmtToRemove = nextMorphStmt;
7891 nextMorphStmt = morphStmtToRemove->gtNextStmt;
7892 fgRemoveStmt(compCurBB, morphStmtToRemove);
7895 // Check to see if there is a pop.
7896 // Since tail call is honored, we can get rid of the stmt corresponding to pop.
7897 if (nextMorphStmt != nullptr && nextMorphStmt->gtStmtExpr->gtOper != GT_RETURN)
7899 // Note that pop opcode may or may not result in a new stmt (for details see
7900 // impImportBlockCode()). Hence, it is not possible to assert about the IR
7901 // form generated by pop but pop tree must be side-effect free so that we can
7902 // delete it safely.
7903 GenTreeStmt* popStmt = nextMorphStmt;
7904 nextMorphStmt = nextMorphStmt->gtNextStmt;
7906 // Side effect flags on a GT_COMMA may be overly pessimistic, so examine
7907 // the constituent nodes.
7908 GenTreePtr popExpr = popStmt->gtStmtExpr;
7909 bool isSideEffectFree = (popExpr->gtFlags & GTF_ALL_EFFECT) == 0;
7910 if (!isSideEffectFree && (popExpr->OperGet() == GT_COMMA))
7912 isSideEffectFree = ((popExpr->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) == 0) &&
7913 ((popExpr->gtGetOp2()->gtFlags & GTF_ALL_EFFECT) == 0);
7915 noway_assert(isSideEffectFree);
7916 fgRemoveStmt(compCurBB, popStmt);
7919 // Next delete any GT_NOP nodes after pop
7920 while (nextMorphStmt != nullptr)
7922 GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr;
7923 if (!nextStmtExpr->IsNothingNode())
7928 morphStmtToRemove = nextMorphStmt;
7929 nextMorphStmt = morphStmtToRemove->gtNextStmt;
7930 fgRemoveStmt(compCurBB, morphStmtToRemove);
7933 #endif // _TARGET_AMD64_
7935 // Delete GT_RETURN if any
7936 if (nextMorphStmt != nullptr)
7938 GenTreePtr retExpr = nextMorphStmt->gtStmtExpr;
7939 noway_assert(retExpr->gtOper == GT_RETURN);
7941 // If var=call, then the next stmt must be a GT_RETURN(TYP_VOID) or GT_RETURN(var).
7942 // This can occur if impSpillStackEnsure() has introduced an assignment to a temp.
7943 if (stmtExpr->gtOper == GT_ASG && info.compRetType != TYP_VOID)
7945 noway_assert(stmtExpr->gtGetOp1()->OperIsLocal());
7946 noway_assert(stmtExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum ==
7947 retExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum);
7950 fgRemoveStmt(compCurBB, nextMorphStmt);
7953 fgMorphStmt->gtStmtExpr = call;
// ---- Phase 5: GC-poll placement for helper-based tail calls, then block
// kind fixup (epilog+jmp for fast tail calls, BBJ_THROW for helper calls).
7955 // Tail call via helper: The VM can't use return address hijacking if we're
7956 // not going to return and the helper doesn't have enough info to safely poll,
7957 // so we poll before the tail call, if the block isn't already safe. Since
7958 // tail call via helper is a slow mechanism it doen't matter whether we emit
7959 // GC poll. This is done to be in parity with Jit64. Also this avoids GC info
7960 // size increase if all most all methods are expected to be tail calls (e.g. F#).
7962 // Note that we can avoid emitting GC-poll if we know that the current BB is
7963 // dominated by a Gc-SafePoint block. But we don't have dominator info at this
7964 // point. One option is to just add a place holder node for GC-poll (e.g. GT_GCPOLL)
7965 // here and remove it in lowering if the block is dominated by a GC-SafePoint. For
7966 // now it not clear whether optimizing slow tail calls is worth the effort. As a
7967 // low cost check, we check whether the first and current basic blocks are
7970 // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder()
7971 // is going to mark the method as fully interruptible if the block containing this tail
7972 // call is reachable without executing any call.
7973 if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
7974 !fgCreateGCPoll(GCPOLL_INLINE, compCurBB))
7976 // We didn't insert a poll block, so we need to morph the call now
7977 // (Normally it will get morphed when we get to the split poll block)
7978 GenTreePtr temp = fgMorphCall(call);
7979 noway_assert(temp == call);
7982 // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to
7983 // the target. So we don't need an epilog - just like CORINFO_HELP_THROW.
7985 // Fast tail call: in case of fast tail calls, we need a jmp epilog and
7986 // hence mark it as BBJ_RETURN with BBF_JMP flag set.
7987 noway_assert(compCurBB->bbJumpKind == BBJ_RETURN);
7989 if (canFastTailCall)
7991 compCurBB->bbFlags |= BBF_HAS_JMP;
7995 compCurBB->bbJumpKind = BBJ_THROW;
7998 // For non-void calls, we return a place holder which will be
7999 // used by the parent GT_RETURN node of this call.
8001 GenTree* result = call;
8002 if (callType != TYP_VOID && info.compRetType != TYP_VOID)
8005 // Return a dummy node, as the return is already removed.
8006 if (callType == TYP_STRUCT)
8008 // This is a HFA, use float 0.
8009 callType = TYP_FLOAT;
8011 #elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
8012 // Return a dummy node, as the return is already removed.
8013 if (varTypeIsStruct(callType))
8015 // This is a register-returned struct. Return a 0.
8016 // The actual return registers are hacked in lower and the register allocator.
8021 // Return a dummy node, as the return is already removed.
8022 if (varTypeIsSIMD(callType))
8024 callType = TYP_DOUBLE;
8027 result = gtNewZeroConNode(genActualType(callType));
8028 result = fgMorphTree(result);
// ---- Phase 6: non-tail-call morphing starts here. First, fold an unused
// virtual-function-pointer helper call into a plain null check.
8036 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 &&
8037 (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR)
8038 #ifdef FEATURE_READYTORUN_COMPILER
8039 || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
8042 (call == fgMorphStmt->gtStmtExpr))
8044 // This is call to CORINFO_HELP_VIRTUAL_FUNC_PTR with ignored result.
8045 // Transform it into a null check.
8047 GenTreePtr thisPtr = call->gtCallArgs->gtOp.gtOp1;
8049 GenTreePtr nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr);
8050 nullCheck->gtFlags |= GTF_EXCEPT;
8052 return fgMorphTree(nullCheck);
8055 noway_assert(call->gtOper == GT_CALL);
8058 // Only count calls once (only in the global morph phase)
8062 if (call->gtCallType == CT_INDIRECT)
8065 optIndirectCallCount++;
8067 else if (call->gtCallType == CT_USER_FUNC)
8070 if (call->IsVirtual())
8072 optIndirectCallCount++;
8077 // Couldn't inline - remember that this BB contains method calls
8079 // If this is a 'regular' call, mark the basic block as
8080 // having a call (for computing full interruptibility).
8081 CLANG_FORMAT_COMMENT_ANCHOR;
8083 #ifdef _TARGET_AMD64_
8084 // Amd64 note: If this is a fast tail call then don't count it as a call
8085 // since we don't insert GC-polls but instead make the method fully GC
8087 if (!call->IsFastTailCall())
8090 if (call->gtCallType == CT_INDIRECT)
8092 compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
8094 else if (call->gtCallType == CT_USER_FUNC)
8096 if ((call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK) == 0)
8098 compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
8101 // otherwise we have a CT_HELPER
8104 // Morph Type.op_Equality and Type.op_Inequality
8105 // We need to do this before the arguments are morphed
8106 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
8108 CorInfoIntrinsics methodID = info.compCompHnd->getIntrinsicID(call->gtCallMethHnd);
8110 genTreeOps simpleOp = GT_CALL;
// NOTE(review): the assignments simpleOp = GT_EQ / GT_NE are elided
// from this excerpt; only the branch headers are visible.
8111 if (methodID == CORINFO_INTRINSIC_TypeEQ)
8115 else if (methodID == CORINFO_INTRINSIC_TypeNEQ)
8120 if (simpleOp == GT_EQ || simpleOp == GT_NE)
8122 noway_assert(call->TypeGet() == TYP_INT);
8124 // Check for GetClassFromHandle(handle) and obj.GetType() both of which will only return RuntimeType
8125 // objects. Then if either operand is one of these two calls we can simplify op_Equality/op_Inequality to
8126 // GT_NE/GT_NE: One important invariance that should never change is that type equivalency is always
8127 // equivalent to object identity equality for runtime type objects in reflection. This is also reflected
8128 // in RuntimeTypeHandle::TypeEquals. If this invariance would ever be broken, we need to remove the
8129 // optimization below.
8131 GenTreePtr op1 = call->gtCallArgs->gtOp.gtOp1;
8132 GenTreePtr op2 = call->gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
8134 if (gtCanOptimizeTypeEquality(op1) || gtCanOptimizeTypeEquality(op2))
8136 GenTreePtr compare = gtNewOperNode(simpleOp, TYP_INT, op1, op2);
8138 // fgMorphSmpOp will further optimize the following patterns:
8139 // 1. typeof(...) == typeof(...)
8140 // 2. typeof(...) == obj.GetType()
8141 return fgMorphTree(compare);
8146 // Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack.
8147 GenTreePtr origDest = nullptr; // Will only become non-null if we do the transformation (and thus require
8149 unsigned retValTmpNum = BAD_VAR_NUM;
8150 CORINFO_CLASS_HANDLE structHnd = nullptr;
8151 if (call->HasRetBufArg() &&
8152 call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null).
8154 // We're enforcing the invariant that return buffers pointers (at least for
8155 // struct return types containing GC pointers) are never pointers into the heap.
8156 // The large majority of cases are address of local variables, which are OK.
8157 // Otherwise, allocate a local of the given struct type, pass its address,
8158 // then assign from that into the proper destination. (We don't need to do this
8159 // if we're passing the caller's ret buff arg to the callee, since the caller's caller
8160 // will maintain the same invariant.)
8162 GenTreePtr dest = call->gtCallArgs->gtOp.gtOp1;
8163 assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above.
8164 if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR))
8166 // We'll exempt helper calls from this, assuming that the helper implementation
8167 // follows the old convention, and does whatever barrier is required.
8168 if (call->gtCallType != CT_HELPER)
8170 structHnd = call->gtRetClsHnd;
8171 if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) &&
8172 !((dest->OperGet() == GT_LCL_VAR || dest->OperGet() == GT_REG_VAR) &&
8173 dest->gtLclVar.gtLclNum == info.compRetBuffArg))
// Substitute a stack-allocated temp for the heap-pointing ret buf;
// the original destination is remembered in origDest (elided line)
// and copied back after the call, below.
8177 retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
8178 lvaSetStruct(retValTmpNum, structHnd, true);
8179 dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
8184 call->gtCallArgs->gtOp.gtOp1 = dest;
8187 /* Process the "normal" argument list */
8188 call = fgMorphArgs(call);
8189 noway_assert(call->gtOper == GT_CALL);
8191 // Morph stelem.ref helper call to store a null value, into a store into an array without the helper.
8192 // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
8193 if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
8195 GenTree* value = gtArgEntryByArgNum(call, 2)->node;
8196 if (value->IsIntegralConst(0))
8198 assert(value->OperGet() == GT_CNS_INT);
8200 GenTree* arr = gtArgEntryByArgNum(call, 0)->node;
8201 GenTree* index = gtArgEntryByArgNum(call, 1)->node;
8203 // Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy
8204 // the spill trees as well if necessary.
8205 GenTreeOp* argSetup = nullptr;
8206 for (GenTreeArgList* earlyArgs = call->gtCallArgs; earlyArgs != nullptr; earlyArgs = earlyArgs->Rest())
8208 GenTree* const arg = earlyArgs->Current();
8209 if (arg->OperGet() != GT_ASG)
8215 assert(arg != index);
8217 arg->gtFlags &= ~GTF_LATE_ARG;
8219 GenTree* op1 = argSetup;
8222 op1 = gtNewNothingNode();
8224 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8228 argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg);
8231 argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
// Clear the "already morphed" debug flags so the rebuilt tree can be
// re-morphed below without tripping debug checks.
8236 auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult {
8237 (*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
8238 return WALK_CONTINUE;
8241 fgWalkTreePost(&arr, resetMorphedFlag);
8242 fgWalkTreePost(&index, resetMorphedFlag);
8243 fgWalkTreePost(&value, resetMorphedFlag);
8246 GenTree* const nullCheckedArr = impCheckForNullPointer(arr);
8247 GenTree* const arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
8248 GenTree* const arrStore = gtNewAssignNode(arrIndexNode, value);
8249 arrStore->gtFlags |= GTF_ASG;
8251 GenTree* result = fgMorphTree(arrStore);
8252 if (argSetup != nullptr)
8254 result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result);
8256 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8264 // Optimize get_ManagedThreadId(get_CurrentThread)
8265 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
8266 info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId)
8268 noway_assert(origDest == nullptr);
8269 noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != nullptr);
8271 GenTreePtr innerCall = call->gtCallLateArgs->gtOp.gtOp1;
8273 if (innerCall->gtOper == GT_CALL && (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
8274 info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) ==
8275 CORINFO_INTRINSIC_GetCurrentManagedThread
8277 // substitute expression with call to helper
8278 GenTreePtr newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT, 0);
8279 JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n");
8280 return fgMorphTree(newCall);
// Copy the call's result from the substituted stack ret-buf temp back
// to the original (possibly heap) destination via a CpObj.
8284 if (origDest != nullptr)
8286 GenTreePtr retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
8287 // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused
8288 // var, which would allow the whole assignment to be optimized away to a NOP. So in that case, make the
8289 // origDest into a comma that uses the var. Note that the var doesn't have to be a temp for this to
8291 if (origDest->OperGet() == GT_ASG)
8293 if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
8295 GenTreePtr var = origDest->gtOp.gtOp1;
8296 origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest,
8297 gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet()));
8300 GenTreePtr copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false);
8301 copyBlk = fgMorphTree(copyBlk);
8302 GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk);
8304 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8309 if (call->IsNoReturn())
8312 // If we know that the call does not return then we can set fgRemoveRestOfBlock
8313 // to remove all subsequent statements and change the call's basic block to BBJ_THROW.
8314 // As a result the compiler won't need to preserve live registers across the call.
8316 // This isn't need for tail calls as there shouldn't be any code after the call anyway.
8317 // Besides, the tail call code is part of the epilog and converting the block to
8318 // BBJ_THROW would result in the tail call being dropped as the epilog is generated
8319 // only for BBJ_RETURN blocks.
8321 // Currently this doesn't work for non-void callees. Some of the code that handles
8322 // fgRemoveRestOfBlock expects the tree to have GTF_EXCEPT flag set but call nodes
8323 // do not have this flag by default. We could add the flag here but the proper solution
8324 // would be to replace the return expression with a local var node during inlining
8325 // so the rest of the call tree stays in a separate statement. That statement can then
8326 // be removed by fgRemoveRestOfBlock without needing to add GTF_EXCEPT anywhere.
8329 if (!call->IsTailCall() && call->TypeGet() == TYP_VOID)
8331 fgRemoveRestOfBlock = true;
8338 /*****************************************************************************
8340 * Transform the given GTK_CONST tree for code generation.
// Morph a GTK_CONST node. Non-string constants just get their stale effect
// flags cleared. String constants (GT_CNS_STR) are expanded: in throw blocks
// they become a lazy helper call that constructs the literal on demand;
// otherwise constructStringLiteral is consulted and the node is replaced by
// a string-literal access node.
//
// NOTE(review): elided excerpt — the early-return path for non-GT_CNS_STR
// nodes and some brace/#if structure are not visible here.
8343 GenTreePtr Compiler::fgMorphConst(GenTreePtr tree)
8345 noway_assert(tree->OperKind() & GTK_CONST);
8347 /* Clear any exception flags or other unnecessary flags
8348 * that may have been set before folding this node to a constant */
8350 tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
8352 if (tree->OperGet() != GT_CNS_STR)
8357 // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
8358 // guarantee slow performance for that block. Instead cache the return value
8359 // of CORINFO_HELP_STRCNS and go to cache first giving reasonable perf.
// Throw blocks are cold: construct the string lazily via a helper
// rather than eagerly materializing the literal.
8361 if (compCurBB->bbJumpKind == BBJ_THROW)
8363 CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd);
8364 if (helper != CORINFO_HELP_UNDEF)
8366 // For un-important blocks, we want to construct the string lazily
8368 GenTreeArgList* args;
// The current-module helper needs only the token; the general helper
// also takes the module (scope) handle.
8369 if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE)
8371 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT));
8375 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT),
8376 gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd));
8379 tree = gtNewHelperCallNode(helper, TYP_REF, 0, args);
8380 return fgMorphTree(tree);
8384 assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd));
// NOTE(review): the declaration of pValue is elided from this view —
// presumably declared just above the constructStringLiteral call.
8387 InfoAccessType iat =
8388 info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd, tree->gtStrCon.gtSconCPX, &pValue);
8390 tree = gtNewStringLiteralNode(iat, pValue);
8392 return fgMorphTree(tree);
8395 /*****************************************************************************
8397 * Transform the given GTK_LEAF tree for code generation.
// Morph a GTK_LEAF node: GT_LCL_VAR is delegated to fgMorphLocalVar;
// GT_LCL_FLD may be rewritten for vararg stack args; GT_FTN_ADDR is turned
// into a constant function-address handle, possibly wrapped in one or two
// indirections depending on the EE's reported access type.
//
// NOTE(review): elided excerpt — the #ifdef _TARGET_X86_ opening that pairs
// with the visible "#endif // _TARGET_X86_" and the IAT_* case labels in the
// switch are not shown here.
8400 GenTreePtr Compiler::fgMorphLeaf(GenTreePtr tree)
8402 noway_assert(tree->OperKind() & GTK_LEAF);
8404 if (tree->gtOper == GT_LCL_VAR)
8406 return fgMorphLocalVar(tree);
8409 else if (tree->gtOper == GT_LCL_FLD)
8411 if (info.compIsVarArgs)
8413 GenTreePtr newTree =
8414 fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
8415 if (newTree != nullptr)
// A block node that is only read (not a def) is converted to a
// plain indirection of the same type.
8417 if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
8419 fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
8425 #endif // _TARGET_X86_
8426 else if (tree->gtOper == GT_FTN_ADDR)
8428 CORINFO_CONST_LOOKUP addrInfo;
8430 #ifdef FEATURE_READYTORUN_COMPILER
// Prefer the precomputed R2R entry point when available; otherwise
// ask the EE for the fixed entry point.
8431 if (tree->gtFptrVal.gtEntryPoint.addr != nullptr)
8433 addrInfo = tree->gtFptrVal.gtEntryPoint;
8438 info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo);
8441 // Refer to gtNewIconHandleNode() as the template for constructing a constant handle
8443 tree->SetOper(GT_CNS_INT);
8444 tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle));
8445 tree->gtFlags |= GTF_ICON_FTN_ADDR;
// Wrap the constant according to how the address must be accessed
// (case labels elided in this view): one invariant indirection,
// a double indirection, or a GT_NOP to block constant folding.
8447 switch (addrInfo.accessType)
8450 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
8451 tree->gtFlags |= GTF_IND_INVARIANT;
8456 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
8460 tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding
8464 noway_assert(!"Unknown addrInfo.accessType");
8467 return fgMorphTree(tree);
// Mark the local-variable node on the lhs of an assignment as a definition.
//
// If 'tree' defines a local, the local's tree gets GTF_VAR_DEF; a partial
// (not 'entire') definition additionally gets GTF_VAR_USEASG so it is modeled
// as a use followed by a def.
// NOTE(review): the if (isEntire)/else split between the two flag assignments
// was elided by extraction — confirm against the full source.
8473 void Compiler::fgAssignSetVarDef(GenTreePtr tree)
8475 GenTreeLclVarCommon* lclVarCmnTree;
8476 bool isEntire = false;
8477 if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire))
// Whole-variable definition: a plain def.
8481 lclVarCmnTree->gtFlags |= GTF_VAR_DEF;
8485 // We consider partial definitions to be modeled as uses followed by definitions.
8486 // This captures the idea that preceding defs are not necessarily made redundant
8487 // by this definition.
8488 lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
8493 //------------------------------------------------------------------------
8494 // fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment
8497 // tree - The block assignment to be possibly morphed
8500 // The modified tree if successful, nullptr otherwise.
8503 // 'tree' must be a block assignment.
8506 // If successful, this method always returns the incoming tree, modifying only
// Attempt to replace a block assignment (copyblk/initblk in GT_ASG form) with
// a single scalar assignment of type 'asgType'.
//
// Returns the (modified-in-place) incoming tree on success; bails out (paths
// elided in this excerpt) when the block is not a simple register-sized or
// primitive-typed store.
// NOTE(review): lossy excerpt — many braces/else arms/returns were elided by
// extraction; code left byte-identical, comments only.
8509 GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
8511 // This must be a block assignment.
8512 noway_assert(tree->OperIsBlkOp());
8513 var_types asgType = tree->TypeGet();
8515 GenTreePtr asg = tree;
8516 GenTreePtr dest = asg->gtGetOp1();
8517 GenTreePtr src = asg->gtGetOp2();
8518 unsigned destVarNum = BAD_VAR_NUM;
8519 LclVarDsc* destVarDsc = nullptr;
8520 GenTreePtr lclVarTree = nullptr;
8521 bool isCopyBlock = asg->OperIsCopyBlkOp();
8522 bool isInitBlock = !isCopyBlock;
8525 CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
8527 // importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD)
8528 // The SIMD type in question could be Vector2f which is 8-bytes in size.
8529 // The below check is to make sure that we don't turn that copyblk
8530 // into a assignment, since rationalizer logic will transform the
8531 // copyblk appropriately. Otherwise, the transformation made in this
8532 // routine will prevent rationalizer logic and we might end up with
8533 // GT_ADDR(GT_SIMD) node post rationalization, leading to a noway assert
8535 // TODO-1stClassStructs: This is here to preserve old behavior.
8536 // It should be eliminated.
8537 if (src->OperGet() == GT_SIMD)
// Determine the destination's size and, when possible, its local var and
// class handle. First case: destination is (effectively) a block node.
8543 if (dest->gtEffectiveVal()->OperIsBlk())
8545 GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk();
8546 size = lhsBlk->Size();
8547 if (impIsAddressInLocal(lhsBlk->Addr(), &lclVarTree))
8549 destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
8550 destVarDsc = &(lvaTable[destVarNum]);
8552 if (lhsBlk->OperGet() == GT_OBJ)
8554 clsHnd = lhsBlk->AsObj()->gtClass;
8559 // Is this an enregisterable struct that is already a simple assignment?
8560 // This can happen if we are re-morphing.
8561 if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock)
8565 noway_assert(dest->OperIsLocal());
8567 destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
8568 destVarDsc = &(lvaTable[destVarNum]);
// Struct destination: size comes from the class handle; otherwise use
// the local's exact size.
8571 clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
8572 size = info.compCompHnd->getClassSize(clsHnd);
8576 size = destVarDsc->lvExactSize;
8581 // See if we can do a simple transformation:
8583 // GT_ASG <TYP_size>
8585 // GT_IND GT_IND or CNS_INT
// Choose the scalar type for the single assignment based on the block size.
8590 if (size == REGSIZE_BYTES)
8592 if (clsHnd == NO_CLASS_HANDLE)
8594 // A register-sized cpblk can be treated as an integer assignment.
8595 asgType = TYP_I_IMPL;
// With a class handle, consult the GC layout so a GC ref gets a GC type.
8600 info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
8601 asgType = getJitGCType(gcPtr);
8612 asgType = TYP_SHORT;
8615 #ifdef _TARGET_64BIT_
8619 #endif // _TARGET_64BIT_
8623 // TODO-1stClassStructs: Change this to asgType != TYP_STRUCT.
8624 if (!varTypeIsStruct(asgType))
8626 // For initBlk, a non constant source is not going to allow us to fiddle
8627 // with the bits to create a single assignment.
8628 noway_assert(size <= REGSIZE_BYTES);
8630 if (isInitBlock && !src->IsConstInitVal())
8635 if (destVarDsc != nullptr)
8637 #if LOCAL_ASSERTION_PROP
8638 // Kill everything about dest
8639 if (optLocalAssertionProp)
8641 if (optAssertionCount > 0)
8643 fgKillDependentAssertions(destVarNum DEBUGARG(tree));
8646 #endif // LOCAL_ASSERTION_PROP
8648 // A previous incarnation of this code also required the local not to be
8649 // address-exposed(=taken). That seems orthogonal to the decision of whether
8650 // to do field-wise assignments: being address-exposed will cause it to be
8651 // "dependently" promoted, so it will be in the right memory location. One possible
8652 // further reason for avoiding field-wise stores is that the struct might have alignment-induced
8653 // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid
8654 // concern, then we could compromise, and say that address-exposed + fields do not completely cover the
8655 // memory of the struct prevent field-wise assignments. Same situation exists for the "src" decision.
8656 if (varTypeIsStruct(lclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
8658 // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.)
8661 else if (!varTypeIsFloating(lclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc)))
8663 // Use the dest local var directly, as well as its type.
8665 asgType = destVarDsc->lvType;
8667 // If the block operation had been a write to a local var of a small int type,
8668 // of the exact size of the small int type, and the var is NormalizeOnStore,
8669 // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
8670 // have done that normalization. If we're now making it into an assignment,
8671 // the NormalizeOnStore will work, and it can be a full def.
8672 if (destVarDsc->lvNormalizeOnStore())
8674 dest->gtFlags &= (~GTF_VAR_USEASG);
8679 // Could be a non-promoted struct, or a floating point type local, or
8680 // an int subject to a partial write. Don't enregister.
8681 lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField));
8683 // Mark the local var tree as a definition point of the local.
8684 lclVarTree->gtFlags |= GTF_VAR_DEF;
8685 if (size < destVarDsc->lvExactSize)
8686 { // If it's not a full-width assignment....
8687 lclVarTree->gtFlags |= GTF_VAR_USEASG;
8690 if (dest == lclVarTree)
// Re-express the partial write as *(addr of local) of the scalar type.
8692 dest = gtNewOperNode(GT_IND, asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
8697 // Check to ensure we don't have a reducible *(& ... )
8698 if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR)
8700 GenTreePtr addrOp = dest->AsIndir()->Addr()->gtGetOp1();
8701 // Ignore reinterpret casts between int/gc
8702 if ((addrOp->TypeGet() == asgType) || (varTypeIsIntegralOrI(addrOp) && (genTypeSize(asgType) == size)))
8705 asgType = addrOp->TypeGet();
8709 if (dest->gtEffectiveVal()->OperIsIndir())
8711 // If we have no information about the destination, we have to assume it could
8712 // live anywhere (not just in the GC heap).
8713 // Mark the GT_IND node so that we use the correct write barrier helper in case
8714 // the field is a GC ref.
8716 if (!fgIsIndirOfAddrOfLocal(dest))
8718 dest->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
8719 tree->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
// Now do the equivalent analysis for the source operand (copyblk case).
8723 LclVarDsc* srcVarDsc = nullptr;
8726 if (src->OperGet() == GT_LCL_VAR)
8729 srcVarDsc = &(lvaTable[src->AsLclVarCommon()->gtLclNum]);
8731 else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &lclVarTree))
8733 srcVarDsc = &(lvaTable[lclVarTree->AsLclVarCommon()->gtLclNum]);
8735 if (srcVarDsc != nullptr)
8737 if (varTypeIsStruct(lclVarTree) && (srcVarDsc->lvPromoted || srcVarDsc->lvIsSIMDType()))
8739 // Let fgMorphCopyBlock handle it.
8742 else if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
8743 size == genTypeSize(genActualType(lclVarTree->TypeGet())))
8745 // Use the src local var directly.
8750 #ifndef LEGACY_BACKEND
8752 // The source argument of the copyblk can potentially
8753 // be accessed only through indir(addr(lclVar))
8754 // or indir(lclVarAddr) in rational form and liveness
8755 // won't account for these uses. That said,
8756 // we have to mark this local as address exposed so
8757 // we don't delete it as a dead store later on.
8758 unsigned lclVarNum = lclVarTree->gtLclVarCommon.gtLclNum;
8759 lvaTable[lclVarNum].lvAddrExposed = true;
8760 lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
8762 #else // LEGACY_BACKEND
8763 lvaSetVarDoNotEnregister(lclVarTree->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
8764 #endif // LEGACY_BACKEND
8766 if (src == lclVarTree)
8768 srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
8769 src = gtNewOperNode(GT_IND, asgType, srcAddr);
8773 assert(src->OperIsIndir());
8777 // If we have no information about the src, we have to assume it could
8778 // live anywhere (not just in the GC heap).
8779 // Mark the GT_IND node so that we use the correct write barrier helper in case
8780 // the field is a GC ref.
8782 if (!fgIsIndirOfAddrOfLocal(src))
8784 src->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
// SIMD-typed init: replace the zero constant with a SIMDIntrinsicInit node.
8791 if (varTypeIsSIMD(asgType))
8793 assert(!isCopyBlock); // Else we would have returned the tree above.
8794 noway_assert(src->IsIntegralConst(0));
8795 noway_assert(destVarDsc != nullptr);
8797 src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size);
8798 tree->gtOp.gtOp2 = src;
// Scalar init: unwrap GT_INIT_VAL and widen the fill byte to asgType.
8804 if (src->OperIsInitVal())
8806 src = src->gtGetOp1();
8808 assert(src->IsCnsIntOrI());
8809 // This will mutate the integer constant, in place, to be the correct
8810 // value for the type we are using in the assignment.
8811 src->AsIntCon()->FixupInitBlkValue(asgType);
8815 // Ensure that the dest is setup appropriately.
8816 if (dest->gtEffectiveVal()->OperIsIndir())
8818 dest = fgMorphBlockOperand(dest, asgType, size, true /*isDest*/);
8821 // Ensure that the rhs is setup appropriately.
8824 src = fgMorphBlockOperand(src, asgType, size, false /*isDest*/);
8827 // Set the lhs and rhs on the assignment.
8828 if (dest != tree->gtOp.gtOp1)
8830 asg->gtOp.gtOp1 = dest;
8832 if (src != asg->gtOp.gtOp2)
8834 asg->gtOp.gtOp2 = src;
// Retype the assignment itself and recompute its effect flags.
8837 asg->ChangeType(asgType);
8838 dest->gtFlags |= GTF_DONT_CSE;
8839 asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT);
8840 // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate.
8841 asg->gtFlags &= ~GTF_REVERSE_OPS;
8846 printf("fgMorphOneAsgBlock (after):\n");
8856 //------------------------------------------------------------------------
8857 // fgMorphInitBlock: Perform the Morphing of a GT_INITBLK node
8860 // tree - a tree node with a gtOper of GT_INITBLK
8861 // the child nodes for tree have already been Morphed
8864 // We can return the original GT_INITBLK unmodified (least desirable, but always correct)
8865 // We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable)
8866 // If we have performed struct promotion of the Dest() then we will try to
8867 // perform a field by field assignment for each of the promoted struct fields
8870 // If we leave it as a GT_INITBLK we will call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp
8871 // if the Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
8872 // cannot use a field by field assignment and must leave the original GT_INITBLK unmodified.
// Perform the morphing of a GT_ASG-form InitBlk.
//
// Tries, in order:
//  1. fgMorphOneAsgBlockOp — turn the whole init into one scalar assignment;
//  2. field-by-field initialization when the destination local is promoted,
//     the width is a matching constant, and the layout has no holes;
//  3. otherwise leaves it as a block op (marking the dest DoNotEnregister).
//
// Fix: line 8981 used bitwise '&' on lvAddrExposed/lvContainsHoles where
// logical '&&' was intended. Behavior happens to coincide because both are
// 1-bit bitfields, but '&&' states the intent and matches the upstream fix.
// NOTE(review): lossy excerpt — braces/else arms elided by extraction; all
// other code tokens are unchanged.
8874 GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
8876 // We must have the GT_ASG form of InitBlkOp.
8877 noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp());
8879 bool morphed = false;
8882 GenTree* asg = tree;
8883 GenTree* src = tree->gtGetOp2();
8884 GenTree* origDest = tree->gtGetOp1();
// Canonicalize the destination (may wrap commas, resolve DYN_BLK, etc.).
8886 GenTree* dest = fgMorphBlkNode(origDest, true);
8887 if (dest != origDest)
8889 tree->gtOp.gtOp1 = dest;
8891 tree->gtType = dest->TypeGet();
8892 // (Constant propagation may cause a TYP_STRUCT lclVar to be changed to GT_CNS_INT, and its
8893 // type will be the type of the original lclVar, in which case we will change it to TYP_INT).
8894 if ((src->OperGet() == GT_CNS_INT) && varTypeIsStruct(src))
8896 src->gtType = TYP_INT;
8898 JITDUMP("\nfgMorphInitBlock:");
// Preferred outcome: collapse to a single scalar assignment.
8900 GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
8903 JITDUMP(" using oneAsgTree.\n");
8908 GenTree* destAddr = nullptr;
8909 GenTree* initVal = src->OperIsInitVal() ? src->gtGetOp1() : src;
8910 GenTree* blockSize = nullptr;
8911 unsigned blockWidth = 0;
8912 FieldSeqNode* destFldSeq = nullptr;
8913 LclVarDsc* destLclVar = nullptr;
8914 bool destDoFldAsg = false;
8915 unsigned destLclNum = BAD_VAR_NUM;
8916 bool blockWidthIsConst = false;
8917 GenTreeLclVarCommon* lclVarTree = nullptr;
8918 if (dest->IsLocal())
8920 lclVarTree = dest->AsLclVarCommon();
8924 if (dest->OperIsBlk())
8926 destAddr = dest->AsBlk()->Addr();
8927 blockWidth = dest->AsBlk()->gtBlkSize;
8931 assert((dest->gtOper == GT_IND) && (dest->TypeGet() != TYP_STRUCT));
8932 destAddr = dest->gtGetOp1();
8933 blockWidth = genTypeSize(dest->TypeGet());
8936 if (lclVarTree != nullptr)
8938 destLclNum = lclVarTree->gtLclNum;
8939 destLclVar = &lvaTable[destLclNum];
8940 blockWidth = varTypeIsStruct(destLclVar) ? destLclVar->lvExactSize : genTypeSize(destLclVar);
8941 blockWidthIsConst = true;
8945 if (dest->gtOper == GT_DYN_BLK)
8947 // The size must be an integer type
8948 blockSize = dest->AsBlk()->gtDynBlk.gtDynamicSize;
8949 assert(varTypeIsIntegral(blockSize->gtType));
8953 assert(blockWidth != 0);
8954 blockWidthIsConst = true;
// If the address is a local address expression, record the local so we can
// consider field-by-field initialization below.
8957 if ((destAddr != nullptr) && destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
8959 destLclNum = lclVarTree->gtLclNum;
8960 destLclVar = &lvaTable[destLclNum];
8963 if (destLclNum != BAD_VAR_NUM)
8965 #if LOCAL_ASSERTION_PROP
8966 // Kill everything about destLclNum (and its field locals)
8967 if (optLocalAssertionProp)
8969 if (optAssertionCount > 0)
8971 fgKillDependentAssertions(destLclNum DEBUGARG(tree));
8974 #endif // LOCAL_ASSERTION_PROP
8976 if (destLclVar->lvPromoted && blockWidthIsConst)
8978 assert(initVal->OperGet() == GT_CNS_INT);
8979 noway_assert(varTypeIsStruct(destLclVar));
8980 noway_assert(!opts.MinOpts());
// Logical test of two independent conditions — was a bitwise '&'.
8981 if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles)
8983 JITDUMP(" dest is address exposed");
8987 if (blockWidth == destLclVar->lvExactSize)
8989 JITDUMP(" (destDoFldAsg=true)");
8990 // We may decide later that a copyblk is required when this struct has holes
8991 destDoFldAsg = true;
8995 JITDUMP(" with mismatched size");
9001 // Can we use field by field assignment for the dest?
9002 if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
9004 JITDUMP(" dest contains holes");
9005 destDoFldAsg = false;
9008 JITDUMP(destDoFldAsg ? " using field by field initialization.\n" : " this requires an InitBlock.\n");
9010 // If we're doing an InitBlock and we've transformed the dest to a non-Blk
9011 // we need to change it back.
9012 if (!destDoFldAsg && !dest->OperIsBlk())
9014 noway_assert(blockWidth != 0);
9015 tree->gtOp.gtOp1 = origDest;
9016 tree->gtType = origDest->gtType;
9019 if (!destDoFldAsg && (destLclVar != nullptr))
9021 // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister.
9022 if (!destLclVar->lvRegStruct)
9024 // Mark it as DoNotEnregister.
9025 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
9029 // Mark the dest struct as DoNotEnreg
9030 // when they are LclVar structs and we are using a CopyBlock
9031 // or the struct is not promoted
9035 #if CPU_USES_BLOCK_MOVE
9036 compBlkOpUsed = true;
9038 dest = fgMorphBlockOperand(dest, dest->TypeGet(), blockWidth, true);
9039 tree->gtOp.gtOp1 = dest;
9040 tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
9044 // The initVal must be a constant of TYP_INT
9045 noway_assert(initVal->OperGet() == GT_CNS_INT);
9046 noway_assert(genActualType(initVal->gtType) == TYP_INT);
9048 // The dest must be of a struct type.
9049 noway_assert(varTypeIsStruct(destLclVar));
9052 // Now, convert InitBlock to individual assignments
9056 INDEBUG(morphed = true);
9060 unsigned fieldLclNum;
9061 unsigned fieldCnt = destLclVar->lvFieldCnt;
// Emit one assignment per promoted field, comma-chained onto 'tree'.
9063 for (unsigned i = 0; i < fieldCnt; ++i)
9065 fieldLclNum = destLclVar->lvFieldLclStart + i;
9066 dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
9068 noway_assert(lclVarTree->gtOper == GT_LCL_VAR);
9069 // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
9070 dest->gtFlags |= (lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG));
9072 srcCopy = gtCloneExpr(initVal);
9073 noway_assert(srcCopy != nullptr);
9075 // need type of oper to be same as tree
9076 if (dest->gtType == TYP_LONG)
9078 srcCopy->ChangeOperConst(GT_CNS_NATIVELONG);
9079 // copy and extend the value
9080 srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue());
9081 /* Change the types of srcCopy to TYP_LONG */
9082 srcCopy->gtType = TYP_LONG;
9084 else if (varTypeIsFloating(dest->gtType))
9086 srcCopy->ChangeOperConst(GT_CNS_DBL);
9087 // setup the bit pattern
9088 memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal,
9089 sizeof(srcCopy->gtDblCon.gtDconVal));
9090 /* Change the types of srcCopy to TYP_DOUBLE */
9091 srcCopy->gtType = TYP_DOUBLE;
9095 noway_assert(srcCopy->gtOper == GT_CNS_INT);
9096 noway_assert(srcCopy->TypeGet() == TYP_INT);
9097 // setup the bit pattern
9098 memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal,
9099 sizeof(srcCopy->gtIntCon.gtIconVal));
9102 srcCopy->gtType = dest->TypeGet();
9104 asg = gtNewAssignNode(dest, srcCopy);
9106 #if LOCAL_ASSERTION_PROP
9107 if (optLocalAssertionProp)
9109 optAssertionGen(asg);
9111 #endif // LOCAL_ASSERTION_PROP
9115 tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
9128 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9132 printf("fgMorphInitBlock (after):\n");
9141 //------------------------------------------------------------------------
9142 // fgMorphBlkToInd: Change a blk node into a GT_IND of the specified type
9145 // tree - the node to be modified.
9146 // type - the type of indirection to change it to.
9149 // Returns the node, modified in place.
9152 // This doesn't really warrant a separate method, but is here to abstract
9153 // the fact that these nodes can be modified in-place.
// Change a block node into a GT_IND of the specified type, in place.
//
// Arguments:
//    tree - the block node to be modified
//    type - the type the resulting indirection should have
// The node is mutated in place; per the header comment above, the (elided)
// return is the same node.
9155 GenTreePtr Compiler::fgMorphBlkToInd(GenTreeBlk* tree, var_types type)
9157 tree->SetOper(GT_IND);
9158 tree->gtType = type;
9162 //------------------------------------------------------------------------
9163 // fgMorphGetStructAddr: Gets the address of a struct object
9166 // pTree - the parent's pointer to the struct object node
9167 // clsHnd - the class handle for the struct type
9168 // isRValue - true if this is a source (not dest)
9171 // Returns the address of the struct value, possibly modifying the existing tree to
9172 // sink the address below any comma nodes (this is to canonicalize for value numbering).
9173 // If this is a source, it will morph it to an GT_IND before taking its address,
9174 // since it may not be remorphed (and we don't want blk nodes as rvalues).
// Get the address of a struct object, possibly rewriting *pTree so the
// address can be taken (see the header comment above for the full contract).
//
// NOTE(review): lossy excerpt — returns, case labels, and braces were elided
// by extraction; comments describe only the visible lines.
9176 GenTreePtr Compiler::fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue)
9179 GenTree* tree = *pTree;
9180 // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we
9181 // need to hang onto that for the purposes of value numbering.
9182 if (tree->OperIsIndir())
9184 if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
9186 addr = tree->gtOp.gtOp1;
// An rvalue block: demote to GT_IND first so we don't leave blk nodes
// as rvalues, then take its address.
9190 if (isRValue && tree->OperIsBlk())
9192 tree->ChangeOper(GT_IND);
9194 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
9197 else if (tree->gtOper == GT_COMMA)
9199 // If this is a comma, we're going to "sink" the GT_ADDR below it.
9200 (void)fgMorphGetStructAddr(&(tree->gtOp.gtOp2), clsHnd, isRValue)
9201 tree->gtType = TYP_BYREF;
// Remaining operators (case labels elided): addressable nodes get a direct
// GT_ADDR; anything else is spilled to a comma-form temp first.
9206 switch (tree->gtOper)
9213 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
9217 // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're
9218 // not going to use "temp"
9219 GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd);
9220 addr = fgMorphGetStructAddr(pTree, clsHnd, isRValue);
9229 //------------------------------------------------------------------------
9230 // fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment
9233 // tree - The struct type node
9234 // isDest - True if this is the destination of the assignment
9237 // Returns the possibly-morphed node. The caller is responsible for updating
9238 // the parent of this node..
// Morph a block node preparatory to morphing a block assignment.
//
// Canonicalizations performed (see header comment above):
//  - GT_COMMA trees are retyped to TYP_BYREF and re-wrapped so the comma is
//    the address expression under a new OBJ/BLK/IND;
//  - a GT_DYN_BLK with a constant, non-zero size becomes a GT_BLK;
//  - a mismatched-size local under ADDR is marked DoNotEnregister.
// NOTE(review): lossy excerpt — braces/else arms elided by extraction.
9240 GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest)
9242 if (tree->gtOper == GT_COMMA)
9244 GenTree* effectiveVal = tree->gtEffectiveVal();
9245 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9247 addr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9249 // In order to CSE and value number array index expressions and bounds checks,
9250 // the commas in which they are contained need to match.
9251 // The pattern is that the COMMA should be the address expression.
9252 // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind.
9253 // TODO-1stClassStructs: Consider whether this can be improved.
9254 // Also consider whether some of this can be included in gtNewBlockVal (though note
9255 // that doing so may cause us to query the type system before we otherwise would).
9256 GenTree* lastComma = nullptr;
// Walk the comma spine, retyping each comma to BYREF; 'lastComma' tracking
// (assignment elided in this excerpt) locates the innermost comma.
9257 for (GenTree* next = tree; next != nullptr && next->gtOper == GT_COMMA; next = next->gtGetOp2())
9259 next->gtType = TYP_BYREF;
9262 if (lastComma != nullptr)
9264 noway_assert(lastComma->gtGetOp2() == effectiveVal);
9265 lastComma->gtOp.gtOp2 = addr;
// Choose the wrapper for the new address: OBJ when a struct handle is
// available, IND otherwise, BLK for non-TYP_STRUCT block types.
9268 var_types structType = effectiveVal->TypeGet();
9269 if (structType == TYP_STRUCT)
9271 CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(effectiveVal);
9272 if (structHnd == NO_CLASS_HANDLE)
9274 tree = gtNewOperNode(GT_IND, effectiveVal->TypeGet(), addr);
9278 tree = gtNewObjNode(structHnd, addr);
9279 if (tree->OperGet() == GT_OBJ)
9281 gtSetObjGcInfo(tree->AsObj());
9287 tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, genTypeSize(structType));
9290 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9294 if (!tree->OperIsBlk())
9298 GenTreeBlk* blkNode = tree->AsBlk();
9299 if (blkNode->OperGet() == GT_DYN_BLK)
9301 if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI())
9303 unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue();
9304 // A GT_BLK with size of zero is not supported,
9305 // so if we encounter such a thing we just leave it as a GT_DYN_BLK
9308 blkNode->AsDynBlk()->gtDynamicSize = nullptr;
9309 blkNode->ChangeOper(GT_BLK);
9310 blkNode->gtBlkSize = size;
// A size-mismatched (or scalar-typed source) local accessed via ADDR must
// live on the stack — enregistering it would break the block access.
9322 if ((blkNode->TypeGet() != TYP_STRUCT) && (blkNode->Addr()->OperGet() == GT_ADDR) &&
9323 (blkNode->Addr()->gtGetOp1()->OperGet() == GT_LCL_VAR))
9325 GenTreeLclVarCommon* lclVarNode = blkNode->Addr()->gtGetOp1()->AsLclVarCommon();
9326 if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode)))
9328 lvaSetVarDoNotEnregister(lclVarNode->gtLclNum DEBUG_ARG(DNER_VMNeedsStackAddr));
9335 //------------------------------------------------------------------------
9336 // fgMorphBlockOperand: Canonicalize an operand of a block assignment
9339 // tree - The block operand
9340 // asgType - The type of the assignment
9341 // blockWidth - The size of the block
9342 // isDest - true iff this is the destination of the assignment
9345 // Returns the morphed block operand
9348 // This does the following:
9349 // - Ensures that a struct operand is a block node or (for non-LEGACY_BACKEND) lclVar.
9350 // - Ensures that any COMMAs are above ADDR nodes.
9351 // Although 'tree' WAS an operand of a block assignment, the assignment
9352 // may have been retyped to be a scalar assignment.
// Canonicalize one operand of a block assignment (see header comment above).
//
// For a scalar asgType: strip reducible *(&lcl) patterns and retype blk/ind
// nodes to the scalar type. For a struct asgType: ensure the operand is a
// block node (or, for non-LEGACY_BACKEND, a matching lclVar), wrapping in a
// new OBJ/BLK/IND when an indirection is required.
// NOTE(review): lossy excerpt — braces/else arms/returns elided by extraction.
9354 GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isDest)
9356 GenTree* effectiveVal = tree->gtEffectiveVal();
9358 if (!varTypeIsStruct(asgType))
9360 if (effectiveVal->OperIsIndir())
9362 GenTree* addr = effectiveVal->AsIndir()->Addr();
// *(&x) where x already has the assignment type: use x directly.
9363 if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType))
9365 effectiveVal = addr->gtGetOp1();
9367 else if (effectiveVal->OperIsBlk())
9369 effectiveVal = fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
9373 effectiveVal->gtType = asgType;
9376 else if (effectiveVal->TypeGet() != asgType)
// Wrong-typed scalar: reinterpret through *(&node) at asgType.
9378 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9379 effectiveVal = gtNewOperNode(GT_IND, asgType, addr);
// Struct case: find any underlying indirection and/or local variable.
9384 GenTreeIndir* indirTree = nullptr;
9385 GenTreeLclVarCommon* lclNode = nullptr;
9386 bool needsIndirection = true;
9388 if (effectiveVal->OperIsIndir())
9390 indirTree = effectiveVal->AsIndir();
9391 GenTree* addr = effectiveVal->AsIndir()->Addr();
9392 if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
9394 lclNode = addr->gtGetOp1()->AsLclVarCommon();
9397 else if (effectiveVal->OperGet() == GT_LCL_VAR)
9399 lclNode = effectiveVal->AsLclVarCommon();
// SIMD operands may be used directly without an indirection.
9402 if (varTypeIsSIMD(asgType))
9404 if ((indirTree != nullptr) && (lclNode == nullptr) && (indirTree->Addr()->OperGet() == GT_ADDR) &&
9405 (indirTree->Addr()->gtGetOp1()->gtOper == GT_SIMD))
9408 needsIndirection = false;
9409 effectiveVal = indirTree->Addr()->gtGetOp1();
9411 if (effectiveVal->OperIsSIMD())
9413 needsIndirection = false;
9416 #endif // FEATURE_SIMD
9417 if (lclNode != nullptr)
9419 LclVarDsc* varDsc = &(lvaTable[lclNode->gtLclNum]);
// An exactly-sized struct local can be used directly (RyuJIT backend only).
9420 if (varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth))
9422 #ifndef LEGACY_BACKEND
9423 effectiveVal = lclNode;
9424 needsIndirection = false;
9425 #endif // !LEGACY_BACKEND
9429 // This may be a lclVar that was determined to be address-exposed.
9430 effectiveVal->gtFlags |= (lclNode->gtFlags & GTF_ALL_EFFECT);
9433 if (needsIndirection)
9435 if (indirTree != nullptr)
9437 // We should never find a struct indirection on the lhs of an assignment.
9438 assert(!isDest || indirTree->OperIsBlk());
9439 if (!isDest && indirTree->OperIsBlk())
9441 (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
// No usable indirection: build one over the operand's address.
9447 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9450 CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal);
9451 if (clsHnd == NO_CLASS_HANDLE)
9453 newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth);
9457 newTree = gtNewObjNode(clsHnd, addr);
9458 if (isDest && (newTree->OperGet() == GT_OBJ))
9460 gtSetObjGcInfo(newTree->AsObj());
9462 if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0))
9464 // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
9465 // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
9466 // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
9467 // separately now to avoid excess diffs.
9468 newTree->gtFlags &= ~(GTF_GLOB_EFFECT);
9474 newTree = new (this, GT_IND) GenTreeIndir(GT_IND, asgType, addr, nullptr);
9476 effectiveVal = newTree;
9480 tree = effectiveVal;
9484 //------------------------------------------------------------------------
9485 // fgMorphUnsafeBlk: Convert a CopyObj with a dest on the stack to a GC Unsafe CopyBlk
9488 // dest - the GT_OBJ or GT_STORE_OBJ
9491 // The destination must be known (by the caller) to be on the stack.
9494 // If we have a CopyObj with a dest on the stack, and its size is small enough
9495 // to be completely unrolled (i.e. between [16..64] bytes), we will convert it into a
9496 // GC Unsafe CopyBlk that is non-interruptible.
9497 // This is not supported for the JIT32_GCENCODER, in which case this method is a no-op.
// Convert a CopyObj whose destination is known (by the caller) to be on the
// stack into a GC-unsafe, non-interruptible CopyBlk when its size is within
// the unroll window. No-op for the JIT32 GC encoder (see header above).
9499 void Compiler::fgMorphUnsafeBlk(GenTreeObj* dest)
9501 #if defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
// Only meaningful when the destination actually carries GC pointers.
9502 assert(dest->gtGcPtrCount != 0);
9503 unsigned blockWidth = dest->AsBlk()->gtBlkSize;
9505 bool destOnStack = false;
9506 GenTree* destAddr = dest->Addr();
// Caller contract: destination address must be a local address expression.
9507 assert(destAddr->IsLocalAddrExpr() != nullptr);
// Sizes in [2*ptr, CPBLK_UNROLL_LIMIT] are unrolled, so the copy can be
// made non-interruptible (GC unsafe) without hurting suspension latency.
9509 if ((blockWidth >= (2 * TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT))
9511 genTreeOps newOper = (dest->gtOper == GT_OBJ) ? GT_BLK : GT_STORE_BLK;
9512 dest->SetOper(newOper);
9513 dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
9515 #endif // defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
9518 //------------------------------------------------------------------------
9519 // fgMorphCopyBlock: Perform the Morphing of block copy
9522 // tree - a block copy (i.e. an assignment with a block op on the lhs).
9525 // We can return the original block copy unmodified (least desirable, but always correct)
9526 // We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable).
9527 // If we have performed struct promotion of the Source() or the Dest() then we will try to
9528 // perform a field by field assignment for each of the promoted struct fields.
9531 // The child nodes for tree have already been Morphed.
9534 // If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest().
9535 // When performing a field by field assignment we can have one of Source() or Dest treated as a blob of bytes
9536 // and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
9537 // if the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
9538 // cannot use a field by field assignment and must leave the original block copy unmodified.
9540 GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
9542 noway_assert(tree->OperIsCopyBlkOp());
9544 JITDUMP("\nfgMorphCopyBlock:");
9546 bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0;
9548 GenTree* asg = tree;
9549 GenTree* rhs = asg->gtGetOp2();
9550 GenTree* dest = asg->gtGetOp1();
9552 #if FEATURE_MULTIREG_RET
9553 // If this is a multi-reg return, we will not do any morphing of this node.
9554 if (rhs->IsMultiRegCall())
9556 assert(dest->OperGet() == GT_LCL_VAR);
9557 JITDUMP(" not morphing a multireg call return\n");
9560 #endif // FEATURE_MULTIREG_RET
9562 // If we have an array index on the lhs, we need to create an obj node.
9564 dest = fgMorphBlkNode(dest, true);
9565 if (dest != asg->gtGetOp1())
9567 asg->gtOp.gtOp1 = dest;
9568 if (dest->IsLocal())
9570 dest->gtFlags |= GTF_VAR_DEF;
9573 asg->gtType = dest->TypeGet();
9574 rhs = fgMorphBlkNode(rhs, false);
9576 asg->gtOp.gtOp2 = rhs;
9578 GenTreePtr oldTree = tree;
9579 GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
9583 JITDUMP(" using oneAsgTree.\n");
9588 unsigned blockWidth;
9589 bool blockWidthIsConst = false;
9590 GenTreeLclVarCommon* lclVarTree = nullptr;
9591 GenTreeLclVarCommon* srcLclVarTree = nullptr;
9592 unsigned destLclNum = BAD_VAR_NUM;
9593 LclVarDsc* destLclVar = nullptr;
9594 FieldSeqNode* destFldSeq = nullptr;
9595 bool destDoFldAsg = false;
9596 GenTreePtr destAddr = nullptr;
9597 GenTreePtr srcAddr = nullptr;
9598 bool destOnStack = false;
9599 bool hasGCPtrs = false;
9601 JITDUMP("block assignment to morph:\n");
9604 if (dest->IsLocal())
9606 blockWidthIsConst = true;
9608 if (dest->gtOper == GT_LCL_VAR)
9610 lclVarTree = dest->AsLclVarCommon();
9611 destLclNum = lclVarTree->gtLclNum;
9612 destLclVar = &lvaTable[destLclNum];
9613 if (destLclVar->lvType == TYP_STRUCT)
9615 // It would be nice if lvExactSize always corresponded to the size of the struct,
9616 // but it doesn't always for the temps that the importer creates when it spills side
9618 // TODO-Cleanup: Determine when this happens, and whether it can be changed.
9619 blockWidth = info.compCompHnd->getClassSize(destLclVar->lvVerTypeInfo.GetClassHandle());
9623 blockWidth = genTypeSize(destLclVar->lvType);
9625 hasGCPtrs = destLclVar->lvStructGcCount != 0;
9629 assert(dest->TypeGet() != TYP_STRUCT);
9630 assert(dest->gtOper == GT_LCL_FLD);
9631 blockWidth = genTypeSize(dest->TypeGet());
9632 destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
9633 destFldSeq = dest->AsLclFld()->gtFieldSeq;
9638 GenTree* effectiveDest = dest->gtEffectiveVal();
9639 if (effectiveDest->OperGet() == GT_IND)
9641 assert(dest->TypeGet() != TYP_STRUCT);
9642 blockWidth = genTypeSize(effectiveDest->TypeGet());
9643 blockWidthIsConst = true;
9644 if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
9646 destAddr = dest->gtGetOp1();
9651 assert(effectiveDest->OperIsBlk());
9652 GenTreeBlk* blk = effectiveDest->AsBlk();
9654 blockWidth = blk->gtBlkSize;
9655 blockWidthIsConst = (blk->gtOper != GT_DYN_BLK);
9656 if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
9658 destAddr = blk->Addr();
9661 if (destAddr != nullptr)
9663 noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL);
9664 if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
9667 destLclNum = lclVarTree->gtLclNum;
9668 destLclVar = &lvaTable[destLclNum];
9673 if (destLclVar != nullptr)
9675 #if LOCAL_ASSERTION_PROP
9676 // Kill everything about destLclNum (and its field locals)
9677 if (optLocalAssertionProp)
9679 if (optAssertionCount > 0)
9681 fgKillDependentAssertions(destLclNum DEBUGARG(tree));
9684 #endif // LOCAL_ASSERTION_PROP
9686 if (destLclVar->lvPromoted && blockWidthIsConst)
9688 noway_assert(varTypeIsStruct(destLclVar));
9689 noway_assert(!opts.MinOpts());
9691 if (blockWidth == destLclVar->lvExactSize)
9693 JITDUMP(" (destDoFldAsg=true)");
9694 // We may decide later that a copyblk is required when this struct has holes
9695 destDoFldAsg = true;
9699 JITDUMP(" with mismatched dest size");
9704 FieldSeqNode* srcFldSeq = nullptr;
9705 unsigned srcLclNum = BAD_VAR_NUM;
9706 LclVarDsc* srcLclVar = nullptr;
9707 bool srcDoFldAsg = false;
9711 srcLclVarTree = rhs->AsLclVarCommon();
9712 srcLclNum = srcLclVarTree->gtLclNum;
9713 if (rhs->OperGet() == GT_LCL_FLD)
9715 srcFldSeq = rhs->AsLclFld()->gtFieldSeq;
9718 else if (rhs->OperIsIndir())
9720 if (rhs->gtOp.gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq))
9722 srcLclNum = srcLclVarTree->gtLclNum;
9726 srcAddr = rhs->gtOp.gtOp1;
9730 if (srcLclNum != BAD_VAR_NUM)
9732 srcLclVar = &lvaTable[srcLclNum];
9734 if (srcLclVar->lvPromoted && blockWidthIsConst)
9736 noway_assert(varTypeIsStruct(srcLclVar));
9737 noway_assert(!opts.MinOpts());
9739 if (blockWidth == srcLclVar->lvExactSize)
9741 JITDUMP(" (srcDoFldAsg=true)");
9742 // We may decide later that a copyblk is required when this struct has holes
9747 JITDUMP(" with mismatched src size");
9752 // Check to see if we are required to do a copy block because the struct contains holes
9753 // and either the src or dest is externally visible
9755 bool requiresCopyBlock = false;
9756 bool srcSingleLclVarAsg = false;
9757 bool destSingleLclVarAsg = false;
9759 if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq))
9761 // Self-assign; no effect.
9762 GenTree* nop = gtNewNothingNode();
9763 INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
9767 // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
9768 if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct))
9770 requiresCopyBlock = true;
9773 // Can we use field by field assignment for the dest?
9774 if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
9776 JITDUMP(" dest contains custom layout and contains holes");
9777 // C++ style CopyBlock with holes
9778 requiresCopyBlock = true;
9781 // Can we use field by field assignment for the src?
9782 if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles)
9784 JITDUMP(" src contains custom layout and contains holes");
9785 // C++ style CopyBlock with holes
9786 requiresCopyBlock = true;
9789 if (dest->OperGet() == GT_OBJ && dest->AsBlk()->gtBlkOpGcUnsafe)
9791 requiresCopyBlock = true;
9794 // Can't use field by field assignment if the src is a call.
9795 if (rhs->OperGet() == GT_CALL)
9797 JITDUMP(" src is a call");
9798 // C++ style CopyBlock with holes
9799 requiresCopyBlock = true;
9802 // If we passed the above checks, then we will check these two
9803 if (!requiresCopyBlock)
9805 // Are both dest and src promoted structs?
9806 if (destDoFldAsg && srcDoFldAsg)
9808 // Both structs should be of the same type, or each have a single field of the same type.
9809 // If not we will use a copy block.
9810 if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() !=
9811 lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
9813 unsigned destFieldNum = lvaTable[destLclNum].lvFieldLclStart;
9814 unsigned srcFieldNum = lvaTable[srcLclNum].lvFieldLclStart;
9815 if ((lvaTable[destLclNum].lvFieldCnt != 1) || (lvaTable[srcLclNum].lvFieldCnt != 1) ||
9816 (lvaTable[destFieldNum].lvType != lvaTable[srcFieldNum].lvType))
9818 requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
9819 JITDUMP(" with mismatched types");
9823 // Are neither dest or src promoted structs?
9824 else if (!destDoFldAsg && !srcDoFldAsg)
9826 requiresCopyBlock = true; // Leave as a CopyBlock
9827 JITDUMP(" with no promoted structs");
9829 else if (destDoFldAsg)
9831 // Match the following kinds of trees:
9832 // fgMorphTree BB01, stmt 9 (before)
9833 // [000052] ------------ const int 8
9834 // [000053] -A--G------- copyBlk void
9835 // [000051] ------------ addr byref
9836 // [000050] ------------ lclVar long V07 loc5
9837 // [000054] --------R--- <list> void
9838 // [000049] ------------ addr byref
9839 // [000048] ------------ lclVar struct(P) V06 loc4
9840 // long V06.h (offs=0x00) -> V17 tmp9
9841 // Yields this transformation
9842 // fgMorphCopyBlock (after):
9843 // [000050] ------------ lclVar long V07 loc5
9844 // [000085] -A---------- = long
9845 // [000083] D------N---- lclVar long V17 tmp9
9847 if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) &&
9848 (blockWidth == genTypeSize(srcLclVar->TypeGet())))
9850 // Reject the following tree:
9851 // - seen on x86chk jit\jit64\hfa\main\hfa_sf3E_r.exe
9853 // fgMorphTree BB01, stmt 6 (before)
9854 // [000038] ------------- const int 4
9855 // [000039] -A--G-------- copyBlk void
9856 // [000037] ------------- addr byref
9857 // [000036] ------------- lclVar int V05 loc3
9858 // [000040] --------R---- <list> void
9859 // [000035] ------------- addr byref
9860 // [000034] ------------- lclVar struct(P) V04 loc2
9861 // float V04.f1 (offs=0x00) -> V13 tmp6
9862 // As this would transform into
9863 // float V13 = int V05
9865 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart;
9866 var_types destType = lvaTable[fieldLclNum].TypeGet();
9867 if (srcLclVar->TypeGet() == destType)
9869 srcSingleLclVarAsg = true;
9875 assert(srcDoFldAsg);
9876 // Check for the symmetric case (which happens for the _pointer field of promoted spans):
9878 // [000240] -----+------ /--* lclVar struct(P) V18 tmp9
9879 // /--* byref V18._value (offs=0x00) -> V30 tmp21
9880 // [000245] -A------R--- * = struct (copy)
9881 // [000244] -----+------ \--* obj(8) struct
9882 // [000243] -----+------ \--* addr byref
9883 // [000242] D----+-N---- \--* lclVar byref V28 tmp19
9885 if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) &&
9886 (blockWidth == genTypeSize(destLclVar->TypeGet())))
9888 // Check for type agreement
9889 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart;
9890 var_types srcType = lvaTable[fieldLclNum].TypeGet();
9891 if (destLclVar->TypeGet() == srcType)
9893 destSingleLclVarAsg = true;
9899 // If we require a copy block the set both of the field assign bools to false
9900 if (requiresCopyBlock)
9902 // If a copy block is required then we won't do field by field assignments
9903 destDoFldAsg = false;
9904 srcDoFldAsg = false;
9907 JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");
9909 // Mark the dest/src structs as DoNotEnreg
9910 // when they are not reg-sized non-field-addressed structs and we are using a CopyBlock
9911 // or the struct is not promoted
9913 if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg)
9915 if (!destLclVar->lvRegStruct)
9917 // Mark it as DoNotEnregister.
9918 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
9922 if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg)
9924 if (!srcLclVar->lvRegStruct)
9926 lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp));
9930 if (requiresCopyBlock)
9932 #if CPU_USES_BLOCK_MOVE
9933 compBlkOpUsed = true;
9935 var_types asgType = dest->TypeGet();
9936 dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
9937 asg->gtOp.gtOp1 = dest;
9938 asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
9940 // Note that the unrolling of CopyBlk is only implemented on some platforms.
9941 // Currently that includes x64 and ARM but not x86: the code generation for this
9942 // construct requires the ability to mark certain regions of the generated code
9943 // as non-interruptible, and the GC encoding for the latter platform does not
9944 // have this capability.
9946 // If we have a CopyObj with a dest on the stack
9947 // we will convert it into an GC Unsafe CopyBlk that is non-interruptible
9948 // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes).
9949 // (This is not supported for the JIT32_GCENCODER, for which fgMorphUnsafeBlk is a no-op.)
9951 if (destOnStack && (dest->OperGet() == GT_OBJ))
9953 fgMorphUnsafeBlk(dest->AsObj());
9956 // Eliminate the "OBJ or BLK" node on the rhs.
9957 rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/);
9958 asg->gtOp.gtOp2 = rhs;
9960 #ifdef LEGACY_BACKEND
9961 if (!rhs->OperIsIndir())
9963 noway_assert(rhs->gtOper == GT_LCL_VAR);
9964 GenTree* rhsAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, rhs);
9965 rhs = gtNewOperNode(GT_IND, TYP_STRUCT, rhsAddr);
9967 #endif // LEGACY_BACKEND
9968 // Formerly, liveness did not consider copyblk arguments of simple types as being
9969 // a use or def, so these variables were marked as address-exposed.
9970 // TODO-1stClassStructs: This should no longer be needed.
9971 if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar))
9973 JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
9974 lvaTable[srcLclNum].lvAddrExposed = true;
9977 if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar))
9979 JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
9980 lvaTable[destLclNum].lvAddrExposed = true;
9987 // Otherwise we convert this CopyBlock into individual field by field assignments
9992 GenTreePtr addrSpill = nullptr;
9993 unsigned addrSpillTemp = BAD_VAR_NUM;
9994 bool addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame
9996 unsigned fieldCnt = DUMMY_INIT(0);
9998 if (destDoFldAsg && srcDoFldAsg)
10000 // To do fieldwise assignments for both sides, they'd better be the same struct type!
10001 // All of these conditions were checked above...
10002 assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
10003 assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);
10005 fieldCnt = destLclVar->lvFieldCnt;
10006 goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field
10009 else if (destDoFldAsg)
10011 fieldCnt = destLclVar->lvFieldCnt;
10012 rhs = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/);
10013 if (srcAddr == nullptr)
10015 srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */);
10020 assert(srcDoFldAsg);
10021 fieldCnt = srcLclVar->lvFieldCnt;
10022 dest = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/);
10023 if (dest->OperIsBlk())
10025 (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT);
10027 destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
10032 noway_assert(!srcDoFldAsg);
10033 if (gtClone(srcAddr))
10035 // srcAddr is simple expression. No need to spill.
10036 noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
10040 // srcAddr is complex expression. Clone and spill it (unless the destination is
10041 // a struct local that only has one field, in which case we'd only use the
10042 // address value once...)
10043 if (destLclVar->lvFieldCnt > 1)
10045 addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr'
10046 noway_assert(addrSpill != nullptr);
10053 noway_assert(!destDoFldAsg);
10055 // If we're doing field-wise stores, to an address within a local, and we copy
10056 // the address into "addrSpill", do *not* declare the original local var node in the
10057 // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the
10058 // field-wise assignments as an "indirect" assignment to the local.
10059 // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before
10061 if (lclVarTree != nullptr)
10063 lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
10066 if (gtClone(destAddr))
10068 // destAddr is simple expression. No need to spill
10069 noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
10073 // destAddr is complex expression. Clone and spill it (unless
10074 // the source is a struct local that only has one field, in which case we'd only
10075 // use the address value once...)
10076 if (srcLclVar->lvFieldCnt > 1)
10078 addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr'
10079 noway_assert(addrSpill != nullptr);
10082 // TODO-CQ: this should be based on a more general
10083 // "BaseAddress" method, that handles fields of structs, before or after
10085 if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR)
10087 if (addrSpill->gtOp.gtOp1->IsLocal())
10089 // We will *not* consider this to define the local, but rather have each individual field assign
10090 // be a definition.
10091 addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
10092 assert(lvaGetPromotionType(addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum) !=
10093 PROMOTION_TYPE_INDEPENDENT);
10094 addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our
10095 // local stack frame
10101 if (addrSpill != nullptr)
10103 // Spill the (complex) address to a BYREF temp.
10104 // Note, at most one address may need to be spilled.
10105 addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
10107 lvaTable[addrSpillTemp].lvType = TYP_BYREF;
10109 if (addrSpillIsStackDest)
10111 lvaTable[addrSpillTemp].lvStackByref = true;
10114 tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill);
10116 #ifndef LEGACY_BACKEND
10117 // If we are assigning the address of a LclVar here
10118 // liveness does not account for this kind of address taken use.
10120 // We have to mark this local as address exposed so
10121 // that we don't delete the definition for this LclVar
10122 // as a dead store later on.
10124 if (addrSpill->OperGet() == GT_ADDR)
10126 GenTreePtr addrOp = addrSpill->gtOp.gtOp1;
10127 if (addrOp->IsLocal())
10129 unsigned lclVarNum = addrOp->gtLclVarCommon.gtLclNum;
10130 lvaTable[lclVarNum].lvAddrExposed = true;
10131 lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
10134 #endif // !LEGACY_BACKEND
10139 for (unsigned i = 0; i < fieldCnt; ++i)
10141 FieldSeqNode* curFieldSeq = nullptr;
10144 noway_assert(destLclNum != BAD_VAR_NUM);
10145 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
10146 dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
10147 // If it had been labeled a "USEASG", assignments to the the individual promoted fields are not.
10148 if (destAddr != nullptr)
10150 noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
10151 dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
10155 noway_assert(lclVarTree != nullptr);
10156 dest->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
10158 // Don't CSE the lhs of an assignment.
10159 dest->gtFlags |= GTF_DONT_CSE;
10163 noway_assert(srcDoFldAsg);
10164 noway_assert(srcLclNum != BAD_VAR_NUM);
10165 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
10167 if (destSingleLclVarAsg)
10169 noway_assert(fieldCnt == 1);
10170 noway_assert(destLclVar != nullptr);
10171 noway_assert(addrSpill == nullptr);
10173 dest = gtNewLclvNode(destLclNum, destLclVar->TypeGet());
10179 assert(addrSpillTemp != BAD_VAR_NUM);
10180 dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
10184 dest = gtCloneExpr(destAddr);
10185 noway_assert(dest != nullptr);
10187 // Is the address of a local?
10188 GenTreeLclVarCommon* lclVarTree = nullptr;
10189 bool isEntire = false;
10190 bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr);
10191 if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
10193 lclVarTree->gtFlags |= GTF_VAR_DEF;
10196 lclVarTree->gtFlags |= GTF_VAR_USEASG;
10201 GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
10202 // Have to set the field sequence -- which means we need the field handle.
10203 CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
10204 CORINFO_FIELD_HANDLE fieldHnd =
10205 info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
10206 curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
10207 fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
10209 dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
10211 dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);
10213 // !!! The destination could be on stack. !!!
10214 // This flag will let us choose the correct write barrier.
10215 dest->gtFlags |= GTF_IND_TGTANYWHERE;
10221 noway_assert(srcLclNum != BAD_VAR_NUM);
10222 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
10223 src = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
10225 noway_assert(srcLclVarTree != nullptr);
10226 src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK;
10227 // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE,
10228 // but they are when they are under a GT_ADDR.
10229 src->gtFlags |= GTF_DONT_CSE;
10233 noway_assert(destDoFldAsg);
10234 noway_assert(destLclNum != BAD_VAR_NUM);
10235 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
10237 if (srcSingleLclVarAsg)
10239 noway_assert(fieldCnt == 1);
10240 noway_assert(srcLclVar != nullptr);
10241 noway_assert(addrSpill == nullptr);
10243 src = gtNewLclvNode(srcLclNum, srcLclVar->TypeGet());
10249 assert(addrSpillTemp != BAD_VAR_NUM);
10250 src = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
10254 src = gtCloneExpr(srcAddr);
10255 noway_assert(src != nullptr);
10258 CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle();
10259 CORINFO_FIELD_HANDLE fieldHnd =
10260 info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
10261 curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
10263 src = gtNewOperNode(GT_ADD, TYP_BYREF, src,
10264 new (this, GT_CNS_INT)
10265 GenTreeIntCon(TYP_I_IMPL, lvaTable[fieldLclNum].lvFldOffset, curFieldSeq));
10267 src = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), src);
10271 noway_assert(dest->TypeGet() == src->TypeGet());
10273 asg = gtNewAssignNode(dest, src);
10275 // If we spilled the address, and we didn't do individual field assignments to promoted fields,
10276 // and it was of a local, record the assignment as an indirect update of a local.
10277 if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM)
10279 curFieldSeq = GetFieldSeqStore()->Append(destFldSeq, curFieldSeq);
10280 bool isEntire = (genTypeSize(var_types(lvaTable[destLclNum].lvType)) == genTypeSize(dest->TypeGet()));
10281 IndirectAssignmentAnnotation* pIndirAnnot =
10282 new (this, CMK_Unknown) IndirectAssignmentAnnotation(destLclNum, curFieldSeq, isEntire);
10283 GetIndirAssignMap()->Set(asg, pIndirAnnot);
10286 #if LOCAL_ASSERTION_PROP
10287 if (optLocalAssertionProp)
10289 optAssertionGen(asg);
10291 #endif // LOCAL_ASSERTION_PROP
10295 tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
10306 tree->gtFlags |= GTF_LATE_ARG;
10310 if (tree != oldTree)
10312 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10317 printf("\nfgMorphCopyBlock (after):\n");
10326 // insert conversions and normalize to make tree amenable to register
10327 // FP architectures
10328 GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree)
// Normalize a floating-point arithmetic or comparison node for register-based
// FP targets by inserting explicit casts so that operand types agree (see the
// header comment above: "insert conversions and normalize to make tree
// amenable to register FP architectures").
10330 if (tree->OperIsArithmetic())
// Arithmetic case: both operands are widened/narrowed to the node's own
// floating-point result type.
10332 if (varTypeIsFloating(tree))
10334 GenTreePtr op1 = tree->gtOp.gtOp1;
10335 GenTreePtr op2 = tree->gtGetOp2();
// Insert a cast when an operand's type differs from the result type
// (e.g. a TYP_FLOAT operand of a TYP_DOUBLE add).
10337 if (op1->TypeGet() != tree->TypeGet())
10339 tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), op1, tree->TypeGet());
10341 if (op2->TypeGet() != tree->TypeGet())
10343 tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), op2, tree->TypeGet());
// Comparison case: a relop's own type is not floating, so instead the
// narrower FP operand is widened to TYP_DOUBLE when the two sides differ.
10347 else if (tree->OperIsCompare())
10349 GenTreePtr op1 = tree->gtOp.gtOp1;
10351 if (varTypeIsFloating(op1))
10353 GenTreePtr op2 = tree->gtGetOp2();
// If one side of the compare is floating, the other must be too.
10354 assert(varTypeIsFloating(op2));
10356 if (op1->TypeGet() != op2->TypeGet())
10358 // both had better be floating, just one bigger than other
10359 if (op1->TypeGet() == TYP_FLOAT)
10361 assert(op2->TypeGet() == TYP_DOUBLE);
10362 tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
10364 else if (op2->TypeGet() == TYP_FLOAT)
10366 assert(op1->TypeGet() == TYP_DOUBLE);
10367 tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
10376 GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
// Recognize a comparison of a CORINFO_HELP_BOX_NULLABLE helper call against
// the integral constant 0 (i.e. "boxed nullable == null") and rewrite it to
// read the Nullable<T> 'hasValue' field directly, avoiding the box.
10378 GenTree* op1 = compare->gtOp.gtOp1;
10379 GenTree* op2 = compare->gtOp.gtOp2;
10381 GenTreeCall* opCall;
10383 // recognize this pattern:
10385 // stmtExpr void (IL 0x000... ???)
10389 // call help ref HELPER.CORINFO_HELP_BOX_NULLABLE
10390 // const(h) long 0x7fed96836c8 class
10392 // ld.lclVar struct V00 arg0
10395 // which comes from this code (reported by customer as being slow) :
10397 // private static bool IsNull<T>(T arg)
10399 // return arg==null;
// Either side of the compare may hold the constant; the other side must be
// the helper call.
10403 if (op1->IsCnsIntOrI() && op2->IsHelperCall())
10406 opCall = op2->AsCall();
10408 else if (op1->IsHelperCall() && op2->IsCnsIntOrI())
10411 opCall = op1->AsCall();
// Only a comparison against 0 (null) matches the pattern.
// NOTE(review): 'opCns' is assigned in elided lines above — presumably the
// constant side of the compare; confirm against the full source.
10418 if (!opCns->IsIntegralConst(0))
// The call must be the BOX_NULLABLE helper specifically.
10423 if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE)
10428 // replace the box with an access of the nullable 'hasValue' field which is at the zero offset
// The second call argument supplies the Nullable<T> struct address; an
// indirection through it yields the 'hasValue' bool at offset zero.
10429 GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1);
// Install the new operand on whichever side previously held the helper call.
10433 compare->gtOp.gtOp1 = newOp;
10437 compare->gtOp.gtOp2 = newOp;
10443 #ifdef FEATURE_SIMD
10445 //--------------------------------------------------------------------------------------------------------------
10446 // getSIMDStructFromField:
10447 // Checking whether the field belongs to a simd struct or not. If it is, return the GenTreePtr for
10448 // the struct node, also base type, field index and simd size. If it is not, just return nullptr.
10449 // Usually if the tree node is from a simd lclvar which is not used in any SIMD intrinsic, then we
10450 // should return nullptr, since in this case we should treat SIMD struct as a regular struct.
10451 // However if no matter what, you just want get simd struct node, you can set the ignoreUsedInSIMDIntrinsic
10452 // as true. Then there will be no IsUsedInSIMDIntrinsic checking, and it will return SIMD struct node
10453 // if the struct is a SIMD struct.
10456 // tree - GentreePtr. This node will be checked to see this is a field which belongs to a simd
10457 // struct used for simd intrinsic or not.
10458 // pBaseTypeOut - var_types pointer, if the tree node is the tree we want, we set *pBaseTypeOut
10459 // to simd lclvar's base type.
10460 // indexOut - unsigned pointer, if the tree is used for simd intrinsic, we will set *indexOut
10461 // equals to the index number of this field.
10462 // simdSizeOut - unsigned pointer, if the tree is used for simd intrinsic, set the *simdSizeOut
10463 // equals to the simd struct size which this tree belongs to.
10464 // ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will ignore
10465 // the UsedInSIMDIntrinsic check.
10468 // A GenTreePtr which points the simd lclvar tree belongs to. If the tree is not the simd
10469 // intrinsic related field, return nullptr.
10472 GenTreePtr Compiler::getSIMDStructFromField(GenTreePtr tree,
10473 var_types* pBaseTypeOut,
10474 unsigned* indexOut,
10475 unsigned* simdSizeOut,
10476 bool ignoreUsedInSIMDIntrinsic /*false*/)
// See the function-header comment above for the full contract: returns the
// SIMD struct node when 'tree' is a field of a SIMD value, else nullptr.
10478 GenTreePtr ret = nullptr;
// Only GT_FIELD nodes can be a SIMD-struct field access.
10479 if (tree->OperGet() == GT_FIELD)
10481 GenTreePtr objRef = tree->gtField.gtFldObj;
10482 if (objRef != nullptr)
10484 GenTreePtr obj = nullptr;
// Peel the GT_ADDR to get at the struct node itself.
10485 if (objRef->gtOper == GT_ADDR)
10487 obj = objRef->gtOp.gtOp1;
10489 else if (ignoreUsedInSIMDIntrinsic)
// Case 1: the object is a SIMD-typed local variable.
10498 if (isSIMDTypeLocal(obj))
10500 unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
10501 LclVarDsc* varDsc = &lvaTable[lclNum];
// Honor the lvIsUsedInSIMDIntrinsic check unless the caller asked to skip it
// (see ignoreUsedInSIMDIntrinsic in the header comment).
10502 if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic)
10504 *simdSizeOut = varDsc->lvExactSize;
10505 *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj);
// Case 2: the object is itself a SIMD intrinsic node; take size and base
// type directly from it.
10509 else if (obj->OperGet() == GT_SIMD)
10512 GenTreeSIMD* simdNode = obj->AsSIMD();
10513 *simdSizeOut = simdNode->gtSIMDSize;
10514 *pBaseTypeOut = simdNode->gtSIMDBaseType;
// On a match, derive the element index from the field's byte offset divided
// by the element (base type) size.
10518 if (ret != nullptr)
10520 unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut);
10521 *indexOut = tree->gtField.gtFldOffset / BaseTypeSize;
10526 /*****************************************************************************
10527 * If a read operation tries to access simd struct field, then transform the
10528 * operation to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
10529 * Otherwise, return the old tree.
10531 * tree - GenTreePtr. If this pointer points to simd struct which is used for simd
10532 * intrinsic, we will morph it as simd intrinsic SIMDIntrinsicGetItem.
10534 * A GenTreePtr which points to the new tree. If the tree is not for simd intrinsic,
10538 GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree)
// Transform a read of a SIMD struct field into SIMDIntrinsicGetItem; returns
// the original tree unchanged when it is not such a field access (see the
// header comment above).
10540 unsigned index = 0;
10541 var_types baseType = TYP_UNKNOWN;
10542 unsigned simdSize = 0;
// Check whether this field read is an element access of a SIMD struct.
10543 GenTreePtr simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
10544 if (simdStructNode != nullptr)
// The accessed element must lie entirely within the SIMD value.
10546 assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
10547 GenTree* op2 = gtNewIconNode(index);
// Replace the field read with SIMDIntrinsicGetItem(struct, index).
10548 tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
// Mark the new node so global morph does not re-morph it.
10550 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10556 /*****************************************************************************
10557 * Transform an assignment of a SIMD struct field to SIMD intrinsic
10558 * SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment,
10559 * then return the old tree.
10561 * tree - GenTreePtr. If this pointer points to simd struct which is used for simd
10562 * intrinsic, we will morph it as simd intrinsic set.
10564 * A GenTreePtr which points to the new tree. If the tree is not for simd intrinsic,
10568 GenTreePtr Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree)
// Transform an assignment to a SIMD struct field into the corresponding
// SIMDIntrinsicSet* intrinsic; returns the tree unchanged when the LHS is not
// such a field (see the header comment above).
10570 assert(tree->OperGet() == GT_ASG);
10571 GenTreePtr op1 = tree->gtGetOp1();
10572 GenTreePtr op2 = tree->gtGetOp2();
10574 unsigned index = 0;
10575 var_types baseType = TYP_UNKNOWN;
10576 unsigned simdSize = 0;
// Is the assignment destination a field of a SIMD struct?
10577 GenTreePtr simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize);
10578 if (simdOp1Struct != nullptr)
10580 // Generate the simd set intrinsic
10581 assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
10583 SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid;
// Select the Set* intrinsic for the element index. NOTE(review): the
// selector statements are elided here — presumably a switch on 'index';
// the noway_assert below shows only indices 0..3 are supported.
10587 simdIntrinsicID = SIMDIntrinsicSetX;
10590 simdIntrinsicID = SIMDIntrinsicSetY;
10593 simdIntrinsicID = SIMDIntrinsicSetZ;
10596 simdIntrinsicID = SIMDIntrinsicSetW;
10599 noway_assert(!"There is no set intrinsic for index bigger than 3");
// Rewrite "simd.field = value" as "simd = SIMDIntrinsicSet*(simd, value)":
// the cloned struct becomes the assignment target, the intrinsic the source.
10602 GenTreePtr target = gtClone(simdOp1Struct);
10603 assert(target != nullptr);
10604 GenTreePtr simdTree = gtNewSIMDNode(target->gtType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize);
10605 tree->gtOp.gtOp1 = target;
10606 tree->gtOp.gtOp2 = simdTree;
// Mark the rewritten node so global morph does not re-morph it.
10608 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10615 #endif // FEATURE_SIMD
10617 /*****************************************************************************
10619 * Transform the given GTK_SMPOP tree for code generation.
10623 #pragma warning(push)
10624 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
10626 GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
10628 // this extra scope is a workaround for a gcc bug
10629 // the inline destructor for ALLOCA_CHECK confuses the control
10630 // flow and gcc thinks that the function never returns
10633 noway_assert(tree->OperKind() & GTK_SMPOP);
10635 /* The steps in this function are :
10636 o Perform required preorder processing
10637 o Process the first, then second operand, if any
10638 o Perform required postorder morphing
10639 o Perform optional postorder morphing if optimizing
10642 bool isQmarkColon = false;
10644 #if LOCAL_ASSERTION_PROP
10645 AssertionIndex origAssertionCount = DUMMY_INIT(0);
10646 AssertionDsc* origAssertionTab = DUMMY_INIT(NULL);
10648 AssertionIndex thenAssertionCount = DUMMY_INIT(0);
10649 AssertionDsc* thenAssertionTab = DUMMY_INIT(NULL);
10654 #if !FEATURE_STACK_FP_X87
10655 tree = fgMorphForRegisterFP(tree);
10659 genTreeOps oper = tree->OperGet();
10660 var_types typ = tree->TypeGet();
10661 GenTreePtr op1 = tree->gtOp.gtOp1;
10662 GenTreePtr op2 = tree->gtGetOp2IfPresent();
10664 /*-------------------------------------------------------------------------
10665 * First do any PRE-ORDER processing
10670 // Some arithmetic operators need to use a helper call to the EE
10674 tree = fgDoNormalizeOnStore(tree);
10675 /* fgDoNormalizeOnStore can change op2 */
10676 noway_assert(op1 == tree->gtOp.gtOp1);
10677 op2 = tree->gtOp.gtOp2;
10679 #ifdef FEATURE_SIMD
10681 // We should check whether op2 should be assigned to a SIMD field or not.
10682 // If it is, we should translate the tree to simd intrinsic.
10683 assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0));
10684 GenTreePtr newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree);
10685 typ = tree->TypeGet();
10686 op1 = tree->gtGetOp1();
10687 op2 = tree->gtGetOp2();
10689 assert((tree == newTree) && (tree->OperGet() == oper));
10690 if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
10692 tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
10715 // We can't CSE the LHS of an assignment. Only r-values can be CSEed.
10716 // Previously, the "lhs" (addr) of a block op was CSE'd. So, to duplicate the former
10717 // behavior, allow CSE'ing if is a struct type (or a TYP_REF transformed from a struct type)
10718 // TODO-1stClassStructs: improve this.
10719 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
10721 op1->gtFlags |= GTF_DONT_CSE;
10727 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
10728 op1->gtFlags |= GTF_DONT_CSE;
10736 if (op1->OperKind() & GTK_RELOP)
10738 noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK));
10739 /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does
10740 not need to materialize the result as a 0 or 1. */
10742 /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */
10743 op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
10745 // Request that the codegen for op1 sets the condition flags
10746 // when it generates the code for op1.
10748 // Codegen for op1 must set the condition flags if
10749 // this method returns true.
10751 op1->gtRequestSetFlags();
10755 GenTreePtr effOp1 = op1->gtEffectiveVal();
10756 noway_assert((effOp1->gtOper == GT_CNS_INT) &&
10757 (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1)));
10762 #if LOCAL_ASSERTION_PROP
10763 if (optLocalAssertionProp)
10766 isQmarkColon = true;
10771 return fgMorphArrayIndex(tree);
10774 return fgMorphCast(tree);
10778 #ifndef _TARGET_64BIT_
10779 if (typ == TYP_LONG)
10781 /* For (long)int1 * (long)int2, we don't actually do the
10782 casts, and just multiply the 32 bit values, which will
10783 give us the 64 bit result in edx:eax */
10786 if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST &&
10787 genActualType(op1->CastFromType()) == TYP_INT &&
10788 genActualType(op2->CastFromType()) == TYP_INT) &&
10789 !op1->gtOverflow() && !op2->gtOverflow())
10791 // The casts have to be of the same signedness.
10792 if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED))
10794 // We see if we can force an int constant to change its signedness
10795 GenTreePtr constOp;
10796 if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT)
10798 else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT)
10801 goto NO_MUL_64RSLT;
10803 if (((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000)))
10804 constOp->gtFlags ^= GTF_UNSIGNED;
10806 goto NO_MUL_64RSLT;
10809 // The only combination that can overflow
10810 if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED))
10811 goto NO_MUL_64RSLT;
10813 /* Remaining combinations can never overflow during long mul. */
10815 tree->gtFlags &= ~GTF_OVERFLOW;
10817 /* Do unsigned mul only if the casts were unsigned */
10819 tree->gtFlags &= ~GTF_UNSIGNED;
10820 tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED;
10822 /* Since we are committing to GTF_MUL_64RSLT, we don't want
10823 the casts to be folded away. So morph the castees directly */
10825 op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1);
10826 op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1);
10828 // Propagate side effect flags up the tree
10829 op1->gtFlags &= ~GTF_ALL_EFFECT;
10830 op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
10831 op2->gtFlags &= ~GTF_ALL_EFFECT;
10832 op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
10834 // If the GT_MUL can be altogether folded away, we should do that.
10836 if ((op1->gtCast.CastOp()->OperKind() & op2->gtCast.CastOp()->OperKind() & GTK_CONST) &&
10837 opts.OptEnabled(CLFLG_CONSTANTFOLD))
10839 tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1);
10840 tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2);
10841 noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST);
10842 tree = gtFoldExprConst(tree);
10843 noway_assert(tree->OperIsConst());
10847 tree->gtFlags |= GTF_MUL_64RSLT;
10849 // If op1 and op2 are unsigned casts, we need to do an unsigned mult
10850 tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED);
10852 // Insert GT_NOP nodes for the cast operands so that they do not get folded
10853 // And propagate the new flags. We don't want to CSE the casts because
10854 // codegen expects GTF_MUL_64RSLT muls to have a certain layout.
10856 if (op1->gtCast.CastOp()->OperGet() != GT_NOP)
10858 op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
10859 op1->gtFlags &= ~GTF_ALL_EFFECT;
10860 op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
10863 if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
10865 op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
10866 op2->gtFlags &= ~GTF_ALL_EFFECT;
10867 op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
10870 op1->gtFlags |= GTF_DONT_CSE;
10871 op2->gtFlags |= GTF_DONT_CSE;
10873 tree->gtFlags &= ~GTF_ALL_EFFECT;
10874 tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);
10876 goto DONE_MORPHING_CHILDREN;
10878 else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0)
10881 if (tree->gtOverflow())
10882 helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF;
10884 helper = CORINFO_HELP_LMUL;
10886 goto USE_HELPER_FOR_ARITH;
10890 /* We are seeing this node again. We have decided to use
10891 GTF_MUL_64RSLT, so leave it alone. */
10893 assert(tree->gtIsValid64RsltMul());
10896 #endif // !_TARGET_64BIT_
10901 #ifndef _TARGET_64BIT_
10902 if (typ == TYP_LONG)
10904 helper = CORINFO_HELP_LDIV;
10905 goto USE_HELPER_FOR_ARITH;
10908 #if USE_HELPERS_FOR_INT_DIV
10909 if (typ == TYP_INT && !fgIsSignedDivOptimizable(op2))
10911 helper = CORINFO_HELP_DIV;
10912 goto USE_HELPER_FOR_ARITH;
10915 #endif // !_TARGET_64BIT_
10917 #ifndef LEGACY_BACKEND
10918 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
10920 op2 = gtFoldExprConst(op2);
10922 #endif // !LEGACY_BACKEND
10927 #ifndef _TARGET_64BIT_
10928 if (typ == TYP_LONG)
10930 helper = CORINFO_HELP_ULDIV;
10931 goto USE_HELPER_FOR_ARITH;
10933 #if USE_HELPERS_FOR_INT_DIV
10934 if (typ == TYP_INT && !fgIsUnsignedDivOptimizable(op2))
10936 helper = CORINFO_HELP_UDIV;
10937 goto USE_HELPER_FOR_ARITH;
10940 #endif // _TARGET_64BIT_
10945 if (varTypeIsFloating(typ))
10947 helper = CORINFO_HELP_DBLREM;
10949 if (op1->TypeGet() == TYP_FLOAT)
10951 if (op2->TypeGet() == TYP_FLOAT)
10953 helper = CORINFO_HELP_FLTREM;
10957 tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
10960 else if (op2->TypeGet() == TYP_FLOAT)
10962 tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
10964 goto USE_HELPER_FOR_ARITH;
10967 // Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod.
10968 // A similar optimization for signed mod will not work for a negative perfectly divisible
10969 // HI-word. To make it correct, we would need to divide without the sign and then flip the
10970 // result sign after mod. This requires 18 opcodes + flow making it not worthy to inline.
10971 goto ASSIGN_HELPER_FOR_MOD;
10975 #ifdef _TARGET_ARMARCH_
10977 // Note for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization
10979 #else // _TARGET_XARCH
10980 /* If this is an unsigned long mod with op2 which is a cast to long from a
10981 constant int, then don't morph to a call to the helper. This can be done
10982 faster inline using idiv.
10986 if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
10987 ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
10988 ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
10990 if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
10991 op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
10992 op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
10993 (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
10995 tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
10996 noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
10999 if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 &&
11000 op2->gtIntConCommon.LngValue() <= 0x3fffffff)
11002 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
11003 noway_assert(op1->TypeGet() == TYP_LONG);
11005 // Update flags for op1 morph
11006 tree->gtFlags &= ~GTF_ALL_EFFECT;
11008 tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant
11010 // If op1 is a constant, then do constant folding of the division operator
11011 if (op1->gtOper == GT_CNS_NATIVELONG)
11013 tree = gtFoldExpr(tree);
11018 #endif // _TARGET_XARCH
11020 ASSIGN_HELPER_FOR_MOD:
11022 // For "val % 1", return 0 if op1 doesn't have any side effects
11023 // and we are not in the CSE phase, we cannot discard 'tree'
11024 // because it may contain CSE expressions that we haven't yet examined.
11026 if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase)
11028 if (op2->IsIntegralConst(1))
11030 GenTreePtr zeroNode = gtNewZeroConNode(typ);
11032 zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
11034 DEBUG_DESTROY_NODE(tree);
11039 #ifndef _TARGET_64BIT_
11040 if (typ == TYP_LONG)
11042 helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD;
11043 goto USE_HELPER_FOR_ARITH;
11046 #if USE_HELPERS_FOR_INT_DIV
11047 if (typ == TYP_INT)
11049 if (oper == GT_UMOD && !fgIsUnsignedModOptimizable(op2))
11051 helper = CORINFO_HELP_UMOD;
11052 goto USE_HELPER_FOR_ARITH;
11054 else if (oper == GT_MOD && !fgIsSignedModOptimizable(op2))
11056 helper = CORINFO_HELP_MOD;
11057 goto USE_HELPER_FOR_ARITH;
11061 #endif // !_TARGET_64BIT_
11063 #ifndef LEGACY_BACKEND
11064 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
11066 op2 = gtFoldExprConst(op2);
11069 #ifdef _TARGET_ARM64_
11071 // For ARM64 we don't have a remainder instruction,
11072 // The architecture manual suggests the following transformation to
11073 // generate code for such operator:
11075 // a % b = a - (a / b) * b;
11077 // NOTE: we should never need to perform this transformation when remorphing, since global morphing
11078 // should already have done so and we do not introduce new modulus nodes in later phases.
11079 assert(!optValnumCSE_phase);
11080 tree = fgMorphModToSubMulDiv(tree->AsOp());
11081 op1 = tree->gtOp.gtOp1;
11082 op2 = tree->gtOp.gtOp2;
11083 #else //_TARGET_ARM64_
11084 // If b is not a power of 2 constant then lowering replaces a % b
11085 // with a - (a / b) * b and applies magic division optimization to
11086 // a / b. The code may already contain an a / b expression (e.g.
11087 // x = a / 10; y = a % 10;) and then we end up with redundant code.
11088 // If we convert % to / here we give CSE the opportunity to eliminate
11089 // the redundant division. If there's no redundant division then
11090 // nothing is lost, lowering would have done this transform anyway.
11092 if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst()))
11094 ssize_t divisorValue = op2->AsIntCon()->IconValue();
11095 size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue)
11096 : static_cast<size_t>(abs(divisorValue));
11098 if (!isPow2(absDivisorValue))
11100 tree = fgMorphModToSubMulDiv(tree->AsOp());
11101 op1 = tree->gtOp.gtOp1;
11102 op2 = tree->gtOp.gtOp2;
11105 #endif //_TARGET_ARM64_
11106 #endif // !LEGACY_BACKEND
11109 USE_HELPER_FOR_ARITH:
11111 /* We have to morph these arithmetic operations into helper calls
11112 before morphing the arguments (preorder), else the arguments
11113 won't get correct values of fgPtrArgCntCur.
11114 However, try to fold the tree first in case we end up with a
11115 simple node which won't need a helper call at all */
11117 noway_assert(tree->OperIsBinary());
11119 GenTreePtr oldTree = tree;
11121 tree = gtFoldExpr(tree);
11123 // Were we able to fold it ?
11124 // Note that gtFoldExpr may return a non-leaf even if successful
11125 // e.g. for something like "expr / 1" - see also bug #290853
11126 if (tree->OperIsLeaf() || (oldTree != tree))
11129 return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree);
11132 // Did we fold it into a comma node with throw?
11133 if (tree->gtOper == GT_COMMA)
11135 noway_assert(fgIsCommaThrow(tree));
11136 return fgMorphTree(tree);
11139 return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2));
11142 // normalize small integer return values
11143 if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) &&
11144 (op1->TypeGet() != TYP_VOID) && fgCastNeeded(op1, info.compRetType))
11146 // Small-typed return values are normalized by the callee
11147 op1 = gtNewCastNode(TYP_INT, op1, info.compRetType);
11149 // Propagate GTF_COLON_COND
11150 op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
11152 tree->gtOp.gtOp1 = fgMorphCast(op1);
11154 // Propagate side effect flags
11155 tree->gtFlags &= ~GTF_ALL_EFFECT;
11156 tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11165 // Check for typeof(...) == obj.GetType()
11166 // Also check for typeof(...) == typeof(...)
11167 // IMPORTANT NOTE: this optimization relies on a one-to-one mapping between
11168 // type handles and instances of System.Type
11169 // If this invariant is ever broken, the optimization will need updating
11170 CLANG_FORMAT_COMMENT_ANCHOR;
11172 #ifdef LEGACY_BACKEND
11173 if (op1->gtOper == GT_CALL && op2->gtOper == GT_CALL &&
11174 ((op1->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
11175 (op1->gtCall.gtCallType == CT_HELPER)) &&
11176 ((op2->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
11177 (op2->gtCall.gtCallType == CT_HELPER)))
11179 if ((((op1->gtOper == GT_INTRINSIC) &&
11180 (op1->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
11181 ((op1->gtOper == GT_CALL) && (op1->gtCall.gtCallType == CT_HELPER))) &&
11182 (((op2->gtOper == GT_INTRINSIC) &&
11183 (op2->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
11184 ((op2->gtOper == GT_CALL) && (op2->gtCall.gtCallType == CT_HELPER))))
11187 GenTreePtr pGetClassFromHandle;
11188 GenTreePtr pGetType;
11190 #ifdef LEGACY_BACKEND
11191 bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall());
11192 bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall());
11194 bool bOp1ClassFromHandle =
11195 op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall()) : false;
11196 bool bOp2ClassFromHandle =
11197 op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall()) : false;
11200 // Optimize typeof(...) == typeof(...)
11201 // Typically this occurs in generic code that attempts a type switch
11202 // e.g. typeof(T) == typeof(int)
11204 if (bOp1ClassFromHandle && bOp2ClassFromHandle)
11206 GenTreePtr classFromHandleArg1 = tree->gtOp.gtOp1->gtCall.gtCallArgs->gtOp.gtOp1;
11207 GenTreePtr classFromHandleArg2 = tree->gtOp.gtOp2->gtCall.gtCallArgs->gtOp.gtOp1;
11209 GenTreePtr compare = gtNewOperNode(oper, TYP_INT, classFromHandleArg1, classFromHandleArg2);
11211 compare->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
11213 // Morph and return
11214 return fgMorphTree(compare);
11216 else if (bOp1ClassFromHandle || bOp2ClassFromHandle)
11219 // Now check for GetClassFromHandle(handle) == obj.GetType()
11222 if (bOp1ClassFromHandle)
11224 pGetClassFromHandle = tree->gtOp.gtOp1;
11229 pGetClassFromHandle = tree->gtOp.gtOp2;
11233 GenTreePtr pGetClassFromHandleArgument = pGetClassFromHandle->gtCall.gtCallArgs->gtOp.gtOp1;
11234 GenTreePtr pConstLiteral = pGetClassFromHandleArgument;
11236 // Unwrap GT_NOP node used to prevent constant folding
11237 if (pConstLiteral->gtOper == GT_NOP && pConstLiteral->gtType == TYP_I_IMPL)
11239 pConstLiteral = pConstLiteral->gtOp.gtOp1;
11242 // In the ngen case, we have to go thru an indirection to get the right handle.
11243 if (pConstLiteral->gtOper == GT_IND)
11245 pConstLiteral = pConstLiteral->gtOp.gtOp1;
11247 #ifdef LEGACY_BACKEND
11249 if (pGetType->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC &&
11250 info.compCompHnd->getIntrinsicID(pGetType->gtCall.gtCallMethHnd) ==
11251 CORINFO_INTRINSIC_Object_GetType &&
11253 if ((pGetType->gtOper == GT_INTRINSIC) &&
11254 (pGetType->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType) &&
11256 pConstLiteral->gtOper == GT_CNS_INT && pConstLiteral->gtType == TYP_I_IMPL)
11258 CORINFO_CLASS_HANDLE clsHnd =
11259 CORINFO_CLASS_HANDLE(pConstLiteral->gtIntCon.gtCompileTimeHandle);
11261 if (info.compCompHnd->canInlineTypeCheckWithObjectVTable(clsHnd))
11263 // Method Table tree
11264 CLANG_FORMAT_COMMENT_ANCHOR;
11265 #ifdef LEGACY_BACKEND
11266 GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp);
11268 GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtUnOp.gtOp1);
11270 objMT->gtFlags |= GTF_EXCEPT; // Null ref exception if object is null
11271 compCurBB->bbFlags |= BBF_HAS_VTABREF;
11272 optMethodFlags |= OMF_HAS_VTABLEREF;
11274 // Method table constant
11275 GenTreePtr cnsMT = pGetClassFromHandleArgument;
11277 GenTreePtr compare = gtNewOperNode(oper, TYP_INT, objMT, cnsMT);
11279 compare->gtFlags |=
11280 tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
11282 // Morph and return
11283 return fgMorphTree(compare);
11288 fgMorphRecognizeBoxNullable(tree);
11289 op1 = tree->gtOp.gtOp1;
11290 op2 = tree->gtGetOp2IfPresent();
11294 #ifdef _TARGET_ARM_
11296 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)
11298 switch (tree->TypeGet())
11301 return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1));
11303 return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1));
11315 #if !CPU_HAS_FP_SUPPORT
11316 tree = fgMorphToEmulatedFP(tree);
11319 /* Could this operator throw an exception? */
11320 if (fgGlobalMorph && tree->OperMayThrow())
11322 if (((tree->OperGet() != GT_IND) && !tree->OperIsBlk()) || fgAddrCouldBeNull(tree->gtOp.gtOp1))
11324 /* Mark the tree node as potentially throwing an exception */
11325 tree->gtFlags |= GTF_EXCEPT;
11329 /*-------------------------------------------------------------------------
11330 * Process the first operand, if any
11336 #if LOCAL_ASSERTION_PROP
11337 // If we are entering the "then" part of a Qmark-Colon we must
11338 // save the state of the current copy assignment table
11339 // so that we can restore this state when entering the "else" part
11342 noway_assert(optLocalAssertionProp);
11343 if (optAssertionCount)
11345 noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
11346 unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
11347 origAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
11348 origAssertionCount = optAssertionCount;
11349 memcpy(origAssertionTab, optAssertionTabPrivate, tabSize);
11353 origAssertionCount = 0;
11354 origAssertionTab = nullptr;
11357 #endif // LOCAL_ASSERTION_PROP
11359 // We might need a new MorphAddressContext context. (These are used to convey
11360 // parent context about how addresses being calculated will be used; see the
11361 // specification comment for MorphAddrContext for full details.)
11362 // Assume it's an Ind context to start.
11363 MorphAddrContext subIndMac1(MACK_Ind);
11364 MorphAddrContext* subMac1 = mac;
11365 if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind)
11367 switch (tree->gtOper)
11370 if (subMac1 == nullptr)
11372 subMac1 = &subIndMac1;
11373 subMac1->m_kind = MACK_Addr;
11377 // In a comma, the incoming context only applies to the rightmost arg of the
11378 // comma list. The left arg (op1) gets a fresh context.
11385 subMac1 = &subIndMac1;
11392 // For additions, if we're in an IND context keep track of whether
11393 // all offsets added to the address are constant, and their sum.
11394 if (tree->gtOper == GT_ADD && subMac1 != nullptr)
11396 assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock.
11397 GenTreePtr otherOp = tree->gtOp.gtOp2;
11398 // Is the other operator a constant?
11399 if (otherOp->IsCnsIntOrI())
11401 ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset);
11402 totalOffset += otherOp->gtIntConCommon.IconValue();
11403 if (totalOffset.IsOverflow())
11405 // We will consider an offset so large as to overflow as "not a constant" --
11406 // we will do a null check.
11407 subMac1->m_allConstantOffsets = false;
11411 subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue();
11416 subMac1->m_allConstantOffsets = false;
11420 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1);
11422 #if LOCAL_ASSERTION_PROP
11423 // If we are exiting the "then" part of a Qmark-Colon we must
11424 // save the state of the current copy assignment table
11425 // so that we can merge this state with the "else" part exit
11428 noway_assert(optLocalAssertionProp);
11429 if (optAssertionCount)
11431 noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
11432 unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
11433 thenAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
11434 thenAssertionCount = optAssertionCount;
11435 memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize);
11439 thenAssertionCount = 0;
11440 thenAssertionTab = nullptr;
11443 #endif // LOCAL_ASSERTION_PROP
11445 /* Morphing along with folding and inlining may have changed the
11446 * side effect flags, so we have to reset them
11448 * NOTE: Don't reset the exception flags on nodes that may throw */
11450 noway_assert(tree->gtOper != GT_CALL);
11452 if ((tree->gtOper != GT_INTRINSIC) || !IsIntrinsicImplementedByUserCall(tree->gtIntrinsic.gtIntrinsicId))
11454 tree->gtFlags &= ~GTF_CALL;
11457 if (!tree->OperMayThrow())
11459 tree->gtFlags &= ~GTF_EXCEPT;
11462 /* Propagate the new flags */
11463 tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
11465 // &aliasedVar doesn't need GTF_GLOB_REF, though alisasedVar does
11466 // Similarly for clsVar
11467 if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR))
11469 tree->gtFlags &= ~GTF_GLOB_REF;
11473 /*-------------------------------------------------------------------------
11474 * Process the second operand, if any
11480 #if LOCAL_ASSERTION_PROP
11481 // If we are entering the "else" part of a Qmark-Colon we must
11482 // reset the state of the current copy assignment table
11485 noway_assert(optLocalAssertionProp);
11486 optAssertionReset(0);
11487 if (origAssertionCount)
11489 size_t tabSize = origAssertionCount * sizeof(AssertionDsc);
11490 memcpy(optAssertionTabPrivate, origAssertionTab, tabSize);
11491 optAssertionReset(origAssertionCount);
11494 #endif // LOCAL_ASSERTION_PROP
11496 // We might need a new MorphAddressContext context to use in evaluating op2.
11497 // (These are used to convey parent context about how addresses being calculated
11498 // will be used; see the specification comment for MorphAddrContext for full details.)
11499 // Assume it's an Ind context to start.
11500 MorphAddrContext subIndMac2(MACK_Ind);
11501 switch (tree->gtOper)
11504 if (mac != nullptr && mac->m_kind == MACK_Ind)
11506 GenTreePtr otherOp = tree->gtOp.gtOp1;
11507 // Is the other operator a constant?
11508 if (otherOp->IsCnsIntOrI())
11510 mac->m_totalOffset += otherOp->gtIntConCommon.IconValue();
11514 mac->m_allConstantOffsets = false;
11521 tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac);
11523 /* Propagate the side effect flags from op2 */
11525 tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
11527 #if LOCAL_ASSERTION_PROP
11528 // If we are exiting the "else" part of a Qmark-Colon we must
11529 // merge the state of the current copy assignment table with
11530 // that of the exit of the "then" part.
11533 noway_assert(optLocalAssertionProp);
11534 // If either exit table has zero entries then
11535 // the merged table also has zero entries
11536 if (optAssertionCount == 0 || thenAssertionCount == 0)
11538 optAssertionReset(0);
11542 size_t tabSize = optAssertionCount * sizeof(AssertionDsc);
11543 if ((optAssertionCount != thenAssertionCount) ||
11544 (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0))
11546 // Yes they are different so we have to find the merged set
11547 // Iterate over the copy asgn table removing any entries
11548 // that do not have an exact match in the thenAssertionTab
11549 AssertionIndex index = 1;
11550 while (index <= optAssertionCount)
11552 AssertionDsc* curAssertion = optGetAssertion(index);
11554 for (unsigned j = 0; j < thenAssertionCount; j++)
11556 AssertionDsc* thenAssertion = &thenAssertionTab[j];
11558 // Do the left sides match?
11559 if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) &&
11560 (curAssertion->assertionKind == thenAssertion->assertionKind))
11562 // Do the right sides match?
11563 if ((curAssertion->op2.kind == thenAssertion->op2.kind) &&
11564 (curAssertion->op2.lconVal == thenAssertion->op2.lconVal))
11575 // If we fall out of the loop above then we didn't find
11576 // any matching entry in the thenAssertionTab so it must
11577 // have been killed on that path so we remove it here
11580 // The data at optAssertionTabPrivate[i] is to be removed
11581 CLANG_FORMAT_COMMENT_ANCHOR;
11585 printf("The QMARK-COLON ");
11587 printf(" removes assertion candidate #%d\n", index);
11590 optAssertionRemove(index);
11593 // The data at optAssertionTabPrivate[i] is to be kept
11599 #endif // LOCAL_ASSERTION_PROP
11602 DONE_MORPHING_CHILDREN:
11604 /*-------------------------------------------------------------------------
11605 * Now do POST-ORDER processing
11608 #if FEATURE_FIXED_OUT_ARGS && !defined(_TARGET_64BIT_)
11609 // Variable shifts of a long end up being helper calls, so mark the tree as such. This
11610 // is potentially too conservative, since they'll get treated as having side effects.
11611 // It is important to mark them as calls so if they are part of an argument list,
11612 // they will get sorted and processed properly (for example, it is important to handle
11613 // all nested calls before putting struct arguments in the argument registers). We
11614 // could mark the trees just before argument processing, but it would require a full
11615 // tree walk of the argument tree, so we just do it here, instead, even though we'll
11616 // mark non-argument trees (that will still get converted to calls, anyway).
11617 if (GenTree::OperIsShift(oper) && (tree->TypeGet() == TYP_LONG) && (op2->OperGet() != GT_CNS_INT))
11619 tree->gtFlags |= GTF_CALL;
11621 #endif // FEATURE_FIXED_OUT_ARGS && !_TARGET_64BIT_
11623 if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) &&
11624 (op2 && !varTypeIsGC(op2->TypeGet())))
11626 // The tree is really not GC but was marked as such. Now that the
11627 // children have been unmarked, unmark the tree too.
11629 // Remember that GT_COMMA inherits its type only from op2
11630 if (tree->gtOper == GT_COMMA)
11632 tree->gtType = genActualType(op2->TypeGet());
11636 tree->gtType = genActualType(op1->TypeGet());
11640 GenTreePtr oldTree = tree;
11642 GenTreePtr qmarkOp1 = nullptr;
11643 GenTreePtr qmarkOp2 = nullptr;
11645 if ((tree->OperGet() == GT_QMARK) && (tree->gtOp.gtOp2->OperGet() == GT_COLON))
11647 qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1;
11648 qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2;
11651 // Try to fold it, maybe we get lucky,
11652 tree = gtFoldExpr(tree);
11654 if (oldTree != tree)
11656 /* if gtFoldExpr returned op1 or op2 then we are done */
11657 if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
11662 /* If we created a comma-throw tree then we need to morph op1 */
11663 if (fgIsCommaThrow(tree))
11665 tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
11666 fgMorphTreeDone(tree);
11672 else if (tree->OperKind() & GTK_CONST)
11677 /* gtFoldExpr could have used setOper to change the oper */
11678 oper = tree->OperGet();
11679 typ = tree->TypeGet();
11681 /* gtFoldExpr could have changed op1 and op2 */
11682 op1 = tree->gtOp.gtOp1;
11683 op2 = tree->gtGetOp2IfPresent();
11685 // Do we have an integer compare operation?
11687 if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet()))
11689 // Are we comparing against zero?
11691 if (op2->IsIntegralConst(0))
11693 // Request that the codegen for op1 sets the condition flags
11694 // when it generates the code for op1.
11696 // Codegen for op1 must set the condition flags if
11697 // this method returns true.
11699 op1->gtRequestSetFlags();
11702 /*-------------------------------------------------------------------------
11703 * Perform the required oper-specific postorder morphing
11707 GenTreePtr cns1, cns2;
11708 GenTreePtr thenNode;
11709 GenTreePtr elseNode;
11710 size_t ival1, ival2;
11711 GenTreePtr lclVarTree;
11712 GenTreeLclVarCommon* lclVarCmnTree;
11713 FieldSeqNode* fieldSeq = nullptr;
11719 lclVarTree = fgIsIndirOfAddrOfLocal(op1);
11720 if (lclVarTree != nullptr)
11722 lclVarTree->gtFlags |= GTF_VAR_DEF;
11725 if (op1->gtEffectiveVal()->OperIsConst())
11727 op1 = gtNewOperNode(GT_IND, tree->TypeGet(), op1);
11728 tree->gtOp.gtOp1 = op1;
11731 /* If we are storing a small type, we might be able to omit a cast */
11732 if ((op1->gtOper == GT_IND) && varTypeIsSmall(op1->TypeGet()))
11734 if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow())
11736 var_types castType = op2->CastToType();
11738 // If we are performing a narrowing cast and
11739 // castType is larger or the same as op1's type
11740 // then we can discard the cast.
11742 if (varTypeIsSmall(castType) && (castType >= op1->TypeGet()))
11744 tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp();
11747 else if (op2->OperIsCompare() && varTypeIsByte(op1->TypeGet()))
11749 /* We don't need to zero extend the setcc instruction */
11750 op2->gtType = TYP_BYTE;
11753 // If we introduced a CSE we may need to undo the optimization above
11754 // (i.e. " op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type)
11755 // When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
11756 else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR))
11758 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
11759 LclVarDsc* varDsc = &lvaTable[varNum];
11761 /* We again need to zero extend the setcc instruction */
11762 op2->gtType = varDsc->TypeGet();
11764 fgAssignSetVarDef(tree);
11782 /* We can't CSE the LHS of an assignment */
11783 /* We also must set in the pre-morphing phase, otherwise assertionProp doesn't see it */
11784 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
11786 op1->gtFlags |= GTF_DONT_CSE;
11793 /* Make sure we're allowed to do this */
11795 if (optValnumCSE_phase)
11797 // It is not safe to reorder/delete CSE's
11803 /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */
11805 if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0)
11807 op1 = tree->gtOp.gtOp1;
11809 /* Since this can occur repeatedly we use a while loop */
11811 while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) &&
11812 (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) && (op1->gtType == TYP_INT) &&
11813 (op1->gtOverflow() == false))
11815 /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */
11817 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
11818 ival2 = cns2->gtIntCon.gtIconVal;
11820 if (op1->gtOper == GT_ADD)
11828 cns2->gtIntCon.gtIconVal = ival2;
11830 #ifdef _TARGET_64BIT_
11831 // we need to properly re-sign-extend or truncate as needed.
11832 cns2->AsIntCon()->TruncateOrSignExtend32();
11833 #endif // _TARGET_64BIT_
11835 op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
11840 // Here we look for the following tree
11846 ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1
11848 // cast to unsigned allows test for both 0 and 1
11849 if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->gtIntConCommon.IconValue()) <= 1U))
11851 ival2 = (size_t)cns2->gtIntConCommon.IconValue();
11853 else // cast to UINT64 allows test for both 0 and 1
11854 if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->gtIntConCommon.LngValue()) <= 1ULL))
11856 ival2 = (size_t)cns2->gtIntConCommon.LngValue();
11859 if (ival2 != INT_MAX)
11861 // If we don't have a comma and relop, we can't do this optimization
11863 if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare()))
11865 // Here we look for the following transformation
11867 // EQ/NE Possible REVERSE(RELOP)
11869 // COMMA CNS 0/1 -> COMMA relop_op2
11871 // x RELOP x relop_op1
11873 // relop_op1 relop_op2
11877 GenTreePtr comma = op1;
11878 GenTreePtr relop = comma->gtOp.gtOp2;
11880 GenTreePtr relop_op1 = relop->gtOp.gtOp1;
11882 bool reverse = ((ival2 == 0) == (oper == GT_EQ));
11886 gtReverseCond(relop);
11889 relop->gtOp.gtOp1 = comma;
11890 comma->gtOp.gtOp2 = relop_op1;
11892 // Comma now has fewer nodes underneath it, so we need to regenerate its flags
11893 comma->gtFlags &= ~GTF_ALL_EFFECT;
11894 comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT;
11895 comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
11897 noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0);
11898 noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0);
11900 tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT);
11905 if (op1->gtOper == GT_COMMA)
11907 // Here we look for the following tree
11908 // and when the LCL_VAR is a temp we can fold the tree:
11912 // COMMA CNS 0/1 -> RELOP CNS 0/1
11920 GenTreePtr asg = op1->gtOp.gtOp1;
11921 GenTreePtr lcl = op1->gtOp.gtOp2;
11923 /* Make sure that the left side of the comma is the assignment of the LCL_VAR */
11924 if (asg->gtOper != GT_ASG)
11929 /* The right side of the comma must be a LCL_VAR temp */
11930 if (lcl->gtOper != GT_LCL_VAR)
11935 unsigned lclNum = lcl->gtLclVarCommon.gtLclNum;
11936 noway_assert(lclNum < lvaCount);
11938 /* If the LCL_VAR is not a temp then bail, a temp has a single def */
11939 if (!lvaTable[lclNum].lvIsTemp)
11945 /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */
11946 // Fix 383856 X86/ARM ILGEN
11947 if (lclNumIsCSE(lclNum))
11953 /* We also must be assigning the result of a RELOP */
11954 if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR)
11959 /* Both of the LCL_VAR must match */
11960 if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum)
11965 /* If right side of asg is not a RELOP then skip */
11966 if (!asg->gtOp.gtOp2->OperIsCompare())
11971 LclVarDsc* varDsc = lvaTable + lclNum;
11973 /* Set op1 to the right side of asg, (i.e. the RELOP) */
11974 op1 = asg->gtOp.gtOp2;
11976 DEBUG_DESTROY_NODE(asg->gtOp.gtOp1);
11977 DEBUG_DESTROY_NODE(lcl);
11979 /* This local variable should never be used again */
11981 // VSW 184221: Set RefCnt to zero to indicate that this local var
11982 // is not used any more. (Keep the lvType as is.)
11983 // Otherwise lvOnFrame will be set to true in Compiler::raMarkStkVars
11984 // And then emitter::emitEndCodeGen will assert in the following line:
11985 // noway_assert( dsc->lvTracked);
11987 noway_assert(varDsc->lvRefCnt == 0 || // lvRefCnt may not have been set yet.
11988 varDsc->lvRefCnt == 2 // Or, we assume this tmp should only be used here,
11989 // and it only shows up twice.
11991 lvaTable[lclNum].lvRefCnt = 0;
11992 lvaTable[lclNum].lvaResetSortAgainFlag(this);
11995 if (op1->OperIsCompare())
11997 // Here we look for the following tree
11999 // EQ/NE -> RELOP/!RELOP
12004 // Note that we will remove/destroy the EQ/NE node and move
12005 // the RELOP up into its location.
12007 /* Here we reverse the RELOP if necessary */
12009 bool reverse = ((ival2 == 0) == (oper == GT_EQ));
12013 gtReverseCond(op1);
12016 /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */
12017 op1->gtType = tree->gtType;
12019 noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
12020 op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
12022 DEBUG_DESTROY_NODE(tree);
12027 // Now we check for a compare with the result of an '&' operator
12029 // Here we look for the following transformation:
12033 // AND CNS 0/1 -> AND CNS 0
12035 // RSZ/RSH CNS 1 x CNS (1 << y)
12039 if (op1->gtOper == GT_AND)
12041 GenTreePtr andOp = op1;
12042 GenTreePtr rshiftOp = andOp->gtOp.gtOp1;
12044 if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH))
12049 if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI())
12054 ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal;
12056 if (shiftAmount < 0)
12061 if (!andOp->gtOp.gtOp2->IsIntegralConst(1))
12066 if (andOp->gtType == TYP_INT)
12068 if (shiftAmount > 31)
12073 UINT32 newAndOperand = ((UINT32)1) << shiftAmount;
12075 andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand;
12077 // Reverse the cond if necessary
12080 gtReverseCond(tree);
12081 cns2->gtIntCon.gtIconVal = 0;
12082 oper = tree->gtOper;
12085 else if (andOp->gtType == TYP_LONG)
12087 if (shiftAmount > 63)
12092 UINT64 newAndOperand = ((UINT64)1) << shiftAmount;
12094 andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand);
12096 // Reverse the cond if necessary
12099 gtReverseCond(tree);
12100 cns2->gtIntConCommon.SetLngValue(0);
12101 oper = tree->gtOper;
12105 andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1;
12107 DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2);
12108 DEBUG_DESTROY_NODE(rshiftOp);
12110 } // END if (ival2 != INT_MAX)
12113 /* Now check for compares with small constant longs that can be cast to int */
12115 if (!cns2->OperIsConst())
12120 if (cns2->TypeGet() != TYP_LONG)
12125 /* Is the constant 31 bits or smaller? */
12127 if ((cns2->gtIntConCommon.LngValue() >> 31) != 0)
12132 /* Is the first comparand mask operation of type long ? */
12134 if (op1->gtOper != GT_AND)
12136 /* Another interesting case: cast from int */
12138 if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT &&
12139 !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate
12140 !op1->gtOverflow()) // cannot be an overflow checking cast
12142 /* Simply make this into an integer comparison */
12144 tree->gtOp.gtOp1 = op1->gtCast.CastOp();
12145 tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT);
12151 noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND);
12153 /* Is the result of the mask effectively an INT ? */
12155 GenTreePtr andMask;
12156 andMask = op1->gtOp.gtOp2;
12157 if (andMask->gtOper != GT_CNS_NATIVELONG)
12161 if ((andMask->gtIntConCommon.LngValue() >> 32) != 0)
12166 /* Now we know that we can cast gtOp.gtOp1 of AND to int */
12168 op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, op1->gtOp.gtOp1, TYP_INT);
12170 /* now replace the mask node (gtOp.gtOp2 of AND node) */
12172 noway_assert(andMask == op1->gtOp.gtOp2);
12174 ival1 = (int)andMask->gtIntConCommon.LngValue();
12175 andMask->SetOper(GT_CNS_INT);
12176 andMask->gtType = TYP_INT;
12177 andMask->gtIntCon.gtIconVal = ival1;
12179 /* now change the type of the AND node */
12181 op1->gtType = TYP_INT;
12183 /* finally we replace the comparand */
12185 ival2 = (int)cns2->gtIntConCommon.LngValue();
12186 cns2->SetOper(GT_CNS_INT);
12187 cns2->gtType = TYP_INT;
12189 noway_assert(cns2 == op2);
12190 cns2->gtIntCon.gtIconVal = ival2;
12199 if ((tree->gtFlags & GTF_UNSIGNED) == 0)
12201 if (op2->gtOper == GT_CNS_INT)
12204 /* Check for "expr relop 1" */
12205 if (cns2->IsIntegralConst(1))
12207 /* Check for "expr >= 1" */
12210 /* Change to "expr > 0" */
12214 /* Check for "expr < 1" */
12215 else if (oper == GT_LT)
12217 /* Change to "expr <= 0" */
12222 /* Check for "expr relop -1" */
12223 else if (cns2->IsIntegralConst(-1) && ((oper == GT_LE) || (oper == GT_GT)))
12225 /* Check for "expr <= -1" */
12228 /* Change to "expr < 0" */
12232 /* Check for "expr > -1" */
12233 else if (oper == GT_GT)
12235 /* Change to "expr >= 0" */
12239 // IF we get here we should be changing 'oper'
12240 assert(tree->OperGet() != oper);
12242 // Keep the old ValueNumber for 'tree' as the new expr
12243 // will still compute the same value as before
12244 tree->SetOper(oper, GenTree::PRESERVE_VN);
12245 cns2->gtIntCon.gtIconVal = 0;
12247 // vnStore is null before the ValueNumber phase has run
12248 if (vnStore != nullptr)
12250 // Update the ValueNumber for 'cns2', as we just changed it to 0
12251 fgValueNumberTreeConst(cns2);
12254 op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2);
12259 else // we have an unsigned comparison
12261 if (op2->IsIntegralConst(0))
12263 if ((oper == GT_GT) || (oper == GT_LE))
12265 // IL doesn't have a cne instruction so compilers use cgt.un instead. The JIT
12266 // recognizes certain patterns that involve GT_NE (e.g (x & 4) != 0) and fails
12267 // if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
12268 // and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it sometimes
12269 // occurs as a result of branch inversion.
12270 oper = (oper == GT_LE) ? GT_EQ : GT_NE;
12271 tree->SetOper(oper, GenTree::PRESERVE_VN);
12272 tree->gtFlags &= ~GTF_UNSIGNED;
12279 noway_assert(tree->OperKind() & GTK_RELOP);
12281 /* Check if the result of the comparison is used for a jump.
12282 * If not then only the int (i.e. 32 bit) case is handled in
12283 * the code generator through the (x86) "set" instructions.
12284 * For the rest of the cases, the simplest way is to
12285 * "simulate" the comparison with ?:
12287 * On ARM, we previously used the IT instruction, but the IT instructions
12288 * have mostly been declared obsolete and off-limits, so all cases on ARM
12289 * get converted to ?: */
12291 if (!(tree->gtFlags & GTF_RELOP_JMP_USED) && fgMorphRelopToQmark(op1))
12293 /* We convert it to "(CMP_TRUE) ? (1):(0)" */
12296 op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
12297 op1->gtRequestSetFlags();
12299 op2 = new (this, GT_COLON) GenTreeColon(TYP_INT, gtNewIconNode(1), gtNewIconNode(0));
12300 op2 = fgMorphTree(op2);
12302 tree = gtNewQmarkNode(TYP_INT, op1, op2);
12304 fgMorphTreeDone(tree);
12312 /* If op1 is a comma throw node then we won't be keeping op2 */
12313 if (fgIsCommaThrow(op1))
12318 /* Get hold of the two branches */
12320 noway_assert(op2->OperGet() == GT_COLON);
12321 elseNode = op2->AsColon()->ElseNode();
12322 thenNode = op2->AsColon()->ThenNode();
12324 /* Try to hoist assignments out of qmark colon constructs.
12325 ie. replace (cond?(x=a):(x=b)) with (x=(cond?a:b)). */
12327 if (tree->TypeGet() == TYP_VOID && thenNode->OperGet() == GT_ASG && elseNode->OperGet() == GT_ASG &&
12328 thenNode->TypeGet() != TYP_LONG && GenTree::Compare(thenNode->gtOp.gtOp1, elseNode->gtOp.gtOp1) &&
12329 thenNode->gtOp.gtOp2->TypeGet() == elseNode->gtOp.gtOp2->TypeGet())
12331 noway_assert(thenNode->TypeGet() == elseNode->TypeGet());
12333 GenTreePtr asg = thenNode;
12334 GenTreePtr colon = op2;
12335 colon->gtOp.gtOp1 = thenNode->gtOp.gtOp2;
12336 colon->gtOp.gtOp2 = elseNode->gtOp.gtOp2;
12337 tree->gtType = colon->gtType = asg->gtOp.gtOp2->gtType;
12338 asg->gtOp.gtOp2 = tree;
12340 // Asg will have all the flags that the QMARK had
12341 asg->gtFlags |= (tree->gtFlags & GTF_ALL_EFFECT);
12343 // Colon flag won't have the flags that x had.
12344 colon->gtFlags &= ~GTF_ALL_EFFECT;
12345 colon->gtFlags |= (colon->gtOp.gtOp1->gtFlags | colon->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
12347 DEBUG_DESTROY_NODE(elseNode->gtOp.gtOp1);
12348 DEBUG_DESTROY_NODE(elseNode);
12353 /* If the 'else' branch is empty swap the two branches and reverse the condition */
12355 if (elseNode->IsNothingNode())
12357 /* This can only happen for VOID ?: */
12358 noway_assert(op2->gtType == TYP_VOID);
12360 /* If the thenNode and elseNode are both nop nodes then optimize away the QMARK */
12361 if (thenNode->IsNothingNode())
12363 // We may be able to throw away op1 (unless it has side-effects)
12365 if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
12367 /* Just return a Nop Node */
12372 /* Just return the relop, but clear the special flags. Note
12373 that we can't do that for longs and floats (see code under
12374 COMPARE label above) */
12376 if (!fgMorphRelopToQmark(op1->gtOp.gtOp1))
12378 op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
12385 GenTreePtr tmp = elseNode;
12387 op2->AsColon()->ElseNode() = elseNode = thenNode;
12388 op2->AsColon()->ThenNode() = thenNode = tmp;
12389 gtReverseCond(op1);
12393 #if !defined(_TARGET_ARM_)
12394 // If we have (cond)?0:1, then we just return "cond" for TYP_INTs
12396 // Don't do this optimization for ARM: we always require assignment
12397 // to boolean to remain ?:, since we don't have any way to generate
12398 // this with straight-line code, like x86 does using setcc (at least
12399 // after the IT instruction is deprecated).
12401 if (genActualType(op1->gtOp.gtOp1->gtType) == TYP_INT && genActualType(typ) == TYP_INT &&
12402 thenNode->gtOper == GT_CNS_INT && elseNode->gtOper == GT_CNS_INT)
12404 ival1 = thenNode->gtIntCon.gtIconVal;
12405 ival2 = elseNode->gtIntCon.gtIconVal;
12407 // Is one constant 0 and the other 1?
12408 if ((ival1 | ival2) == 1 && (ival1 & ival2) == 0)
12410 // If the constants are {1, 0}, reverse the condition
12413 gtReverseCond(op1);
12416 // Unmark GTF_RELOP_JMP_USED on the condition node so it knows that it
12417 // needs to materialize the result as a 0 or 1.
12418 noway_assert(op1->gtFlags & (GTF_RELOP_QMARK | GTF_RELOP_JMP_USED));
12419 op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
12421 DEBUG_DESTROY_NODE(tree);
12422 DEBUG_DESTROY_NODE(op2);
12427 #endif // !_TARGET_ARM_
12429 break; // end case GT_QMARK
12433 #ifndef _TARGET_64BIT_
12434 if (typ == TYP_LONG)
12436 // This must be GTF_MUL_64RSLT
12437 assert(tree->gtIsValid64RsltMul());
12440 #endif // _TARGET_64BIT_
12445 if (tree->gtOverflow())
12450 // TODO #4104: there are a lot of other places where
12451 // this condition is not checked before transformations.
12454 /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */
12457 if (op2->IsCnsIntOrI())
12459 /* Negate the constant and change the node to be "+" */
12461 op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
12463 tree->ChangeOper(oper);
12467 /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */
12470 if (op1->IsCnsIntOrI())
12472 noway_assert(varTypeIsIntOrI(tree));
12474 tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, tree->gtType, op2); // The type of the new GT_NEG
12475 // node should be the same
12476 // as the type of the tree, i.e. tree->gtType.
12477 fgMorphTreeDone(op2);
12480 tree->ChangeOper(oper);
12484 /* No match - exit */
12488 #ifdef _TARGET_ARM64_
12490 if (!varTypeIsFloating(tree->gtType))
12492 // Codegen for this instruction needs to be able to throw two exceptions:
12493 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
12494 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
12498 // Codegen for this instruction needs to be able to throw one exception:
12499 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
12506 if (tree->gtOverflow())
12508 tree->gtRequestSetFlags();
12510 // Add the excptn-throwing basic block to jump to on overflow
12512 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
12514 // We can't do any commutative morphing for overflow instructions
12525 /* Commute any non-REF constants to the right */
12528 if (op1->OperIsConst() && (op1->gtType != TYP_REF))
12530 // TODO-Review: We used to assert here that
12531 // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD));
12532 // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation,
12533 // and would sometimes hit this assertion. This may indicate a missed "remorph".
12534 // Task is to re-enable this assertion and investigate.
12536 /* Swap the operands */
12537 tree->gtOp.gtOp1 = op2;
12538 tree->gtOp.gtOp2 = op1;
12541 op2 = tree->gtOp.gtOp2;
12544 /* See if we can fold GT_ADD nodes. */
12546 if (oper == GT_ADD)
12548 /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */
12550 if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) &&
12551 op1->gtOp.gtOp2->gtOper == GT_CNS_INT && op2->gtOp.gtOp2->gtOper == GT_CNS_INT &&
12552 !op1->gtOverflow() && !op2->gtOverflow())
12554 cns1 = op1->gtOp.gtOp2;
12555 cns2 = op2->gtOp.gtOp2;
12556 cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal;
12557 #ifdef _TARGET_64BIT_
12558 if (cns1->TypeGet() == TYP_INT)
12560 // we need to properly re-sign-extend or truncate after adding two int constants above
12561 cns1->AsIntCon()->TruncateOrSignExtend32();
12563 #endif //_TARGET_64BIT_
12565 tree->gtOp.gtOp2 = cns1;
12566 DEBUG_DESTROY_NODE(cns2);
12568 op1->gtOp.gtOp2 = op2->gtOp.gtOp1;
12569 op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT);
12570 DEBUG_DESTROY_NODE(op2);
12571 op2 = tree->gtOp.gtOp2;
12574 if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))
12576 /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */
12578 if (op1->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op1) && op1->gtOp.gtOp2->IsCnsIntOrI() &&
12579 !op1->gtOverflow() && op1->gtOp.gtOp2->OperGet() == op2->OperGet())
12581 cns1 = op1->gtOp.gtOp2;
12582 op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() +
12583 op2->gtIntConCommon.IconValue());
12584 #ifdef _TARGET_64BIT_
12585 if (op2->TypeGet() == TYP_INT)
12587 // we need to properly re-sign-extend or truncate after adding two int constants above
12588 op2->AsIntCon()->TruncateOrSignExtend32();
12590 #endif //_TARGET_64BIT_
12592 if (cns1->OperGet() == GT_CNS_INT)
12594 op2->gtIntCon.gtFieldSeq =
12595 GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq);
12597 DEBUG_DESTROY_NODE(cns1);
12599 tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
12600 DEBUG_DESTROY_NODE(op1);
12601 op1 = tree->gtOp.gtOp1;
12606 if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree))
12609 // If this addition is adding an offset to a null pointer,
12610 // avoid the work and yield the null pointer immediately.
12611 // Dereferencing the pointer in either case will have the
12614 if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) &&
12615 ((op1->gtFlags & GTF_ALL_EFFECT) == 0))
12617 op2->gtType = tree->gtType;
12618 DEBUG_DESTROY_NODE(op1);
12619 DEBUG_DESTROY_NODE(tree);
12623 // Remove the addition iff it won't change the tree type
12626 if (!gtIsActiveCSE_Candidate(op2) &&
12627 ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF)))
12629 if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) &&
12630 (op2->gtIntCon.gtFieldSeq != nullptr) &&
12631 (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField()))
12633 fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq);
12636 DEBUG_DESTROY_NODE(op2);
12637 DEBUG_DESTROY_NODE(tree);
12644 /* See if we can fold GT_MUL by const nodes */
12645 else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase)
12647 #ifndef _TARGET_64BIT_
12648 noway_assert(typ <= TYP_UINT);
12649 #endif // _TARGET_64BIT_
12650 noway_assert(!tree->gtOverflow());
12652 ssize_t mult = op2->gtIntConCommon.IconValue();
12653 bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
12654 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq();
12656 assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr);
12660 // We may be able to throw away op1 (unless it has side-effects)
12662 if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
12664 DEBUG_DESTROY_NODE(op1);
12665 DEBUG_DESTROY_NODE(tree);
12666 return op2; // Just return the "0" node
12669 // We need to keep op1 for the side-effects. Hang it off
12672 tree->ChangeOper(GT_COMMA);
12676 size_t abs_mult = (mult >= 0) ? mult : -mult;
12677 size_t lowestBit = genFindLowestBit(abs_mult);
12678 bool changeToShift = false;
12680 // is it a power of two? (positive or negative)
12681 if (abs_mult == lowestBit)
12683 // if negative negate (min-int does not need negation)
12684 if (mult < 0 && mult != SSIZE_T_MIN)
12686 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
12687 fgMorphTreeDone(op1);
12690 // If "op2" is a constant array index, the other multiplicand must be a constant.
12691 // Transfer the annotation to the other one.
12692 if (op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
12693 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
12695 assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr);
12696 GenTreePtr otherOp = op1;
12697 if (otherOp->OperGet() == GT_NEG)
12699 otherOp = otherOp->gtOp.gtOp1;
12701 assert(otherOp->OperGet() == GT_CNS_INT);
12702 assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
12703 otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq;
12708 DEBUG_DESTROY_NODE(op2);
12709 DEBUG_DESTROY_NODE(tree);
12713 /* Change the multiplication into a shift by log2(val) bits */
12714 op2->gtIntConCommon.SetIconValue(genLog2(abs_mult));
12715 changeToShift = true;
12718 else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
12720 int shift = genLog2(lowestBit);
12721 ssize_t factor = abs_mult >> shift;
12723 if (factor == 3 || factor == 5 || factor == 9)
12725 // if negative negate (min-int does not need negation)
12726 if (mult < 0 && mult != SSIZE_T_MIN)
12728 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
12729 fgMorphTreeDone(op1);
12732 GenTreePtr factorIcon = gtNewIconNode(factor, TYP_I_IMPL);
12733 if (op2IsConstIndex)
12735 factorIcon->AsIntCon()->gtFieldSeq =
12736 GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
12739 // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift
12740 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon);
12741 fgMorphTreeDone(op1);
12743 op2->gtIntConCommon.SetIconValue(shift);
12744 changeToShift = true;
12747 #endif // LEA_AVAILABLE
12750 // vnStore is null before the ValueNumber phase has run
12751 if (vnStore != nullptr)
12753 // Update the ValueNumber for 'op2', as we just changed the constant
12754 fgValueNumberTreeConst(op2);
12757 // Keep the old ValueNumber for 'tree' as the new expr
12758 // will still compute the same value as before
12759 tree->ChangeOper(oper, GenTree::PRESERVE_VN);
12761 goto DONE_MORPHING_CHILDREN;
12764 else if (fgOperIsBitwiseRotationRoot(oper))
12766 tree = fgRecognizeAndMorphBitwiseRotation(tree);
12768 // fgRecognizeAndMorphBitwiseRotation may return a new tree
12769 oper = tree->OperGet();
12770 typ = tree->TypeGet();
12771 op1 = tree->gtOp.gtOp1;
12772 op2 = tree->gtOp.gtOp2;
12781 /* Any constant cases should have been folded earlier */
12782 noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase);
12787 noway_assert(varTypeIsFloating(op1->TypeGet()));
12789 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN, fgPtrArgCntCur);
12793 // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on
12794 // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X
12795 // is a local or clsVar, even if it has been address-exposed.
12796 if (op1->OperGet() == GT_ADDR)
12798 tree->gtFlags |= (op1->gtGetOp1()->gtFlags & GTF_GLOB_REF);
12804 // Can not remove a GT_IND if it is currently a CSE candidate.
12805 if (gtIsActiveCSE_Candidate(tree))
12810 bool foldAndReturnTemp;
12811 foldAndReturnTemp = false;
12815 /* Try to Fold *(&X) into X */
12816 if (op1->gtOper == GT_ADDR)
12818 // Can not remove a GT_ADDR if it is currently a CSE candidate.
12819 if (gtIsActiveCSE_Candidate(op1))
12824 temp = op1->gtOp.gtOp1; // X
12826 // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that
12827 // they are the *same* struct type. In fact, they almost certainly aren't. If the
12828 // address has an associated field sequence, that identifies this case; go through
12829 // the "lcl_fld" path rather than this one.
12830 FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below.
12831 if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq))
12833 foldAndReturnTemp = true;
12835 else if (temp->OperIsLocal())
12837 unsigned lclNum = temp->gtLclVarCommon.gtLclNum;
12838 LclVarDsc* varDsc = &lvaTable[lclNum];
12840 // We will try to optimize when we have a promoted struct promoted with a zero lvFldOffset
12841 if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
12843 noway_assert(varTypeIsStruct(varDsc));
12845 // We will try to optimize when we have a single field struct that is being struct promoted
12846 if (varDsc->lvFieldCnt == 1)
12848 unsigned lclNumFld = varDsc->lvFieldLclStart;
12849 // just grab the promoted field
12850 LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld];
12852 // Also make sure that the tree type matches the fieldVarType and that it's lvFldOffset
12854 if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0))
12856 // We can just use the existing promoted field LclNum
12857 temp->gtLclVarCommon.SetLclNum(lclNumFld);
12858 temp->gtType = fieldVarDsc->TypeGet();
12860 foldAndReturnTemp = true;
12864 // If the type of the IND (typ) is a "small int", and the type of the local has the
12865 // same width, then we can reduce to just the local variable -- it will be
12866 // correctly normalized, and signed/unsigned differences won't matter.
12868 // The below transformation cannot be applied if the local var needs to be normalized on load.
12869 else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
12870 !lvaTable[lclNum].lvNormalizeOnLoad())
12872 tree->gtType = typ = temp->TypeGet();
12873 foldAndReturnTemp = true;
12877 // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e.
12879 assert(fieldSeq == nullptr);
12880 bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq);
12881 assert(b || fieldSeq == nullptr);
12883 if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD))
12885 // Append the field sequence, change the type.
12886 temp->AsLclFld()->gtFieldSeq =
12887 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
12888 temp->gtType = typ;
12890 foldAndReturnTemp = true;
12893 // Otherwise we will fold this into a GT_LCL_FLD below
12894 // where we check (temp != nullptr)
12896 else // !temp->OperIsLocal()
12898 // We don't try to fold away the GT_IND/GT_ADDR for this case
12902 else if (op1->OperGet() == GT_ADD)
12904 /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */
12906 if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && op1->gtOp.gtOp2->OperGet() == GT_CNS_INT &&
12907 (!(opts.MinOpts() || opts.compDbgCode)))
12909 // No overflow arithmetic with pointers
12910 noway_assert(!op1->gtOverflow());
12912 temp = op1->gtOp.gtOp1->gtOp.gtOp1;
12913 if (!temp->OperIsLocal())
12919 // Can not remove the GT_ADDR if it is currently a CSE candidate.
12920 if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1))
12925 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
12926 fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
12928 // Does the address have an associated zero-offset field sequence?
12929 FieldSeqNode* addrFieldSeq = nullptr;
12930 if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq))
12932 fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq);
12935 if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT)
12937 noway_assert(!varTypeIsGC(temp->TypeGet()));
12938 foldAndReturnTemp = true;
12942 // The emitter can't handle large offsets
12943 if (ival1 != (unsigned short)ival1)
12948 // The emitter can get confused by invalid offsets
12949 if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum))
12954 #ifdef _TARGET_ARM_
12955 // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field
12957 if (varTypeIsFloating(typ))
12959 if ((ival1 % emitTypeSize(typ)) != 0)
12961 tree->gtFlags |= GTF_IND_UNALIGNED;
12967 // Now we can fold this into a GT_LCL_FLD below
12968 // where we check (temp != nullptr)
12972 // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging:
12973 // - We may have a load of a local where the load has a different type than the local
12974 // - We may have a load of a local plus an offset
12976 // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and
12977 // offset if doing so is legal. The only cases in which this transformation is illegal are if the load
12978 // begins before the local or if the load extends beyond the end of the local (i.e. if the load is
12979 // out-of-bounds w.r.t. the local).
12980 if ((temp != nullptr) && !foldAndReturnTemp)
12982 assert(temp->OperIsLocal());
12984 const unsigned lclNum = temp->AsLclVarCommon()->gtLclNum;
12985 LclVarDsc* const varDsc = &lvaTable[lclNum];
12987 const var_types tempTyp = temp->TypeGet();
12988 const bool useExactSize =
12989 varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK);
12990 const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp);
12992 // If the size of the load is greater than the size of the lclVar, we cannot fold this access into
12993 // a lclFld: the access represented by an lclFld node must begin at or after the start of the
12994 // lclVar and must not extend beyond the end of the lclVar.
12995 if ((ival1 < 0) || ((ival1 + genTypeSize(typ)) > varSize))
12997 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
13001 // Make sure we don't separately promote the fields of this struct.
13002 if (varDsc->lvRegStruct)
13004 // We can enregister, but can't promote.
13005 varDsc->lvPromoted = false;
13009 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
13012 // We will turn a GT_LCL_VAR into a GT_LCL_FLD with an gtLclOffs of 'ival'
13013 // or if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival'
13014 // Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
13016 if (temp->OperGet() == GT_LCL_FLD)
13018 temp->AsLclFld()->gtLclOffs += (unsigned short)ival1;
13019 temp->AsLclFld()->gtFieldSeq =
13020 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
13024 temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"...
13025 temp->AsLclFld()->gtLclOffs = (unsigned short)ival1;
13026 if (fieldSeq != nullptr)
13027 { // If it does represent a field, note that.
13028 temp->AsLclFld()->gtFieldSeq = fieldSeq;
13031 temp->gtType = tree->gtType;
13032 foldAndReturnTemp = true;
13036 if (foldAndReturnTemp)
13038 assert(temp != nullptr);
13039 assert(temp->TypeGet() == typ);
13040 assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR));
13042 // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for
13043 // 'temp' because a GT_ADDR always marks it for its operand.
13044 temp->gtFlags &= ~GTF_DONT_CSE;
13045 temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
13047 if (op1->OperGet() == GT_ADD)
13049 DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR
13050 DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT
13052 DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR
13053 DEBUG_DESTROY_NODE(tree); // GT_IND
13058 // Only do this optimization when we are in the global optimizer. Doing this after value numbering
13059 // could result in an invalid value number for the newly generated GT_IND node.
13060 if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph)
13062 // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)).
13063 // TBD: this transformation is currently necessary for correctness -- it might
13064 // be good to analyze the failures that result if we don't do this, and fix them
13065 // in other ways. Ideally, this should be optional.
13066 GenTreePtr commaNode = op1;
13067 unsigned treeFlags = tree->gtFlags;
13068 commaNode->gtType = typ;
13069 commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
13070 // dangerous, clear the GTF_REVERSE_OPS at
13073 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13075 while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
13077 commaNode = commaNode->gtOp.gtOp2;
13078 commaNode->gtType = typ;
13079 commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
13080 // dangerous, clear the GTF_REVERSE_OPS at
13083 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13086 bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0;
13090 bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
13092 GetArrayInfoMap()->Remove(tree);
13095 op1 = gtNewOperNode(GT_IND, typ, commaNode->gtOp.gtOp2);
13096 op1->gtFlags = treeFlags;
13099 GetArrayInfoMap()->Set(op1, arrInfo);
13102 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13104 commaNode->gtOp.gtOp2 = op1;
13112 // Can not remove op1 if it is currently a CSE candidate.
13113 if (gtIsActiveCSE_Candidate(op1))
13118 if (op1->OperGet() == GT_IND)
13120 if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
13122 // Can not remove a GT_ADDR if it is currently a CSE candidate.
13123 if (gtIsActiveCSE_Candidate(tree))
13128 // Perform the transform ADDR(IND(...)) == (...).
13129 GenTreePtr addr = op1->gtOp.gtOp1;
13131 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
13133 DEBUG_DESTROY_NODE(op1);
13134 DEBUG_DESTROY_NODE(tree);
13139 else if (op1->OperGet() == GT_OBJ)
13141 // Can not remove a GT_ADDR if it is currently a CSE candidate.
13142 if (gtIsActiveCSE_Candidate(tree))
13147 // Perform the transform ADDR(OBJ(...)) == (...).
13148 GenTreePtr addr = op1->AsObj()->Addr();
13150 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
13152 DEBUG_DESTROY_NODE(op1);
13153 DEBUG_DESTROY_NODE(tree);
13157 else if (op1->gtOper == GT_CAST)
13159 GenTreePtr casting = op1->gtCast.CastOp();
13160 if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR)
13162 DEBUG_DESTROY_NODE(op1);
13163 tree->gtOp.gtOp1 = op1 = casting;
13166 else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase)
13168 // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)).
13169 // (Be sure to mark "z" as an l-value...)
13170 GenTreePtr commaNode = op1;
13171 while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
13173 commaNode = commaNode->gtOp.gtOp2;
13175 // The top-level addr might be annotated with a zeroOffset field.
13176 FieldSeqNode* zeroFieldSeq = nullptr;
13177 bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq);
13179 commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE;
13181 // If the node we're about to put under a GT_ADDR is an indirection, it
13182 // doesn't need to be materialized, since we only want the addressing mode. Because
13183 // of this, this GT_IND is not a faulting indirection and we don't have to extract it
13184 // as a side effect.
13185 GenTree* commaOp2 = commaNode->gtOp.gtOp2;
13186 if (commaOp2->OperIsBlk())
13188 commaOp2 = fgMorphBlkToInd(commaOp2->AsBlk(), commaOp2->TypeGet());
13190 if (commaOp2->gtOper == GT_IND)
13192 commaOp2->gtFlags |= GTF_IND_NONFAULTING;
13195 op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2);
13199 // Transfer the annotation to the new GT_ADDR node.
13200 GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq);
13202 commaNode->gtOp.gtOp2 = op1;
13203 // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform
13204 // might give op1 a type different from byref (like, say, native int). So now go back and give
13205 // all the comma nodes the type of op1.
13206 // TODO: the comma flag update below is conservative and can be improved.
13207 // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to
13208 // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
13210 while (commaNode->gtOper == GT_COMMA)
13212 commaNode->gtType = op1->gtType;
13213 commaNode->gtFlags |= op1->gtFlags;
13215 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13217 commaNode = commaNode->gtOp.gtOp2;
13223 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
13224 op1->gtFlags |= GTF_DONT_CSE;
13230 /* Mark the nodes that are conditionally executed */
13231 fgWalkTreePre(&tree, gtMarkColonCond);
13233 /* Since we're doing this postorder we clear this if it got set by a child */
13234 fgRemoveRestOfBlock = false;
13239 /* Special case: trees that don't produce a value */
13240 if ((op2->OperKind() & GTK_ASGOP) || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) ||
13243 typ = tree->gtType = TYP_VOID;
13246 // If we are in the Valuenum CSE phase then don't morph away anything as these
13247 // nodes may have CSE defs/uses in them.
13249 if (!optValnumCSE_phase)
13251 // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this
13254 GenTreePtr op1SideEffects = nullptr;
13255 // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
13256 // hoisted expressions in loops.
13257 gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE));
13258 if (op1SideEffects)
13260 // Replace the left hand side with the side effect list.
13261 tree->gtOp.gtOp1 = op1SideEffects;
13262 tree->gtFlags |= (op1SideEffects->gtFlags & GTF_ALL_EFFECT);
13266 /* The left operand is worthless, throw it away */
13267 if (lvaLocalVarRefCounted)
13269 lvaRecursiveDecRefCounts(op1);
13271 op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
13272 DEBUG_DESTROY_NODE(tree);
13273 DEBUG_DESTROY_NODE(op1);
13277 /* If the right operand is just a void nop node, throw it away */
13278 if (op2->IsNothingNode() && op1->gtType == TYP_VOID)
13280 op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
13281 DEBUG_DESTROY_NODE(tree);
13282 DEBUG_DESTROY_NODE(op2);
13291 /* Special case if fgRemoveRestOfBlock is set to true */
13292 if (fgRemoveRestOfBlock)
13294 if (fgIsCommaThrow(op1, true))
13296 GenTreePtr throwNode = op1->gtOp.gtOp1;
13297 noway_assert(throwNode->gtType == TYP_VOID);
13302 noway_assert(op1->OperKind() & GTK_RELOP);
13303 noway_assert(op1->gtFlags & GTF_EXCEPT);
13305 // We need to keep op1 for the side-effects. Hang it off
13308 tree->ChangeOper(GT_COMMA);
13309 tree->gtOp.gtOp2 = op2 = gtNewNothingNode();
13311 // Additionally since we're eliminating the JTRUE
13312 // codegen won't like it if op1 is a RELOP of longs, floats or doubles.
13313 // So we change it into a GT_COMMA as well.
13314 op1->ChangeOper(GT_COMMA);
13315 op1->gtType = op1->gtOp.gtOp1->gtType;
13324 noway_assert(oper == tree->gtOper);
13326 // If we are in the Valuenum CSE phase then don't morph away anything as these
13327 // nodes may have CSE defs/uses in them.
13329 if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList())
13331 /* Check for op1 as a GT_COMMA with a unconditional throw node */
13332 if (op1 && fgIsCommaThrow(op1, true))
13334 if ((op1->gtFlags & GTF_COLON_COND) == 0)
13336 /* We can safely throw out the rest of the statements */
13337 fgRemoveRestOfBlock = true;
13340 GenTreePtr throwNode = op1->gtOp.gtOp1;
13341 noway_assert(throwNode->gtType == TYP_VOID);
13343 if (oper == GT_COMMA)
13345 /* Both tree and op1 are GT_COMMA nodes */
13346 /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */
13347 tree->gtOp.gtOp1 = throwNode;
13350 else if (oper != GT_NOP)
13352 if (genActualType(typ) == genActualType(op1->gtType))
13354 /* The types match so, return the comma throw node as the new tree */
13359 if (typ == TYP_VOID)
13361 // Return the throw node
13366 GenTreePtr commaOp2 = op1->gtOp.gtOp2;
13368 // need type of oper to be same as tree
13369 if (typ == TYP_LONG)
13371 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
13372 commaOp2->gtIntConCommon.SetLngValue(0);
13373 /* Change the types of oper and commaOp2 to TYP_LONG */
13374 op1->gtType = commaOp2->gtType = TYP_LONG;
13376 else if (varTypeIsFloating(typ))
13378 commaOp2->ChangeOperConst(GT_CNS_DBL);
13379 commaOp2->gtDblCon.gtDconVal = 0.0;
13380 /* Change the types of oper and commaOp2 to TYP_DOUBLE */
13381 op1->gtType = commaOp2->gtType = TYP_DOUBLE;
13385 commaOp2->ChangeOperConst(GT_CNS_INT);
13386 commaOp2->gtIntConCommon.SetIconValue(0);
13387 /* Change the types of oper and commaOp2 to TYP_INT */
13388 op1->gtType = commaOp2->gtType = TYP_INT;
13391 /* Return the GT_COMMA node as the new tree */
13398 /* Check for op2 as a GT_COMMA with a unconditional throw */
13400 if (op2 && fgIsCommaThrow(op2, true))
13402 if ((op2->gtFlags & GTF_COLON_COND) == 0)
13404 /* We can safely throw out the rest of the statements */
13405 fgRemoveRestOfBlock = true;
13408 // If op1 has no side-effects
13409 if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
13411 // If tree is an asg node
13412 if (tree->OperIsAssignment())
13414 /* Return the throw node as the new tree */
13415 return op2->gtOp.gtOp1;
13418 if (tree->OperGet() == GT_ARR_BOUNDS_CHECK)
13420 /* Return the throw node as the new tree */
13421 return op2->gtOp.gtOp1;
13424 // If tree is a comma node
13425 if (tree->OperGet() == GT_COMMA)
13427 /* Return the throw node as the new tree */
13428 return op2->gtOp.gtOp1;
13431 /* for the shift nodes the type of op2 can differ from the tree type */
13432 if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
13434 noway_assert(GenTree::OperIsShiftOrRotate(oper));
13436 GenTreePtr commaOp2 = op2->gtOp.gtOp2;
13438 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
13439 commaOp2->gtIntConCommon.SetLngValue(0);
13441 /* Change the types of oper and commaOp2 to TYP_LONG */
13442 op2->gtType = commaOp2->gtType = TYP_LONG;
13445 if ((genActualType(typ) == TYP_INT) &&
13446 (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet())))
13448 // An example case is comparison (say GT_GT) of two longs or floating point values.
13450 GenTreePtr commaOp2 = op2->gtOp.gtOp2;
13452 commaOp2->ChangeOperConst(GT_CNS_INT);
13453 commaOp2->gtIntCon.gtIconVal = 0;
13454 /* Change the types of oper and commaOp2 to TYP_INT */
13455 op2->gtType = commaOp2->gtType = TYP_INT;
13458 if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL))
13460 noway_assert(tree->OperGet() == GT_ADD);
13462 GenTreePtr commaOp2 = op2->gtOp.gtOp2;
13464 commaOp2->ChangeOperConst(GT_CNS_INT);
13465 commaOp2->gtIntCon.gtIconVal = 0;
13466 /* Change the types of oper and commaOp2 to TYP_BYREF */
13467 op2->gtType = commaOp2->gtType = TYP_BYREF;
13470 /* types should now match */
13471 noway_assert((genActualType(typ) == genActualType(op2->gtType)));
13473 /* Return the GT_COMMA node as the new tree */
13479 /*-------------------------------------------------------------------------
13480 * Optional morphing is done if tree transformations is permitted
13483 if ((opts.compFlags & CLFLG_TREETRANS) == 0)
13488 tree = fgMorphSmpOpOptional(tree->AsOp());
13490 } // extra scope for gcc workaround
13494 #pragma warning(pop)
//------------------------------------------------------------------------------
// fgMorphSmpOpOptional: optional postorder morphing of a simple operator node,
// run only when tree transformations are permitted (the caller checks
// CLFLG_TREETRANS). Re-orders commutative operands, folds "a = a <op> x" into
// the "a <op>= x" assign-operator form, and performs simple strength
// reduction such as "(val + icon) * icon", "(val + icon) << icon" and
// "x ^ -1" -> "~x". Returns the (possibly replaced) tree.
// NOTE(review): several structural lines (braces, switch labels, returns) are
// missing from this copy of the function; comments below describe only the
// code that is visible.
13497 GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
13499 genTreeOps oper = tree->gtOper;
13500 GenTree* op1 = tree->gtOp1;
13501 GenTree* op2 = tree->gtOp2;
13502 var_types typ = tree->TypeGet();
// Commutative operators: normalize operand order and re-associate nested
// chains of the same operator to be left-recursive.
13504 if (GenTree::OperIsCommutative(oper))
13506 /* Swap the operands so that the more expensive one is 'op1' */
13508 if (tree->gtFlags & GTF_REVERSE_OPS)
13516 tree->gtFlags &= ~GTF_REVERSE_OPS;
13519 if (oper == op2->gtOper)
13521 /* Reorder nested operators at the same precedence level to be
13522 left-recursive. For example, change "(a+(b+c))" to the
13523 equivalent expression "((a+b)+c)".
13526 /* Things are handled differently for floating-point operators */
// Re-association can change the rounded result for floating point, so
// fgMoveOpsLeft is only applied to non-floating types here.
13528 if (!varTypeIsFloating(tree->TypeGet()))
13530 fgMoveOpsLeft(tree);
13539 /* Change "((x+icon)+y)" to "((x+y)+icon)"
13540 Don't reorder floating-point operations */
// Only non-overflowing integral ADD chains are re-ordered; overflow ADDs
// must keep their evaluation order to preserve exception behavior.
13542 if ((oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
13543 varTypeIsIntegralOrI(typ))
13545 GenTreePtr ad2 = op1->gtOp.gtOp2;
// Only hoist the constant when op2 is non-constant and the nested ad2 is
// constant, i.e. exactly the "((x+icon)+y)" shape described above.
13547 if (op2->OperIsConst() == 0 && ad2->OperIsConst() != 0)
13559 // And it swaps ad2 and op2. If (op2) is varTypeIsGC, then this implies that (tree) is
13560 // varTypeIsGC. If (op1) is not, then when we swap (ad2) and (op2), then we have a TYP_INT node
13561 // (op1) with a child that is varTypeIsGC. If we encounter that situation, make (op1) the same
13564 // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is
13567 if (varTypeIsGC(op2->TypeGet()))
13569 noway_assert(varTypeIsGC(typ));
// Perform the swap: the constant (ad2) moves up to be tree's op2 and the
// non-constant op2 moves down under the nested ADD; effect flags follow.
13574 op1->gtOp.gtOp2 = op2;
13575 op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
13583 /*-------------------------------------------------------------------------
13584 * Perform optional oper-specific postorder morphing
// --- GT_ASG handling: try to turn "a = a <op> x" into an assign-operator ---
13590 bool dstIsSafeLclVar;
13593 /* We'll convert "a = a <op> x" into "a <op>= x" */
13594 /* and also "a = x <op> a" into "a <op>= x" for commutative ops */
13595 CLANG_FORMAT_COMMENT_ANCHOR;
13597 if (typ == TYP_LONG)
// Struct assignments (other than SSA phi definitions) are routed to the
// block-op morphers instead of the assign-operator transform.
13602 if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
13604 if (tree->OperIsCopyBlkOp())
13606 return fgMorphCopyBlock(tree);
13610 return fgMorphInitBlock(tree);
13612 /* Make sure we're allowed to do this */
13616 if (optValnumCSE_phase)
13618 // It is not safe to reorder/delete CSE's
13622 /* Are we assigning to a GT_LCL_VAR ? */
13624 dstIsSafeLclVar = (op1->gtOper == GT_LCL_VAR);
13626 /* If we have a GT_LCL_VAR, then is the address taken? */
13627 if (dstIsSafeLclVar)
13629 unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
13630 LclVarDsc* varDsc = lvaTable + lclNum;
13632 noway_assert(lclNum < lvaCount);
13634 /* Is the address taken? */
// An address-exposed local can be modified through aliases by the RHS,
// so the "a = a <op> x" pattern match would be unsound.
13635 if (varDsc->lvAddrExposed)
13637 dstIsSafeLclVar = false;
13639 else if (op2->gtFlags & GTF_ASG)
13645 if (!dstIsSafeLclVar)
13647 if (op2->gtFlags & GTF_ASG)
13652 if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT))
13658 /* Special case: a cast that can be thrown away */
13660 if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow())
13666 srct = op2->gtCast.CastOp()->TypeGet();
13667 cast = (var_types)op2->CastToType();
13668 dstt = op1->TypeGet();
13670 /* Make sure these are all ints and precision is not lost */
// The store through the IND truncates to dstt anyway, so a cast to a
// wider-or-equal small-int type is redundant and can be discarded.
13672 if (cast >= dstt && dstt <= TYP_INT && srct <= TYP_INT)
13674 op2 = tree->gtOp2 = op2->gtCast.CastOp();
13678 /* Make sure we have the operator range right */
// These asserts pin the genTreeOps enum layout that the arithmetic
// "cmop - GT_ADD + GT_ASG_ADD" below depends on.
13680 noway_assert(GT_SUB == GT_ADD + 1);
13681 noway_assert(GT_MUL == GT_ADD + 2);
13682 noway_assert(GT_DIV == GT_ADD + 3);
13683 noway_assert(GT_MOD == GT_ADD + 4);
13684 noway_assert(GT_UDIV == GT_ADD + 5);
13685 noway_assert(GT_UMOD == GT_ADD + 6);
13687 noway_assert(GT_OR == GT_ADD + 7);
13688 noway_assert(GT_XOR == GT_ADD + 8);
13689 noway_assert(GT_AND == GT_ADD + 9);
13691 noway_assert(GT_LSH == GT_ADD + 10);
13692 noway_assert(GT_RSH == GT_ADD + 11);
13693 noway_assert(GT_RSZ == GT_ADD + 12);
13695 /* Check for a suitable operator on the RHS */
// NOTE(review): 'cmop' is declared in code elided from this copy;
// it holds the RHS operator being considered for folding.
13697 cmop = op2->OperGet();
13702 // GT_CHS only supported for integer types
13703 if (varTypeIsFloating(tree->TypeGet()))
13711 // GT_ASG_MUL only supported for floating point types
13712 if (!varTypeIsFloating(tree->TypeGet()))
13721 if (op2->gtOverflow())
13723 /* Disable folding into "<op>=" if the result can be
13724 visible to anyone as <op> may throw an exception and
13725 the assignment should not proceed
13726 We are safe with an assignment to a local variable
13728 if (ehBlockHasExnFlowDsc(compCurBB))
13732 if (!dstIsSafeLclVar)
13737 #ifndef _TARGET_AMD64_
13738 // This is hard for byte-operations as we need to make
13739 // sure both operands are in RBM_BYTE_REGS.
13740 if (varTypeIsByte(op2->TypeGet()))
13742 #endif // _TARGET_AMD64_
13747 // GT_ASG_DIV only supported for floating point types
13748 if (!varTypeIsFloating(tree->TypeGet()))
// Ask the helper whether the pattern is "a = a <op> x" (or the reversed
// commutative form) and is profitable/legal to fold.
13761 bool bReverse = false;
13762 bool bAsgOpFoldable = fgShouldCreateAssignOp(tree, &bReverse);
13763 if (bAsgOpFoldable)
13767 // We will transform this from "a = x <op> a" to "a <op>= x"
13768 // so we can now destroy the duplicate "a"
13769 DEBUG_DESTROY_NODE(op2->gtOp.gtOp2);
13770 op2->gtOp.gtOp2 = op2->gtOp.gtOp1;
13773 /* Special case: "x |= -1" and "x &= 0" */
// "x & 0" is always 0 and "x | -1" is always -1, so the whole RHS
// collapses to the constant and the tree becomes a plain assignment.
13774 if (((cmop == GT_AND) && op2->gtOp.gtOp2->IsIntegralConst(0)) ||
13775 ((cmop == GT_OR) && op2->gtOp.gtOp2->IsIntegralConst(-1)))
13777 /* Simply change to an assignment */
13778 tree->gtOp2 = op2->gtOp.gtOp2;
13782 if (cmop == GT_NEG)
13784 /* This is "x = -x;", use the flipsign operator */
13786 tree->ChangeOper(GT_CHS);
13788 if (op1->gtOper == GT_LCL_VAR)
13790 op1->gtFlags |= GTF_VAR_USEASG;
13793 tree->gtOp2 = gtNewIconNode(0, op1->TypeGet());
// Folding a right shift of a small unsigned type into ">>=" would shrink
// the operation width and can change which bits survive; see below.
13798 if (cmop == GT_RSH && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
13800 // Changing from x = x op y to x op= y when x is a small integer type
13801 // makes the op size smaller (originally the op size was 32 bits, after
13802 // sign or zero extension of x, and there is an implicit truncation in the
13804 // This is ok in most cases because the upper bits were
13805 // lost when assigning the op result to a small type var,
13806 // but it may not be ok for the right shift operation where the higher bits
13807 // could be shifted into the lower bits and preserved.
13808 // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) ==
13809 // (sbyte)x >>signed y)) as do unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) ==
13810 // (ubyte)x >>unsigned y), but signed right shift of an unsigned small type may give the
13813 // e.g. (ubyte)((int)(ubyte)0xf0 >>signed 4) == 0x0f,
13814 // but (ubyte)0xf0 >>signed 4 == 0xff which is incorrect.
13815 // The result becomes correct if we use >>unsigned instead of >>signed.
13816 noway_assert(op1->TypeGet() == op2->gtOp.gtOp1->TypeGet());
13820 /* Replace with an assignment operator */
13821 noway_assert(GT_ADD - GT_ADD == GT_ASG_ADD - GT_ASG_ADD);
13822 noway_assert(GT_SUB - GT_ADD == GT_ASG_SUB - GT_ASG_ADD);
13823 noway_assert(GT_OR - GT_ADD == GT_ASG_OR - GT_ASG_ADD);
13824 noway_assert(GT_XOR - GT_ADD == GT_ASG_XOR - GT_ASG_ADD);
13825 noway_assert(GT_AND - GT_ADD == GT_ASG_AND - GT_ASG_ADD);
13826 noway_assert(GT_LSH - GT_ADD == GT_ASG_LSH - GT_ASG_ADD);
13827 noway_assert(GT_RSH - GT_ADD == GT_ASG_RSH - GT_ASG_ADD);
13828 noway_assert(GT_RSZ - GT_ADD == GT_ASG_RSZ - GT_ASG_ADD);
// Map the RHS operator onto the parallel GT_ASG_* operator range and pull
// the non-duplicated operand up as the new op2.
13830 tree->SetOper((genTreeOps)(cmop - GT_ADD + GT_ASG_ADD));
13831 tree->gtOp2 = op2->gtOp.gtOp2;
13833 /* Propagate GTF_OVERFLOW */
13835 if (op2->gtOverflowEx())
13837 tree->gtType = op2->gtType;
13838 tree->gtFlags |= (op2->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
13841 #if FEATURE_SET_FLAGS
13843 /* Propagate GTF_SET_FLAGS */
13844 if (op2->gtSetFlags())
13846 tree->gtRequestSetFlags();
13849 #endif // FEATURE_SET_FLAGS
13851 DEBUG_DESTROY_NODE(op2);
13854 /* The target is used as well as being defined */
13855 if (op1->OperIsLocal())
13857 op1->gtFlags &= ~GTF_VAR_USEDEF;
13858 op1->gtFlags |= GTF_VAR_USEASG;
13861 #if CPU_HAS_FP_SUPPORT
13862 /* Check for the special case "x += y * x;" */
13864 // GT_ASG_MUL only supported for floating point types
13865 if (cmop != GT_ADD && cmop != GT_SUB)
13870 if (op2->gtOper == GT_MUL && varTypeIsFloating(tree->TypeGet()))
13872 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
13874 /* Change "x += x * y" into "x *= (y + 1)" */
13876 op2 = op2->gtOp.gtOp2;
13878 else if (GenTree::Compare(op1, op2->gtOp.gtOp2))
13880 /* Change "x += y * x" into "x *= (y + 1)" */
13882 op2 = op2->gtOp.gtOp1;
13889 op1 = gtNewDconNode(1.0);
13891 /* Now make the "*=" node */
13893 if (cmop == GT_ADD)
13895 /* Change "x += x * y" into "x *= (y + 1)" */
13897 tree->gtOp2 = op2 = gtNewOperNode(GT_ADD, tree->TypeGet(), op2, op1);
13901 /* Change "x -= x * y" into "x *= (1 - y)" */
13903 noway_assert(cmop == GT_SUB);
13904 tree->gtOp2 = op2 = gtNewOperNode(GT_SUB, tree->TypeGet(), op1, op2);
13906 tree->ChangeOper(GT_ASG_MUL);
13908 #endif // CPU_HAS_FP_SUPPORT
13916 /* Is the destination identical to the first RHS sub-operand? */
13918 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
13920 /* This is "x = ~x" which is the same as "x ^= -1"
13921 * Transform the node into a GT_ASG_XOR */
13923 noway_assert(genActualType(typ) == TYP_INT || genActualType(typ) == TYP_LONG);
13925 op2->gtOp.gtOp2 = (genActualType(typ) == TYP_INT) ? gtNewIconNode(-1) : gtNewLconNode(-1);
// --- Strength reduction: multiply by a scale-index constant ---
13940 /* Check for the case "(val + icon) * icon" */
13942 if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD)
13944 GenTreePtr add = op1->gtOp.gtOp2;
// GetScaleIndexMul() != 0 means imul is a valid addressing-mode scale,
// which is the case this distribution targets.
13946 if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
// Distributing the multiply could change overflow behavior, so bail
// out if either node is an overflow-checked operation.
13948 if (tree->gtOverflow() || op1->gtOverflow())
13953 ssize_t imul = op2->gtIntCon.gtIconVal;
13954 ssize_t iadd = add->gtIntCon.gtIconVal;
13956 /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */
13959 tree->ChangeOper(oper);
13961 op2->gtIntCon.gtIconVal = iadd * imul;
13963 op1->ChangeOper(GT_MUL);
13965 add->gtIntCon.gtIconVal = imul;
13966 #ifdef _TARGET_64BIT_
13967 if (add->gtType == TYP_INT)
13969 // we need to properly re-sign-extend or truncate after multiplying two int constants above
13970 add->AsIntCon()->TruncateOrSignExtend32();
13972 #endif //_TARGET_64BIT_
13980 /* For "val / 1", just return "val" */
13982 if (op2->IsIntegralConst(1))
13984 DEBUG_DESTROY_NODE(tree);
// --- Strength reduction: shift-left of (val + constant) ---
13992 /* Check for the case "(val + icon) << icon" */
13994 if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
13996 GenTreePtr cns = op1->gtOp.gtOp2;
13998 if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
14000 ssize_t ishf = op2->gtIntConCommon.IconValue();
14001 ssize_t iadd = cns->gtIntConCommon.IconValue();
14003 // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
14005 /* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
14007 tree->ChangeOper(GT_ADD);
14008 ssize_t result = iadd << ishf;
14009 op2->gtIntConCommon.SetIconValue(result);
14010 #ifdef _TARGET_64BIT_
14011 if (op1->gtType == TYP_INT)
14013 op2->AsIntCon()->TruncateOrSignExtend32();
14015 #endif // _TARGET_64BIT_
14017 // we are reusing the shift amount node here, but the type we want is that of the shift result
14018 op2->gtType = op1->gtType;
// Preserve a constant-index field sequence (used for array-element field
// attribution) when the shifted constant carries one.
14020 if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq != nullptr &&
14021 cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
14023 assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr);
14024 op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq;
14027 op1->ChangeOper(GT_LSH);
14029 cns->gtIntConCommon.SetIconValue(ishf);
// --- GT_XOR simplifications (skipped during CSE to keep defs/uses intact) ---
14037 if (!optValnumCSE_phase)
14039 /* "x ^ -1" is "~x" */
14041 if (op2->IsIntegralConst(-1))
14043 tree->ChangeOper(GT_NOT);
14044 tree->gtOp2 = nullptr;
14045 DEBUG_DESTROY_NODE(op2);
14047 else if (op2->IsIntegralConst(1) && op1->OperIsCompare())
14049 /* "binaryVal ^ 1" is "!binaryVal" */
14050 gtReverseCond(op1);
14051 DEBUG_DESTROY_NODE(op2);
14052 DEBUG_DESTROY_NODE(tree);
14060 // Initialization values for initBlk have special semantics - their lower
14061 // byte is used to fill the struct. However, we allow 0 as a "bare" value,
14062 // which enables them to get a VNForZero, and be propagated.
14063 if (op1->IsIntegralConst(0))
14075 //------------------------------------------------------------------------
14076 // fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b
14077 // (see ECMA III 3.55 and III.3.56).
14080 // tree - The GT_MOD/GT_UMOD tree to morph
14083 // The morphed tree
14086 // For ARM64 we don't have a remainder instruction so this transform is
14087 // always done. For XARCH this transform is done if we know that magic
14088 // division will be used, in that case this transform allows CSE to
14089 // eliminate the redundant div from code like "x = a / 3; y = a % 3;".
14091 // This method will produce the above expression if 'a' and 'b' are
14092 // leaf nodes, otherwise, if any of them is not a leaf it will spill
14093 // its value into a temporary variable, an example:
14094 // (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
// Rewrites a GT_MOD/GT_UMOD node in place: the node itself becomes the
// (signed or unsigned) division, and a new "numerator - (numerator/denominator)
// * denominator" expression is built around it. Non-leaf operands are spilled
// to temps via fgMakeMultiUse so they are evaluated exactly once.
14096 GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
14098 if (tree->OperGet() == GT_MOD)
14100 tree->SetOper(GT_DIV);
14102 else if (tree->OperGet() == GT_UMOD)
14104 tree->SetOper(GT_UDIV);
// Any operator other than GT_MOD/GT_UMOD is a caller bug.
14108 noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
14111 var_types type = tree->gtType;
14112 GenTree* denominator = tree->gtOp2;
14113 GenTree* numerator = tree->gtOp1;
// A non-leaf numerator is spilled to a temp (and 'numerator' becomes a use
// of that temp) so the clone below does not duplicate side effects.
14115 if (!numerator->OperIsLeaf())
14117 numerator = fgMakeMultiUse(&tree->gtOp1);
14119 else if (lvaLocalVarRefCounted && numerator->OperIsLocal())
14121 // Morphing introduces new lclVar references. Increase ref counts
14122 lvaIncRefCnts(numerator);
14125 if (!denominator->OperIsLeaf())
14127 denominator = fgMakeMultiUse(&tree->gtOp2);
14129 else if (lvaLocalVarRefCounted && denominator->OperIsLocal())
14131 // Morphing introduces new lclVar references. Increase ref counts
14132 lvaIncRefCnts(denominator);
14135 // The numerator and denominator may have been assigned to temps, in which case
14136 // their defining assignments are in the current tree. Therefore, we need to
14137 // set the execution order accordingly on the nodes we create.
14138 // That is, the "mul" will be evaluated in "normal" order, and the "sub" must
14139 // be set to be evaluated in reverse order.
// 'tree' here is the div node created above; it (with any temp-defining
// assignments inside it) must run before the cloned numerator is read,
// hence GTF_REVERSE_OPS on the subtraction.
14141 GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator));
14142 assert(!mul->IsReverseOp());
14143 GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul);
14144 sub->gtFlags |= GTF_REVERSE_OPS;
14147 sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
14153 //------------------------------------------------------------------------------
14154 // fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
14158 // oper - Operation to check
14161 // True if the operation can be a root of a bitwise rotation tree; false otherwise.
14163 bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper)
14165 return (oper == GT_OR) || (oper == GT_XOR);
14168 //------------------------------------------------------------------------------
14169 // fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return
14170 // an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
14173 // tree - tree to check for a rotation pattern
14176 // An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
14179 // The input is a GT_OR or a GT_XOR tree.
// Pattern-matches a pair of opposite shifts of the same value combined with
// OR/XOR and, when the shift amounts provably sum to the bit width, rewrites
// the tree as a single GT_ROL/GT_ROR. Returns the original tree when no
// rotation pattern is found (only relevant on non-legacy backends).
// NOTE(review): some structural lines (braces, returns, rotateOp selection)
// are missing from this copy; comments describe only the visible code.
14181 GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
14183 #ifndef LEGACY_BACKEND
14185 // Check for a rotation pattern, e.g.,
14198 // The patterns recognized:
14199 // (x << (y & M)) op (x >>> ((-y + N) & M))
14200 // (x >>> ((-y + N) & M)) op (x << (y & M))
14202 // (x << y) op (x >>> (-y + N))
14203 // (x >>> (-y + N)) op (x << y)
14205 // (x >>> (y & M)) op (x << ((-y + N) & M))
14206 // (x << ((-y + N) & M)) op (x >>> (y & M))
14208 // (x >>> y) op (x << (-y + N))
14209 // (x << (-y + N)) op (x >>> y)
14211 // (x << c1) op (x >>> c2)
14212 // (x >>> c1) op (x << c2)
14215 // c1 and c2 are const
14216 // c1 + c2 == bitsize(x)
14219 // M & (N - 1) == N - 1
14220 // op is either | or ^
14222 if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
14224 // We can't do anything if the tree has assignments, calls, or volatile
14225 // reads. Note that we allow GTF_EXCEPT side effect since any exceptions
14226 // thrown by the original tree will be thrown by the transformed tree as well.
14230 genTreeOps oper = tree->OperGet();
14231 assert(fgOperIsBitwiseRotationRoot(oper));
14233 // Check if we have an LSH on one side of the OR and an RSZ on the other side.
14234 GenTreePtr op1 = tree->gtGetOp1();
14235 GenTreePtr op2 = tree->gtGetOp2();
14236 GenTreePtr leftShiftTree = nullptr;
14237 GenTreePtr rightShiftTree = nullptr;
14238 if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
14240 leftShiftTree = op1;
14241 rightShiftTree = op2;
14243 else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
14245 leftShiftTree = op2;
14246 rightShiftTree = op1;
14253 // Check if the trees representing the value to shift are identical.
14254 // We already checked that there are no side effects above.
14255 if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
14257 GenTreePtr rotatedValue = leftShiftTree->gtGetOp1();
14258 var_types rotatedValueActualType = genActualType(rotatedValue->gtType);
14259 ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8;
// Rotations are only recognized for 32- and 64-bit values.
14260 noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
14261 GenTreePtr leftShiftIndex = leftShiftTree->gtGetOp2();
14262 GenTreePtr rightShiftIndex = rightShiftTree->gtGetOp2();
14264 // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
14265 // shouldn't be masked for the transformation to be valid. If additional
14266 // higher bits are not masked, the transformation is still valid since the result
14267 // of MSIL shift instructions is unspecified if the shift amount is greater or equal
14268 // than the width of the value being shifted.
14269 ssize_t minimalMask = rotatedValueBitSize - 1;
14270 ssize_t leftShiftMask = -1;
14271 ssize_t rightShiftMask = -1;
// Strip a "(index & constant)" mask from either shift index, remembering
// the mask so it can be validated against minimalMask below.
14273 if ((leftShiftIndex->OperGet() == GT_AND))
14275 if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
14277 leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
14278 leftShiftIndex = leftShiftIndex->gtGetOp1();
14286 if ((rightShiftIndex->OperGet() == GT_AND))
14288 if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
14290 rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
14291 rightShiftIndex = rightShiftIndex->gtGetOp1();
14299 if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask))
14301 // The shift index is overmasked, e.g., we have
14302 // something like (x << (y & 15)) or
14303 // (x >> ((32 - y) & 15)) with 32 bit x.
14304 // The transformation is not valid.
14308 GenTreePtr shiftIndexWithAdd = nullptr;
14309 GenTreePtr shiftIndexWithoutAdd = nullptr;
14310 genTreeOps rotateOp = GT_NONE;
14311 GenTreePtr rotateIndex = nullptr;
// Variable-index form: one side's index must be "(-y + N)" — an ADD whose
// first operand is a negation of the other side's index.
14313 if (leftShiftIndex->OperGet() == GT_ADD)
14315 shiftIndexWithAdd = leftShiftIndex;
14316 shiftIndexWithoutAdd = rightShiftIndex;
14319 else if (rightShiftIndex->OperGet() == GT_ADD)
14321 shiftIndexWithAdd = rightShiftIndex;
14322 shiftIndexWithoutAdd = leftShiftIndex;
14326 if (shiftIndexWithAdd != nullptr)
14328 if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
14330 if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
14332 if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
14334 if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
14336 // We found one of these patterns:
14337 // (x << (y & M)) | (x >>> ((-y + N) & M))
14338 // (x << y) | (x >>> (-y + N))
14339 // (x >>> (y & M)) | (x << ((-y + N) & M))
14340 // (x >>> y) | (x << (-y + N))
14341 // where N == bitsize(x), M is const, and
14342 // M & (N - 1) == N - 1
14343 CLANG_FORMAT_COMMENT_ANCHOR;
14345 #ifndef _TARGET_64BIT_
14346 if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
14348 // TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86.
14349 // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
14350 // to add helpers for GT_ROL and GT_ROR.
14355 rotateIndex = shiftIndexWithoutAdd;
// Constant-index form: the two constant shift amounts must sum to the
// bit width of the rotated value.
14361 else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI()))
14363 if (leftShiftIndex->gtIntCon.gtIconVal + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
14365 // We found this pattern:
14366 // (x << c1) | (x >>> c2)
14367 // where c1 and c2 are const and c1 + c2 == bitsize(x)
14369 rotateIndex = leftShiftIndex;
14373 if (rotateIndex != nullptr)
14375 noway_assert(GenTree::OperIsRotate(rotateOp));
14377 unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT;
14379 // We can use the same tree only during global morph; reusing the tree in a later morph
14380 // may invalidate value numbers.
// In-place rewrite: the OR/XOR node becomes the rotate node.
14383 tree->gtOp.gtOp1 = rotatedValue;
14384 tree->gtOp.gtOp2 = rotateIndex;
14385 tree->ChangeOper(rotateOp);
14387 unsigned childFlags = 0;
14388 for (GenTree* op : tree->Operands())
14390 childFlags |= (op->gtFlags & GTF_ALL_EFFECT);
14393 // The parent's flags should be a superset of its operands' flags
14394 noway_assert((inputTreeEffects & childFlags) == childFlags);
// Otherwise (not global morph) build a fresh rotate node instead of
// mutating the existing one.
14398 tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex);
14399 noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT));
14405 #endif // LEGACY_BACKEND
14409 #if !CPU_HAS_FP_SUPPORT
// fgMorphToEmulatedFP: on targets without hardware floating-point support,
// rewrite an FP node (arithmetic op, comparison, or cast) into a call to the
// matching software-FP JIT helper via fgMorphIntoHelperCall, and return the
// morphed tree.
// NOTE(review): this extract is elided -- case labels and braces between the
// visible statements are missing from this view.
14410 GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
14413 genTreeOps oper = tree->OperGet();
14414 var_types typ = tree->TypeGet();
14415 GenTreePtr op1 = tree->gtOp.gtOp1;
14416 GenTreePtr op2 = tree->gtGetOp2IfPresent();
14419 We have to use helper calls for all FP operations:
14421 FP operators that operate on FP values
14422 casts to and from FP
14423 comparisons of FP values
// Only rewrite when the result or the first operand is floating-point.
14426 if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet())))
// argc: outgoing-arg stack slots occupied by one operand of this type.
14430 size_t argc = genTypeStSz(typ);
14432 /* Not all FP operations need helper calls */
14446 /* If the result isn't FP, it better be a compare or cast */
14448 if (!(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST))
14451 noway_assert(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST);
14454 /* Keep track of how many arguments we're passing */
14456 fgPtrArgCntCur += argc;
14458 /* Is this a binary operator? */
14462 /* Add the second operand to the argument count */
14464 fgPtrArgCntCur += argc;
14467 /* What kind of an operator do we have? */
// Select the float (R4) helper first; TYP_DOUBLE is handled below by bumping
// the helper constant to its R8 counterpart.
14472 helper = CPX_R4_ADD;
14475 helper = CPX_R4_SUB;
14478 helper = CPX_R4_MUL;
14481 helper = CPX_R4_DIV;
14483 // case GT_MOD: helper = CPX_R4_REM; break;
14486 helper = CPX_R4_EQ;
14489 helper = CPX_R4_NE;
14492 helper = CPX_R4_LT;
14495 helper = CPX_R4_LE;
14498 helper = CPX_R4_GE;
14501 helper = CPX_R4_GT;
14508 noway_assert(!"unexpected FP binary op");
// Binary op: the argument list is built with op2 listed before op1.
14512 args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
14522 noway_assert(!"FP cast");
14525 helper = CPX_R4_NEG;
14532 noway_assert(!"unexpected FP unary op");
14536 args = gtNewArgList(tree->gtOp.gtOp1);
14539 /* If we have double result/operands, modify the helper */
// For doubles, advance the helper from CPX_R4_* to CPX_R8_*; the asserts
// verify the required "+1" layout of the helper constants.
14541 if (typ == TYP_DOUBLE)
14543 noway_assert(CPX_R4_NEG + 1 == CPX_R8_NEG);
14544 noway_assert(CPX_R4_ADD + 1 == CPX_R8_ADD);
14545 noway_assert(CPX_R4_SUB + 1 == CPX_R8_SUB);
14546 noway_assert(CPX_R4_MUL + 1 == CPX_R8_MUL);
14547 noway_assert(CPX_R4_DIV + 1 == CPX_R8_DIV);
14553 noway_assert(tree->OperIsCompare());
14555 noway_assert(CPX_R4_EQ + 1 == CPX_R8_EQ);
14556 noway_assert(CPX_R4_NE + 1 == CPX_R8_NE);
14557 noway_assert(CPX_R4_LT + 1 == CPX_R8_LT);
14558 noway_assert(CPX_R4_LE + 1 == CPX_R8_LE);
14559 noway_assert(CPX_R4_GE + 1 == CPX_R8_GE);
14560 noway_assert(CPX_R4_GT + 1 == CPX_R8_GT);
// Morph the node into the actual helper call.
14563 tree = fgMorphIntoHelperCall(tree, helper, args);
// Record the high-water mark of outgoing arg slots, then release this
// call's slots from the running count.
14565 if (fgPtrArgCntMax < fgPtrArgCntCur)
14567 JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
14568 fgPtrArgCntMax = fgPtrArgCntCur;
14571 fgPtrArgCntCur -= argc;
// NOTE(review): fragment of return-value morphing whose enclosing function
// header is elided from this extract -- confirm context against the full file.
// If this block is the special method-exit block, the operand must be void;
// otherwise a real return value is morphed and type-checked below.
14579 if (compCurBB == genReturnBB)
14581 /* This is the 'exitCrit' call at the exit label */
14583 noway_assert(op1->gtType == TYP_VOID);
14584 noway_assert(op2 == 0);
14586 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
14591 /* This is a (real) return value -- check its type */
14592 CLANG_FORMAT_COMMENT_ANCHOR;
// The return value's actual type must match the declared return type,
// with the two sanctioned exceptions checked below.
14595 if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
14597 bool allowMismatch = false;
14599 // Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa
14600 if ((info.compRetType == TYP_BYREF && genActualType(op1->TypeGet()) == TYP_I_IMPL) ||
14601 (op1->TypeGet() == TYP_BYREF && genActualType(info.compRetType) == TYP_I_IMPL))
14602 allowMismatch = true;
// Any floating-point-to-floating-point mismatch (float vs double) is allowed.
14604 if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet()))
14605 allowMismatch = true;
14607 if (!allowMismatch)
14608 NO_WAY("Return type mismatch");
14618 /*****************************************************************************
14620 * Transform the given tree for code generation and return an equivalent tree.
// fgMorphTree is the central morphing driver: it optionally applies local
// assertion propagation, then dispatches on the node's kind (constant, leaf,
// simple unary/binary op, or special operator) to the matching fgMorph*
// routine, and finishes with fgMorphTreeDone. The returned tree may be a
// different node than the one passed in -- callers must store it back.
14623 GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac)
14625 noway_assert(tree);
14626 noway_assert(tree->gtOper != GT_STMT);
// Debug aid: trap when morphing the tree whose id matches JitBreakMorphTree.
14631 if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID)
14633 noway_assert(!"JitBreakMorphTree hit");
14639 int thisMorphNum = 0;
14640 if (verbose && treesBeforeAfterMorph)
14642 thisMorphNum = morphNum++;
14643 printf("\nfgMorphTree (before %d):\n", thisMorphNum);
14648 /*-------------------------------------------------------------------------
14649 * fgMorphTree() can potentially replace a tree with another, and the
14650 * caller has to store the return value correctly.
14651 * Turn this on to always make copy of "tree" here to shake out
14652 * hidden/unupdated references.
// Stress mode: clone the node into a fresh (possibly large) node so any
// stale references to the old node are flushed out.
14657 if (compStressCompile(STRESS_GENERIC_CHECK, 0))
14661 #ifdef SMALL_TREE_NODES
14662 if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL)
14664 copy = gtNewLargeOperNode(GT_ADD, TYP_INT);
14669 copy = new (this, GT_CALL) GenTreeCall(TYP_INT);
14672 copy->CopyFrom(tree, this);
14674 #if defined(LATE_DISASM)
14675 // GT_CNS_INT is considered small, so CopyFrom() won't copy all fields
14676 if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
14678 copy->gtIntCon.gtIconHdl.gtIconHdl1 = tree->gtIntCon.gtIconHdl.gtIconHdl1;
14679 copy->gtIntCon.gtIconHdl.gtIconHdl2 = tree->gtIntCon.gtIconHdl.gtIconHdl2;
14683 DEBUG_DESTROY_NODE(tree);
14690 /* Ensure that we haven't morphed this node already */
14691 assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
14693 #if LOCAL_ASSERTION_PROP
14694 /* Before morphing the tree, we try to propagate any active assertions */
14695 if (optLocalAssertionProp)
14697 /* Do we have any active assertions? */
14699 if (optAssertionCount > 0)
// Re-apply assertion propagation until it reports no further change; each
// successful propagation may expose another opportunity.
14701 GenTreePtr newTree = tree;
14702 while (newTree != nullptr)
14705 /* newTree is non-Null if we propagated an assertion */
14706 newTree = optAssertionProp(apFull, tree, nullptr);
14708 noway_assert(tree != nullptr);
14711 PREFAST_ASSUME(tree != nullptr);
14715 /* Save the original un-morphed tree for fgMorphTreeDone */
14717 GenTreePtr oldTree = tree;
14719 /* Figure out what kind of a node we have */
14721 unsigned kind = tree->OperKind();
14723 /* Is this a constant node? */
14725 if (kind & GTK_CONST)
14727 tree = fgMorphConst(tree);
14731 /* Is this a leaf node? */
14733 if (kind & GTK_LEAF)
14735 tree = fgMorphLeaf(tree);
14739 /* Is it a 'simple' unary/binary operator? */
14741 if (kind & GTK_SMPOP)
14743 tree = fgMorphSmpOp(tree, mac);
14747 /* See what kind of a special operator we have here */
14749 switch (tree->OperGet())
14752 tree = fgMorphField(tree, mac);
14756 tree = fgMorphCall(tree->AsCall());
14759 case GT_ARR_BOUNDS_CHECK:
14760 #ifdef FEATURE_SIMD
14762 #endif // FEATURE_SIMD
14764 fgSetRngChkTarget(tree);
// Bounds check: morph both operands, then either collapse to a comma-throw
// index or propagate the operands' effect flags up to the check node.
14766 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
14767 bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex);
14768 bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen);
14769 // If the index is a comma(throw, x), just return that.
14770 if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex))
14772 tree = bndsChk->gtIndex;
14775 // Propagate effects flags upwards
14776 bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT);
14777 bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
14779 // Otherwise, we don't change the tree.
// Multi-dimensional array element: morph the array object and every index,
// accumulating effect flags on the parent node as we go.
14784 tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj);
14785 tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT;
14788 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
14790 tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]);
14791 tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
14795 fgSetRngChkTarget(tree, false);
14799 case GT_ARR_OFFSET:
14800 tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset);
14801 tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT;
14802 tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex);
14803 tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT;
14804 tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj);
14805 tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT;
14808 fgSetRngChkTarget(tree, false);
// Compare-exchange: morph all three operands (location, value, comparand).
14813 tree->gtCmpXchg.gtOpLocation = fgMorphTree(tree->gtCmpXchg.gtOpLocation);
14814 tree->gtCmpXchg.gtOpValue = fgMorphTree(tree->gtCmpXchg.gtOpValue);
14815 tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand);
14818 case GT_STORE_DYN_BLK:
14819 tree->gtDynBlk.Data() = fgMorphTree(tree->gtDynBlk.Data());
14822 tree->gtDynBlk.Addr() = fgMorphTree(tree->gtDynBlk.Addr());
14823 tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize);
14830 noway_assert(!"unexpected operator");
// Common post-processing: assertion kill/gen and the morphed-node debug mark.
14834 fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum));
14839 #if LOCAL_ASSERTION_PROP
14840 //------------------------------------------------------------------------
14841 // fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum
14844 // lclNum - The varNum of the lclVar for which we're killing assertions.
14845 // tree - (DEBUG only) the tree responsible for killing its assertions.
14847 void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTreePtr tree))
14849 /* All dependent assertions are killed here */
// Copy the dependency bit-vector for lclNum; bits are cleared as the
// corresponding assertions are removed, letting the loop terminate early
// once the mask is empty.
14851 ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));
// Walk the assertion table from the highest index downwards -- presumably so
// that optAssertionRemove() does not disturb indices not yet visited (TODO
// confirm against optAssertionRemove's compaction behavior).
14855 AssertionIndex index = optAssertionCount;
14856 while (killed && (index > 0))
14858 if (BitVecOps::IsMember(apTraits, killed, index - 1))
// Sanity: the assertion being killed must reference lclNum on one side.
14861 AssertionDsc* curAssertion = optGetAssertion(index);
14862 noway_assert((curAssertion->op1.lcl.lclNum == lclNum) ||
14863 ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum)));
14866 printf("\nThe assignment ");
14868 printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
14869 optPrintAssertion(curAssertion);
14872 // Remove this bit from the killed mask
14873 BitVecOps::RemoveElemD(apTraits, killed, index - 1);
14875 optAssertionRemove(index);
14881 // killed mask should now be zero
14882 noway_assert(BitVecOps::IsEmpty(apTraits, killed));
14885 //------------------------------------------------------------------------
14886 // fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum.
14889 // lclNum - The varNum of the lclVar for which we're killing assertions.
14890 // tree - (DEBUG only) the tree responsible for killing its assertions.
14893 // For structs and struct fields, it will invalidate the children and parent
14895 // Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar.
14897 void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree))
14899 LclVarDsc* varDsc = &lvaTable[lclNum];
// Promoted struct: a def of the struct invalidates assertions on every
// field local as well as on the struct local itself.
14901 if (varDsc->lvPromoted)
14903 noway_assert(varTypeIsStruct(varDsc));
14905 // Kill the field locals.
14906 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
14908 fgKillDependentAssertionsSingle(i DEBUGARG(tree));
14911 // Kill the struct local itself.
14912 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
// Struct field: a def of the field also invalidates the parent struct.
14914 else if (varDsc->lvIsStructField)
14916 // Kill the field local.
14917 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
14919 // Kill the parent struct.
14920 fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree));
// Ordinary local: only its own assertions need to die.
14924 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
14927 #endif // LOCAL_ASSERTION_PROP
14929 /*****************************************************************************
14931 * This function is called to complete the morphing of a tree node
14932 * It should only be called once for each node.
14933 * If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated,
14934 * to enforce the invariant that each node is only morphed once.
14935 * If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced
14936 * by an equivalent tree.
14940 void Compiler::fgMorphTreeDone(GenTreePtr tree,
14941 GenTreePtr oldTree /* == NULL */
14942 DEBUGARG(int morphNum))
14945 if (verbose && treesBeforeAfterMorph)
14947 printf("\nfgMorphTree (after %d):\n", morphNum);
14949 printf(""); // in our logic this causes a flush
// The assertion bookkeeping below applies only during one-pass global morph.
14953 if (!fgGlobalMorph)
// If morphing replaced the node, the replacement must already be marked as
// morphed; carry any attached test data over to it.
14958 if ((oldTree != nullptr) && (oldTree != tree))
14960 /* Ensure that we have morphed this node */
14961 assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!");
14964 TransferTestDataToNode(oldTree, tree);
14969 // Ensure that we haven't morphed this node already
14970 assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
// Constants need no assertion processing.
14973 if (tree->OperKind() & GTK_CONST)
14978 #if LOCAL_ASSERTION_PROP
14980 if (!optLocalAssertionProp)
14985 /* Do we have any active assertions? */
14987 if (optAssertionCount > 0)
14989 /* Is this an assignment to a local variable */
14990 GenTreeLclVarCommon* lclVarTree = nullptr;
// A def of a local invalidates every assertion that depends on that local.
14991 if (tree->DefinesLocal(this, &lclVarTree))
14993 unsigned lclNum = lclVarTree->gtLclNum;
14994 noway_assert(lclNum < lvaCount);
14995 fgKillDependentAssertions(lclNum DEBUGARG(tree));
14999 /* If this tree makes a new assertion - make it available */
15000 optAssertionGen(tree);
15002 #endif // LOCAL_ASSERTION_PROP
15007 /* Mark this node as being morphed */
15008 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
15012 /*****************************************************************************
15014 * Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants
15015 * Returns true if we modified the flow graph
// fgFoldConditional: when the terminating GT_JTRUE/GT_SWITCH condition of a
// block has folded to a constant (or to an unconditional throw), rewrite the
// block's jump kind, fix predecessor lists, adjust edge/block weights, and
// update the loop table. Skipped under debug code / MinOpts so no code is
// made unreachable.
15018 bool Compiler::fgFoldConditional(BasicBlock* block)
15020 bool result = false;
15022 // We don't want to make any code unreachable
15023 if (opts.compDbgCode || opts.MinOpts())
15028 if (block->bbJumpKind == BBJ_COND)
15030 noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
// The conditional is always the last statement of the block.
15032 GenTreePtr stmt = block->bbTreeList->gtPrev;
15034 noway_assert(stmt->gtNext == nullptr);
// A call in the terminator position means the JTRUE was replaced by an
// unconditional throw helper call.
15036 if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
15038 noway_assert(fgRemoveRestOfBlock);
15040 /* Unconditional throw - transform the basic block into a BBJ_THROW */
15041 fgConvertBBToThrowBB(block);
15043 /* Remove 'block' from the predecessor list of 'block->bbNext' */
15044 fgRemoveRefPred(block->bbNext, block);
15046 /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */
15047 fgRemoveRefPred(block->bbJumpDest, block);
15052 printf("\nConditional folded at BB%02u\n", block->bbNum);
15053 printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
15059 noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
15061 /* Did we fold the conditional */
15063 noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
15065 cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
15067 if (cond->OperKind() & GTK_CONST)
15069 /* Yupee - we folded the conditional!
15070 * Remove the conditional statement */
15072 noway_assert(cond->gtOper == GT_CNS_INT);
15073 noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0));
15075 /* remove the statement from bbTreelist - No need to update
15076 * the reference counts since there are no lcl vars */
15077 fgRemoveStmt(block, stmt);
15079 // block is a BBJ_COND that we are folding the conditional for
15080 // bTaken is the path that will always be taken from block
15081 // bNotTaken is the path that will never be taken from block
15083 BasicBlock* bTaken;
15084 BasicBlock* bNotTaken;
15086 if (cond->gtIntCon.gtIconVal != 0)
15088 /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */
15089 block->bbJumpKind = BBJ_ALWAYS;
15090 bTaken = block->bbJumpDest;
15091 bNotTaken = block->bbNext;
15095 /* Unmark the loop if we are removing a backwards branch */
15096 /* dest block must also be marked as a loop head and */
15097 /* We must be able to reach the backedge block */
15098 if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) &&
15099 fgReachable(block->bbJumpDest, block))
15101 optUnmarkLoopBlocks(block->bbJumpDest, block);
15104 /* JTRUE 0 - transform the basic block into a BBJ_NONE */
15105 block->bbJumpKind = BBJ_NONE;
15106 noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
15107 bTaken = block->bbNext;
15108 bNotTaken = block->bbJumpDest;
// Profile maintenance: since the (block -> bNotTaken) edge is going away,
// try to tighten the weights of block/bTaken and the surviving edge.
15111 if (fgHaveValidEdgeWeights)
15113 // We are removing an edge from block to bNotTaken
15114 // and we have already computed the edge weights, so
15115 // we will try to adjust some of the weights
15117 flowList* edgeTaken = fgGetPredForBlock(bTaken, block);
15118 BasicBlock* bUpdated = nullptr; // non-NULL if we updated the weight of an internal block
15120 // We examine the taken edge (block -> bTaken)
15121 // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight
15122 // else if bTaken has valid profile weight and block does not we try to adjust block's weight
15123 // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
15125 if (block->hasProfileWeight())
15127 // The edge weights for (block -> bTaken) are 100% of block's weight
15128 edgeTaken->flEdgeWeightMin = block->bbWeight;
15129 edgeTaken->flEdgeWeightMax = block->bbWeight;
15131 if (!bTaken->hasProfileWeight())
15133 if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
15135 // Update the weight of bTaken
15136 bTaken->inheritWeight(block);
15141 else if (bTaken->hasProfileWeight())
15143 if (bTaken->countOfInEdges() == 1)
15145 // There is only one in edge to bTaken
15146 edgeTaken->flEdgeWeightMin = bTaken->bbWeight;
15147 edgeTaken->flEdgeWeightMax = bTaken->bbWeight;
15149 // Update the weight of block
15150 block->inheritWeight(bTaken);
// If an internal block's weight changed, its outgoing edges' max weights
// must be re-capped to the new block weight.
15155 if (bUpdated != nullptr)
15158 // Now fix the weights of the edges out of 'bUpdated'
15159 switch (bUpdated->bbJumpKind)
15162 edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
15163 edge->flEdgeWeightMax = bUpdated->bbWeight;
15166 edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
15167 edge->flEdgeWeightMax = bUpdated->bbWeight;
15170 edge = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated);
15171 edge->flEdgeWeightMax = bUpdated->bbWeight;
15174 // We don't handle BBJ_SWITCH
15180 /* modify the flow graph */
15182 /* Remove 'block' from the predecessor list of 'bNotTaken' */
15183 fgRemoveRefPred(bNotTaken, block);
15188 printf("\nConditional folded at BB%02u\n", block->bbNum);
15189 printf("BB%02u becomes a %s", block->bbNum,
15190 block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
15191 if (block->bbJumpKind == BBJ_ALWAYS)
15193 printf(" to BB%02u", block->bbJumpDest->bbNum);
15199 /* if the block was a loop condition we may have to modify
15200 * the loop table */
15202 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
15204 /* Some loops may have been already removed by
15205 * loop unrolling or conditional folding */
15207 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
15212 /* We are only interested in the loop bottom */
15214 if (optLoopTable[loopNum].lpBottom == block)
15216 if (cond->gtIntCon.gtIconVal == 0)
15218 /* This was a bogus loop (condition always false)
15219 * Remove the loop from the table */
15221 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
15225 printf("Removing loop L%02u (from BB%02u to BB%02u)\n\n", loopNum,
15226 optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum);
// BBJ_SWITCH: same idea -- a throw terminator or a constant switch value
// lets us collapse the switch to a single successor.
15236 else if (block->bbJumpKind == BBJ_SWITCH)
15238 noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
15240 GenTreePtr stmt = block->bbTreeList->gtPrev;
15242 noway_assert(stmt->gtNext == nullptr);
15244 if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
15246 noway_assert(fgRemoveRestOfBlock);
15248 /* Unconditional throw - transform the basic block into a BBJ_THROW */
15249 fgConvertBBToThrowBB(block);
15251 /* update the flow graph */
// Drop 'block' from the predecessor lists of every switch target.
15253 unsigned jumpCnt = block->bbJumpSwt->bbsCount;
15254 BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab;
15256 for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
15258 BasicBlock* curJump = *jumpTab;
15260 /* Remove 'block' from the predecessor list of 'curJump' */
15261 fgRemoveRefPred(curJump, block);
15267 printf("\nConditional folded at BB%02u\n", block->bbNum);
15268 printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
15274 noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);
15276 /* Did we fold the conditional */
15278 noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
15280 cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
15282 if (cond->OperKind() & GTK_CONST)
15284 /* Yupee - we folded the conditional!
15285 * Remove the conditional statement */
15287 noway_assert(cond->gtOper == GT_CNS_INT);
15289 /* remove the statement from bbTreelist - No need to update
15290 * the reference counts since there are no lcl vars */
15291 fgRemoveStmt(block, stmt);
15293 /* modify the flow graph */
15295 /* Find the actual jump target */
15296 unsigned switchVal;
15297 switchVal = (unsigned)cond->gtIntCon.gtIconVal;
15299 jumpCnt = block->bbJumpSwt->bbsCount;
15300 BasicBlock** jumpTab;
15301 jumpTab = block->bbJumpSwt->bbsDstTab;
// Walk the jump table: keep the matching target (or the last entry, which
// is the default case when no earlier entry matched), drop all others.
15305 for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
15307 BasicBlock* curJump = *jumpTab;
15309 assert(curJump->countOfInEdges() > 0);
15311 // If val matches switchVal or we are at the last entry and
15312 // we never found the switch value then set the new jump dest
15314 if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
15316 if (curJump != block->bbNext)
15318 /* transform the basic block into a BBJ_ALWAYS */
15319 block->bbJumpKind = BBJ_ALWAYS;
15320 block->bbJumpDest = curJump;
15322 // if we are jumping backwards, make sure we have a GC Poll.
15323 if (curJump->bbNum > block->bbNum)
15325 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
15330 /* transform the basic block into a BBJ_NONE */
15331 block->bbJumpKind = BBJ_NONE;
15332 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
15338 /* Remove 'block' from the predecessor list of 'curJump' */
15339 fgRemoveRefPred(curJump, block);
15345 printf("\nConditional folded at BB%02u\n", block->bbNum);
15346 printf("BB%02u becomes a %s", block->bbNum,
15347 block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
15348 if (block->bbJumpKind == BBJ_ALWAYS)
15350 printf(" to BB%02u", block->bbJumpDest->bbNum);
15362 //*****************************************************************************
15364 // Morphs a single statement in a block.
15365 // Can be called anytime, unlike fgMorphStmts() which should only be called once.
15367 // Returns true if 'stmt' was removed from the block.
15368 // Returns false if 'stmt' is still in the block (even if other statements were removed).
15371 bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg))
15373 assert(block != nullptr);
15374 assert(stmt != nullptr);
15377 compCurStmt = stmt;
15379 GenTree* morph = fgMorphTree(stmt->gtStmtExpr);
15381 // Bug 1106830 - During the CSE phase we can't just remove
15382 // morph->gtOp.gtOp2 as it could contain CSE expressions.
15383 // This leads to a noway_assert in OptCSE.cpp when
15384 // searching for the removed CSE ref. (using gtFindLink)
15386 if (!optValnumCSE_phase)
15388 // Check for morph as a GT_COMMA with an unconditional throw
15389 if (fgIsCommaThrow(morph, true))
15394 printf("Folding a top-level fgIsCommaThrow stmt\n");
15395 printf("Removing op2 as unreachable:\n");
15396 gtDispTree(morph->gtOp.gtOp2);
15400 // Use the call as the new stmt
// COMMA(throw-call, x): everything after the throw is unreachable, so the
// statement reduces to just the throw call.
15401 morph = morph->gtOp.gtOp1;
15402 noway_assert(morph->gtOper == GT_CALL);
15405 // we can get a throw as a statement root
// A bare throw at statement root makes the rest of the block unreachable.
15406 if (fgIsThrow(morph))
15411 printf("We have a top-level fgIsThrow stmt\n");
15412 printf("Removing the rest of block as unreachable:\n");
15415 noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
15416 fgRemoveRestOfBlock = true;
15420 stmt->gtStmtExpr = morph;
15422 if (lvaLocalVarRefCounted)
15424 // fgMorphTree may have introduced new lclVar references. Bump the ref counts if requested.
15425 lvaRecursiveIncRefCounts(stmt->gtStmtExpr);
15428 // Can the entire tree be removed?
15429 bool removedStmt = fgCheckRemoveStmt(block, stmt);
15431 // Or this is the last statement of a conditional branch that was just folded?
15432 if (!removedStmt && (stmt->getNextStmt() == nullptr) && !fgRemoveRestOfBlock)
15434 if (fgFoldConditional(block))
15436 if (block->bbJumpKind != BBJ_THROW)
15438 removedStmt = true;
// The statement survived: recompute evaluation order and relink the nodes.
15445 // Have to re-do the evaluation order since for example some later code does not expect constants as op1
15446 gtSetStmtInfo(stmt);
15448 // Have to re-link the nodes for this statement
15449 fgSetStmtSeq(stmt);
15455 printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed"));
// Unreachable tail: drop every remaining statement and (usually) convert
// the block into a throw block.
15461 if (fgRemoveRestOfBlock)
15463 // Remove the rest of the stmts in the block
15464 for (stmt = stmt->getNextStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
15466 fgRemoveStmt(block, stmt);
15469 // The rest of block has been removed and we will always throw an exception.
15471 // Update succesors of block
15472 fgRemoveBlockAsPred(block);
15474 // For compDbgCode, we prepend an empty BB as the firstBB, it is BBJ_NONE.
15475 // We should not convert it to a ThrowBB.
15476 if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0))
15478 // Convert block to a throw bb
15479 fgConvertBBToThrowBB(block);
15485 printf("\n%s Block BB%02u becomes a throw block.\n", msg, block->bbNum);
15488 fgRemoveRestOfBlock = false;
15491 return removedStmt;
15494 /*****************************************************************************
15496 * Morph the statements of the given block.
15497 * This function should be called just once for a block. Use fgMorphBlockStmt()
15498 * for reentrant calls.
// Out-params: *mult / *lnot / *loadw report statement patterns (e.g. paired
// +=/-= operators) that later peephole passes look for.
15501 void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw)
15503 fgRemoveRestOfBlock = false;
15505 noway_assert(fgExpandInline == false);
15507 /* Make the current basic block address available globally */
15511 *mult = *lnot = *loadw = false;
15513 fgCurrentlyInUseArgTemps = hashBv::Create(this);
15515 GenTreeStmt* stmt = block->firstStmt();
15516 GenTreePtr prev = nullptr;
15517 for (; stmt != nullptr; prev = stmt->gtStmtExpr, stmt = stmt->gtNextStmt)
15519 noway_assert(stmt->gtOper == GT_STMT);
// A previously morphed statement already proved the tail unreachable.
15521 if (fgRemoveRestOfBlock)
15523 fgRemoveStmt(block, stmt);
15526 #ifdef FEATURE_SIMD
15527 if (!opts.MinOpts() && stmt->gtStmtExpr->TypeGet() == TYP_FLOAT && stmt->gtStmtExpr->OperGet() == GT_ASG)
15529 fgMorphCombineSIMDFieldAssignments(block, stmt);
15533 fgMorphStmt = stmt;
15534 compCurStmt = stmt;
15535 GenTreePtr tree = stmt->gtStmtExpr;
15539 if (stmt == block->bbTreeList)
15541 block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
// Hash the tree before morphing so the dump below can detect changes.
15544 unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0);
15548 printf("\nfgMorphTree BB%02u, stmt %d (before)\n", block->bbNum, compCurStmtNum);
15553 /* Morph this statement tree */
15555 GenTreePtr morph = fgMorphTree(tree);
15557 // mark any outgoing arg temps as free so we can reuse them in the next statement.
15559 fgCurrentlyInUseArgTemps->ZeroAll();
15561 // Has fgMorphStmt been sneakily changed ?
15563 if (stmt->gtStmtExpr != tree)
15565 /* This must be tailcall. Ignore 'morph' and carry on with
15566 the tail-call node */
15568 morph = stmt->gtStmtExpr;
15569 noway_assert(compTailCallUsed);
15570 noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
15571 noway_assert(stmt->gtNextStmt == nullptr);
15573 GenTreeCall* call = morph->AsCall();
15575 // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
15576 // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
15578 noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
15579 (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
15580 (compCurBB->bbFlags & BBF_HAS_JMP)));
15582 else if (block != compCurBB)
15584 /* This must be a tail call that caused a GCPoll to get
15585 injected. We haven't actually morphed the call yet
15586 but the flag still got set, clear it here... */
15587 CLANG_FORMAT_COMMENT_ANCHOR;
15590 tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
15593 noway_assert(compTailCallUsed);
15594 noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
15595 noway_assert(stmt->gtNextStmt == nullptr);
15597 GenTreeCall* call = morph->AsCall();
15600 // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
15601 // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
15603 noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
15604 (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
15605 (compCurBB->bbFlags & BBF_HAS_JMP)));
// Stress mode: clone the morphed tree to exercise gtCloneExpr().
15609 if (compStressCompile(STRESS_CLONE_EXPR, 30))
15611 // Clone all the trees to stress gtCloneExpr()
15615 printf("\nfgMorphTree (stressClone from):\n");
15619 morph = gtCloneExpr(morph);
15620 noway_assert(morph);
15624 printf("\nfgMorphTree (stressClone to):\n");
15629 /* If the hash value changes. we modified the tree during morphing */
15632 unsigned newHash = gtHashValue(morph);
15633 if (newHash != oldHash)
15635 printf("\nfgMorphTree BB%02u, stmt %d (after)\n", block->bbNum, compCurStmtNum);
15641 /* Check for morph as a GT_COMMA with an unconditional throw */
15642 if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true))
15644 /* Use the call as the new stmt */
15645 morph = morph->gtOp.gtOp1;
15646 noway_assert(morph->gtOper == GT_CALL);
15647 noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
15649 fgRemoveRestOfBlock = true;
15652 stmt->gtStmtExpr = tree = morph;
// Morphing must leave the outgoing-arg slot count balanced.
15654 noway_assert(fgPtrArgCntCur == 0);
15656 if (fgRemoveRestOfBlock)
15661 /* Has the statement been optimized away */
15663 if (fgCheckRemoveStmt(block, stmt))
15668 /* Check if this block ends with a conditional branch that can be folded */
15670 if (fgFoldConditional(block))
15675 if (ehBlockHasExnFlowDsc(block))
15680 #if OPT_MULT_ADDSUB
15682 /* Note whether we have two or more +=/-= operators in a row */
15684 if (tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB)
15686 if (prev && prev->gtOper == tree->gtOper)
15694 /* Note "x = a[i] & icon" followed by "x |= a[i] << 8" */
15696 if (tree->gtOper == GT_ASG_OR && prev && prev->gtOper == GT_ASG)
// If the block's tail was removed, re-morph the terminating JTRUE/SWITCH
// condition (its compare no longer feeds a branch) and make it a throw block.
15702 if (fgRemoveRestOfBlock)
15704 if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH))
15706 GenTreePtr first = block->bbTreeList;
15707 noway_assert(first);
15708 GenTreePtr last = first->gtPrev;
15709 noway_assert(last && last->gtNext == nullptr);
15710 GenTreePtr lastStmt = last->gtStmt.gtStmtExpr;
15712 if (((block->bbJumpKind == BBJ_COND) && (lastStmt->gtOper == GT_JTRUE)) ||
15713 ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH)))
15715 GenTreePtr op1 = lastStmt->gtOp.gtOp1;
15717 if (op1->OperKind() & GTK_RELOP)
15719 /* Unmark the comparison node with GTF_RELOP_JMP_USED */
15720 op1->gtFlags &= ~GTF_RELOP_JMP_USED;
15723 last->gtStmt.gtStmtExpr = fgMorphTree(op1);
15727 /* Mark block as a BBJ_THROW block */
15728 fgConvertBBToThrowBB(block);
15731 noway_assert(fgExpandInline == false);
15733 #if FEATURE_FASTTAILCALL
15734 GenTreePtr recursiveTailCall = nullptr;
15735 if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
15737 fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
15742 compCurBB = (BasicBlock*)INVALID_POINTER_VALUE;
15745 // Reset this back so that it doesn't leak out impacting other blocks
15746 fgRemoveRestOfBlock = false;
15749 /*****************************************************************************
15751 * Morph the blocks of the method.
15752 * Returns true if the basic block list is modified.
15753 * This function should be called just once.
15756 void Compiler::fgMorphBlocks()
// NOTE(review): many interior lines (braces, DEBUG guards, loop headers,
// 'continue'/'break' statements) are elided in this view; the comments below
// describe only the statements that are visible.
15761 printf("\n*************** In fgMorphBlocks()\n");
15765 /* Since fgMorphTree can be called after various optimizations to re-arrange
15766 * the nodes we need a global flag to signal if we are during the one-pass
15767 * global morphing */
15769 fgGlobalMorph = true;
15771 #if LOCAL_ASSERTION_PROP
15773 // Local assertion prop is enabled if we are optimized
15775 optLocalAssertionProp = (!opts.compDbgCode && !opts.MinOpts());
15777 if (optLocalAssertionProp)
15780 // Initialize for local assertion prop
15782 optAssertionInit(true);
15784 #elif ASSERTION_PROP
15786 // If LOCAL_ASSERTION_PROP is not set
15787 // and we have global assertion prop
15788 // then local assertion prop is always off
15790 optLocalAssertionProp = false;
15794 /*-------------------------------------------------------------------------
15795 * Process all basic blocks in the function
15798 BasicBlock* block = fgFirstBB;
15799 noway_assert(block);
15802 compCurStmtNum = 0;
15807 #if OPT_MULT_ADDSUB
15815 bool loadw = false;
15820 printf("\nMorphing BB%02u of '%s'\n", block->bbNum, info.compFullName);
15824 #if LOCAL_ASSERTION_PROP
15825 if (optLocalAssertionProp)
15828 // Clear out any currently recorded assertion candidates
15829 // before processing each basic block,
15830 // also we must handle QMARK-COLON specially
15832 optAssertionReset(0);
15836 /* Process all statement trees in the basic block */
15840 fgMorphStmts(block, &mult, &lnot, &loadw);
15842 #if OPT_MULT_ADDSUB
// Opportunistic peephole: merge two consecutive "lclVar += icon" /
// "lclVar -= icon" statements targeting the same local into one.
15844 if (mult && (opts.compFlags & CLFLG_TREETRANS) && !opts.compDbgCode && !opts.MinOpts())
15846 for (tree = block->bbTreeList; tree; tree = tree->gtNext)
15848 noway_assert(tree->gtOper == GT_STMT);
15849 GenTreePtr last = tree->gtStmt.gtStmtExpr;
15851 if (last->gtOper == GT_ASG_ADD || last->gtOper == GT_ASG_SUB)
15856 GenTreePtr dst1 = last->gtOp.gtOp1;
15857 GenTreePtr src1 = last->gtOp.gtOp2;
// Pattern gate: destination must be a local variable and the addend an
// integer constant, otherwise the fold does not apply.
15859 if (!last->IsCnsIntOrI())
15864 if (dst1->gtOper != GT_LCL_VAR)
15868 if (!src1->IsCnsIntOrI())
15878 /* Look at the next statement */
15880 temp = tree->gtNext;
15886 noway_assert(temp->gtOper == GT_STMT);
15887 next = temp->gtStmt.gtStmtExpr;
// The second statement must be the same oper/type, same local, constant
// source, and agree on overflow checking with the first.
15889 if (next->gtOper != last->gtOper)
15893 if (next->gtType != last->gtType)
15898 dst2 = next->gtOp.gtOp1;
15899 src2 = next->gtOp.gtOp2;
15901 if (dst2->gtOper != GT_LCL_VAR)
15905 if (dst2->gtLclVarCommon.gtLclNum != dst1->gtLclVarCommon.gtLclNum)
15910 if (!src2->IsCnsIntOrI())
15915 if (last->gtOverflow() != next->gtOverflow())
15920 const ssize_t i1 = src1->gtIntCon.gtIconVal;
15921 const ssize_t i2 = src2->gtIntCon.gtIconVal;
15922 const ssize_t itemp = i1 + i2;
15924 /* if the operators are checking for overflow, check for overflow of the operands */
// ClrSafeInt performs the addition with explicit overflow detection in the
// exact signedness/width the checked operator would use at runtime; if the
// combined constant would overflow, the fold is abandoned (elided branch).
15926 if (next->gtOverflow())
15928 if (next->TypeGet() == TYP_LONG)
15930 if (next->gtFlags & GTF_UNSIGNED)
15932 ClrSafeInt<UINT64> si1(i1);
15933 if ((si1 + ClrSafeInt<UINT64>(i2)).IsOverflow())
15940 ClrSafeInt<INT64> si1(i1);
15941 if ((si1 + ClrSafeInt<INT64>(i2)).IsOverflow())
15947 else if (next->gtFlags & GTF_UNSIGNED)
15949 ClrSafeInt<UINT32> si1(i1);
15950 if ((si1 + ClrSafeInt<UINT32>(i2)).IsOverflow())
15957 ClrSafeInt<INT32> si1(i1);
15958 if ((si1 + ClrSafeInt<INT32>(i2)).IsOverflow())
15965 /* Fold the two increments/decrements into one */
15967 src1->gtIntCon.gtIconVal = itemp;
15968 #ifdef _TARGET_64BIT_
// On 64-bit targets an icon node holds a 64-bit value; re-normalize after
// the fold so a TYP_INT constant stays in 32-bit range.
15969 if (src1->gtType == TYP_INT)
15971 src1->AsIntCon()->TruncateOrSignExtend32();
15973 #endif //_TARGET_64BIT_
15975 /* Remove the second statement completely */
15977 noway_assert(tree->gtNext == temp);
15978 noway_assert(temp->gtPrev == tree);
// Unlink 'temp' from the doubly-linked statement list; the elided branch
// distinguishes whether 'temp' was the last statement of the block.
15982 noway_assert(temp->gtNext->gtPrev == temp);
15984 temp->gtNext->gtPrev = tree;
15985 tree->gtNext = temp->gtNext;
15989 tree->gtNext = nullptr;
15991 noway_assert(block->bbTreeList->gtPrev == temp);
15993 block->bbTreeList->gtPrev = tree;
16004 /* Are we using a single return block? */
16006 if (block->bbJumpKind == BBJ_RETURN)
16008 if ((genReturnBB != nullptr) && (genReturnBB != block) && ((block->bbFlags & BBF_HAS_JMP) == 0))
16010 /* We'll jump to the genReturnBB */
16011 CLANG_FORMAT_COMMENT_ANCHOR;
16013 #if !defined(_TARGET_X86_)
16014 if (info.compFlags & CORINFO_FLG_SYNCH)
16016 fgConvertSyncReturnToLeave(block);
16019 #endif // !_TARGET_X86_
// Redirect this return block to branch to the merged return block instead.
16021 block->bbJumpKind = BBJ_ALWAYS;
16022 block->bbJumpDest = genReturnBB;
16026 // Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN.
16027 // For example a method returning void could have an empty block with jump kind BBJ_RETURN.
16028 // Such blocks do materialize as part of in-lining.
16030 // Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN.
16031 // It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC.
16032 // For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal
16035 // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN.
16037 GenTreePtr last = (block->bbTreeList != nullptr) ? block->bbTreeList->gtPrev : nullptr;
16038 GenTreePtr ret = (last != nullptr) ? last->gtStmt.gtStmtExpr : nullptr;
16040 // replace the GT_RETURN node to be a GT_ASG that stores the return value into genReturnLocal.
16041 if (genReturnLocal != BAD_VAR_NUM)
16043 // Method must be returning a value other than TYP_VOID.
16044 noway_assert(compMethodHasRetVal());
16046 // This block must be ending with a GT_RETURN
16047 noway_assert(last != nullptr);
16048 noway_assert(last->gtOper == GT_STMT);
16049 noway_assert(last->gtNext == nullptr);
16050 noway_assert(ret != nullptr);
16052 // GT_RETURN must have non-null operand as the method is returning the value assigned to
16054 noway_assert(ret->OperGet() == GT_RETURN);
16055 noway_assert(ret->gtGetOp1() != nullptr);
16057 GenTreePtr tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1());
// A struct-valued return may become a copy-block assignment, which must be
// morphed here since global morph of this statement has already happened.
16059 last->gtStmt.gtStmtExpr = (tree->OperIsCopyBlkOp()) ? fgMorphCopyBlock(tree) : tree;
16061 // make sure that copy-prop ignores this assignment.
16062 last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE;
16064 else if (ret != nullptr && ret->OperGet() == GT_RETURN)
16066 // This block ends with a GT_RETURN
16067 noway_assert(last != nullptr);
16068 noway_assert(last->gtOper == GT_STMT);
16069 noway_assert(last->gtNext == nullptr);
16071 // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn block
16072 noway_assert(ret->TypeGet() == TYP_VOID);
16073 noway_assert(ret->gtGetOp1() == nullptr);
16075 fgRemoveStmt(block, last);
16081 printf("morph BB%02u to point at onereturn. New block is\n", block->bbNum);
16082 fgTableDispBasicBlock(block);
16088 block = block->bbNext;
16091 /* We are done with the global morphing phase */
16093 fgGlobalMorph = false;
16098 fgDispBasicBlocks(true);
16103 //------------------------------------------------------------------------
16104 // fgCheckArgCnt: Check whether the maximum arg size will change codegen requirements
16107 // fpPtrArgCntMax records the maximum number of pushed arguments.
16108 // Depending upon this value of the maximum number of pushed arguments
16109 we may need to use an EBP frame or be partially interruptible.
16110 // This functionality has been factored out of fgSetOptions() because
16111 // the Rationalizer can create new calls.
16114 // This must be called before isFramePointerRequired() is called, because it is a
16115 // phased variable (can only be written before it has been read).
16117 void Compiler::fgCheckArgCnt()
// If the maximum pushed-argument count cannot be encoded for fully
// interruptible GC info, downgrade to partially interruptible.
16119 if (!compCanEncodePtrArgCntMax())
16124 printf("Too many pushed arguments for fully interruptible encoding, marking method as partially "
16125 "interruptible\n");
16128 genInterruptible = false;
// If the pushed-argument area is too large for an ESP-relative encoding,
// force an EBP frame instead.
16130 if (fgPtrArgCntMax >= sizeof(unsigned))
16135 printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n");
16138 codeGen->setFramePointerRequired(true);
16142 /*****************************************************************************
16144 * Make some decisions about the kind of code to generate.
16147 void Compiler::fgSetOptions()
// Decides two main codegen properties for the method: whether the code is
// fully interruptible (genInterruptible) and whether a frame pointer is
// required. NOTE(review): interior lines (braces, some DEBUG code) are elided
// in this view.
16150 /* Should we force fully interruptible code ? */
16151 if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30))
16153 noway_assert(!codeGen->isGCTypeFixed());
16154 genInterruptible = true;
16158 if (opts.compDbgCode)
16160 assert(!codeGen->isGCTypeFixed());
16161 genInterruptible = true; // debugging is easier this way ...
16164 /* Assume we won't need an explicit stack frame if this is allowed */
16166 // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of
16167 // the callee-saved registers.
16168 noway_assert(!compTailCallUsed || !compLocallocUsed);
16170 if (compLocallocUsed)
16172 codeGen->setFramePointerRequired(true);
16175 #ifdef _TARGET_X86_
16177 if (compTailCallUsed)
16178 codeGen->setFramePointerRequired(true);
16180 #endif // _TARGET_X86_
16182 if (!opts.genFPopt)
16184 codeGen->setFramePointerRequired(true);
16187 // Assert that the EH table has been initialized by now. Note that
16188 // compHndBBtabAllocCount never decreases; it is a high-water mark
16189 // of table allocation. In contrast, compHndBBtabCount does shrink
16190 // if we delete a dead EH region, and if it shrinks to zero, the
16191 // table pointer compHndBBtab is unreliable.
16192 assert(compHndBBtabAllocCount >= info.compXcptnsCount);
16194 #ifdef _TARGET_X86_
16196 // Note: this case, and the !X86 case below, should both use the
16197 // !X86 path. This would require a few more changes for X86 to use
16198 // compHndBBtabCount (the current number of EH clauses) instead of
16199 // info.compXcptnsCount (the number of EH clauses in IL), such as
16200 // in ehNeedsShadowSPslots(). This is because sometimes the IL has
16201 // an EH clause that we delete as statically dead code before we
16202 // get here, leaving no EH clauses left, and thus no requirement
16203 // to use a frame pointer because of EH. But until all the code uses
16204 // the same test, leave info.compXcptnsCount here.
16205 if (info.compXcptnsCount > 0)
16207 codeGen->setFramePointerRequiredEH(true);
16210 #else // !_TARGET_X86_
16212 if (compHndBBtabCount > 0)
16214 codeGen->setFramePointerRequiredEH(true);
16217 #endif // _TARGET_X86_
16219 #ifdef UNIX_X86_ABI
16220 if (info.compXcptnsCount > 0)
16222 assert(!codeGen->isGCTypeFixed());
16223 // Enforce fully interruptible codegen for funclet unwinding
16224 genInterruptible = true;
16226 #endif // UNIX_X86_ABI
16230 if (info.compCallUnmanaged)
16232 codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame
16235 if (info.compPublishStubParam)
16237 codeGen->setFramePointerRequiredGCInfo(true);
16240 if (opts.compNeedSecurityCheck)
16242 codeGen->setFramePointerRequiredGCInfo(true);
16244 #ifndef JIT32_GCENCODER
16246 // The decoder only reports objects in frames with exceptions if the frame
16247 // is fully interruptible.
16248 // Even if there is no catch or other way to resume execution in this frame
16249 // the VM requires the security object to remain alive until later, so
16250 // Frames with security objects must be fully interruptible.
16251 genInterruptible = true;
16253 #endif // JIT32_GCENCODER
16256 if (compIsProfilerHookNeeded())
16258 codeGen->setFramePointerRequired(true);
16261 if (info.compIsVarArgs)
16263 // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
16264 codeGen->setFramePointerRequiredGCInfo(true);
16267 if (lvaReportParamTypeArg())
16269 codeGen->setFramePointerRequiredGCInfo(true);
16272 // printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not");
16275 /*****************************************************************************/
16277 GenTreePtr Compiler::fgInitThisClass()
// Builds the tree that runs the class constructor (".cctor") for the method's
// class on entry. For non-shared code this is the simple shared-cctor helper;
// for shared generic code the right class must be located through the generic
// context (this-object vtable, class param, or method param).
16279 noway_assert(!compIsForInlining());
16281 CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);
16283 if (!kind.needsRuntimeLookup)
16285 return fgGetSharedCCtor(info.compClassHnd);
16289 #ifdef FEATURE_READYTORUN_COMPILER
16290 // Only CoreRT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR.
16291 if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI))
16293 CORINFO_RESOLVED_TOKEN resolvedToken;
16294 memset(&resolvedToken, 0, sizeof(resolvedToken));
16296 // We are in a shared method body, but maybe we don't need a runtime lookup after all.
16297 // This covers the case of a generic method on a non-generic type.
16298 if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST))
16300 resolvedToken.hClass = info.compClassHnd;
16301 return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
16304 // We need a runtime lookup.
16305 GenTreePtr ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);
16307 // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static
16308 // base of the class that owns the method being compiled". If we're in this method, it means we're not
16309 // inlining and there's no ambiguity.
16310 return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF,
16311 gtNewArgList(ctxTree), &kind);
16315 // Collectible types require that for shared generic code, if we use the generic context parameter
16316 // that we report it. (This is a conservative approach, we could detect some cases particularly when the
16317 // context parameter is this that we don't need the eager reporting logic.)
16318 lvaGenericsContextUseCount++;
16320 switch (kind.runtimeLookupKind)
16322 case CORINFO_LOOKUP_THISOBJ:
16323 // This code takes a this pointer; but we need to pass the static method desc to get the right point in
// Dereference 'this' to get its vtable pointer, which identifies the exact
// instantiated class; the helper combines it with the method desc.
16326 GenTreePtr vtTree = gtNewLclvNode(info.compThisArg, TYP_REF);
16327 // Vtable pointer of this object
16328 vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree);
16329 vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
16330 GenTreePtr methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd);
16332 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
16333 gtNewArgList(vtTree, methodHnd));
16336 case CORINFO_LOOKUP_CLASSPARAM:
// The hidden context argument is itself the class handle.
16338 GenTreePtr vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
16339 return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, 0, gtNewArgList(vtTree));
16342 case CORINFO_LOOKUP_METHODPARAM:
// The hidden context argument is a method desc; class handle arg is null (0).
16344 GenTreePtr methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
16345 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
16346 gtNewArgList(gtNewIconNode(0), methHndTree));
16351 noway_assert(!"Unknown LOOKUP_KIND");
16356 /*****************************************************************************
16358 * Tree walk callback to make sure no GT_QMARK nodes are present in the tree,
16359 * except for the allowed ? 1 : 0; pattern.
16361 Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTreePtr* tree, fgWalkData* data)
// Tree-walk callback: any GT_QMARK found must be in the single allowed form;
// fgCheckQmarkAllowedForm asserts if it is not. Always continues the walk.
16363 if ((*tree)->OperGet() == GT_QMARK)
16365 fgCheckQmarkAllowedForm(*tree);
16367 return WALK_CONTINUE;
16370 void Compiler::fgCheckQmarkAllowedForm(GenTree* tree)
// Asserts that 'tree' (a GT_QMARK) is in an allowed shape. With the RyuJIT
// backend no qmarks may survive past morph at all; with the legacy backend
// only the "cond ? 1 : 0" pattern (colon arms are the constants 0 and 1,
// with op1 the else/0 arm and op2 the then/1 arm) is tolerated.
16372 assert(tree->OperGet() == GT_QMARK);
16373 #ifndef LEGACY_BACKEND
16374 assert(!"Qmarks beyond morph disallowed.");
16375 #else // LEGACY_BACKEND
16376 GenTreePtr colon = tree->gtOp.gtOp2;
16378 assert(colon->gtOp.gtOp1->IsIntegralConst(0));
16379 assert(colon->gtOp.gtOp2->IsIntegralConst(1));
16380 #endif // LEGACY_BACKEND
16383 /*****************************************************************************
16385 * Verify that the importer has created GT_QMARK nodes in a way we can
16386 * process them. The following is allowed:
16388 * 1. A top level qmark. Top level qmark is of the form:
16389 * a) (bool) ? (void) : (void) OR
16390 * b) V0N = (bool) ? (type) : (type)
16392 * 2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child
16393 * of either op1 of colon or op2 of colon but not a child of any other
16396 void Compiler::fgPreExpandQmarkChecks(GenTreePtr expr)
// Debug validation that qmarks appear only where the expansion code can
// handle them: at the top level of a statement (possibly assigned to a
// local), with further qmarks allowed only recursively in the colon arms.
16398 GenTreePtr topQmark = fgGetTopLevelQmark(expr);
16400 // If the top level Qmark is null, then scan the tree to make sure
16401 // there are no qmarks within it.
16402 if (topQmark == nullptr)
16404 fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
16408 // We could probably expand the cond node also, but don't think the extra effort is necessary,
16409 // so let's just assert the cond node of a top level qmark doesn't have further top level qmarks.
16410 fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, nullptr);
// Recurse into both colon arms (then/else), where nested qmarks are legal.
16412 fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1);
16413 fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2);
16418 /*****************************************************************************
16420 * Get the top level GT_QMARK node in a given "expr", return NULL if such a
16421 * node is not present. If the top level GT_QMARK node is assigned to a
16422 * GT_LCL_VAR, then return the lcl node in ppDst.
16425 GenTreePtr Compiler::fgGetTopLevelQmark(GenTreePtr expr, GenTreePtr* ppDst /* = NULL */)
// Returns the top-level GT_QMARK of 'expr', or null if there isn't one.
// Recognizes two shapes: a bare qmark, or "lclVar = qmark" — in the latter
// case *ppDst (when provided) receives the GT_LCL_VAR destination node.
16427 if (ppDst != nullptr)
16432 GenTreePtr topQmark = nullptr;
16433 if (expr->gtOper == GT_QMARK)
16437 else if (expr->gtOper == GT_ASG && expr->gtOp.gtOp2->gtOper == GT_QMARK && expr->gtOp.gtOp1->gtOper == GT_LCL_VAR)
16439 topQmark = expr->gtOp.gtOp2;
16440 if (ppDst != nullptr)
16442 *ppDst = expr->gtOp.gtOp1;
16448 /*********************************************************************************
16450 * For a castclass helper call,
16451 * Importer creates the following tree:
16452 * tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
16454 * This method splits the qmark expression created by the importer into the
16455 * following blocks: (block, asg, cond1, cond2, helper, remainder)
16456 * Notice that op1 is the result for both the conditions. So we coalesce these
16457 * assignments into a single block instead of two blocks resulting a nested diamond.
16459 * +---------->-----------+
16463 * block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
16465 * We expect to achieve the following codegen:
16466 * mov rsi, rdx tmp = op1 // asgBlock
16467 * test rsi, rsi goto skip if tmp == null ? // cond1Block
16469 * mov rcx, 0x76543210 cns = op2 // cond2Block
16470 * cmp qword ptr [rsi], rcx goto skip if *tmp == op2
16472 * call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp) // helperBlock
16474 * SKIP: // remainderBlock
16475 * tmp has the result.
16478 void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTreePtr stmt)
// Expands the special nested qmark that the importer creates for castclass/
// isinst (see the header comment above) into explicit control flow:
// block -> asgBlock -> cond1Block -> cond2Block -> helperBlock -> remainder.
// Both qmark conditions produce the same value (op1), so a single assignment
// block is used for the "true" results instead of a nested diamond.
16483 printf("\nExpanding CastInstOf qmark in BB%02u (before)\n", block->bbNum);
16484 fgDispBasicBlocks(block, block, true);
16488 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
16490 GenTreePtr dst = nullptr;
16491 GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
16492 noway_assert(dst != nullptr);
16494 assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);
16496 // Get cond, true, false exprs for the qmark.
16497 GenTreePtr condExpr = qmark->gtGetOp1();
16498 GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
16499 GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
16501 // Get cond, true, false exprs for the nested qmark.
16502 GenTreePtr nestedQmark = falseExpr;
16503 GenTreePtr cond2Expr;
16504 GenTreePtr true2Expr;
16505 GenTreePtr false2Expr;
16507 if (nestedQmark->gtOper == GT_QMARK)
16509 cond2Expr = nestedQmark->gtGetOp1();
16510 true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
16511 false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode();
16513 assert(cond2Expr->gtFlags & GTF_RELOP_QMARK);
16514 cond2Expr->gtFlags &= ~GTF_RELOP_QMARK;
16518 // This is a rare case that arises when we are doing minopts and encounter isinst of null
16519 // gtFoldExpr was still able to optimize away part of the tree (but not all).
16520 // That means it does not match our pattern.
16522 // Rather than write code to handle this case, just fake up some nodes to make it match the common
16523 // case. Synthesize a comparison that is always true, and for the result-on-true, use the
16524 // entire subtree we expected to be the nested question op.
16526 cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
16527 true2Expr = nestedQmark;
16528 false2Expr = gtNewIconNode(0, TYP_I_IMPL);
16530 assert(false2Expr->OperGet() == trueExpr->OperGet());
16532 // Clear flags as they are now going to be part of JTRUE.
16533 assert(condExpr->gtFlags & GTF_RELOP_QMARK);
16534 condExpr->gtFlags &= ~GTF_RELOP_QMARK;
16536 // Create the chain of blocks. See method header comment.
16537 // The order of blocks after this is the following:
16538 // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
16540 // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
16541 // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
16542 // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
16543 // remainderBlock will still be GC safe.
16544 unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
16545 BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
16546 fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
// New blocks are inserted immediately after 'block', so creation order here
// is the reverse of final layout order.
16548 BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true);
16549 BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true);
16550 BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true);
16551 BasicBlock* asgBlock = fgNewBBafter(BBJ_NONE, block, true);
16553 remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
16555 // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
16556 // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
16557 if ((block->bbFlags & BBF_INTERNAL) == 0)
16559 helperBlock->bbFlags &= ~BBF_INTERNAL;
16560 cond2Block->bbFlags &= ~BBF_INTERNAL;
16561 cond1Block->bbFlags &= ~BBF_INTERNAL;
16562 asgBlock->bbFlags &= ~BBF_INTERNAL;
16563 helperBlock->bbFlags |= BBF_IMPORTED;
16564 cond2Block->bbFlags |= BBF_IMPORTED;
16565 cond1Block->bbFlags |= BBF_IMPORTED;
16566 asgBlock->bbFlags |= BBF_IMPORTED;
16569 // Chain the flow correctly.
16570 fgAddRefPred(asgBlock, block);
16571 fgAddRefPred(cond1Block, asgBlock);
16572 fgAddRefPred(cond2Block, cond1Block);
16573 fgAddRefPred(helperBlock, cond2Block);
16574 fgAddRefPred(remainderBlock, helperBlock);
16575 fgAddRefPred(remainderBlock, cond1Block);
16576 fgAddRefPred(remainderBlock, cond2Block);
16578 cond1Block->bbJumpDest = remainderBlock;
16579 cond2Block->bbJumpDest = remainderBlock;
16581 // Set the weights; some are guesses.
16582 asgBlock->inheritWeight(block);
16583 cond1Block->inheritWeight(block);
16584 cond2Block->inheritWeightPercentage(cond1Block, 50);
16585 helperBlock->inheritWeightPercentage(cond2Block, 50);
16587 // Append cond1 as JTRUE to cond1Block
16588 GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
16589 GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
16590 fgInsertStmtAtEnd(cond1Block, jmpStmt);
16592 // Append cond2 as JTRUE to cond2Block
16593 jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
16594 jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
16595 fgInsertStmtAtEnd(cond2Block, jmpStmt);
16597 // AsgBlock should get tmp = op1 assignment.
16598 trueExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr);
16599 GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
16600 fgInsertStmtAtEnd(asgBlock, trueStmt);
16602 // Since we are adding helper in the JTRUE false path, reverse the cond2 and add the helper.
16603 gtReverseCond(cond2Expr);
16604 GenTreePtr helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr);
16605 GenTreePtr helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx);
16606 fgInsertStmtAtEnd(helperBlock, helperStmt);
16608 // Finally remove the nested qmark stmt.
16609 fgRemoveStmt(block, stmt);
16614 printf("\nExpanding CastInstOf qmark in BB%02u (after)\n", block->bbNum);
16615 fgDispBasicBlocks(block, remainderBlock, true);
16620 /*****************************************************************************
16622 * Expand a statement with a top level qmark node. There are three cases, based
16623 * on whether the qmark has both "true" and "false" arms, or just one of them.
16634 * S0 -->-- ~C -->-- T F -->-- S1
16639 * -----------------------------------------
16648 * S0 -->-- ~C -->-- T -->-- S1
16650 * +-->-------------+
16653 * -----------------------------------------
16662 * S0 -->-- C -->-- F -->-- S1
16664 * +-->------------+
16667 * If the qmark assigns to a variable, then create tmps for "then"
16668 * and "else" results and assign the temp to the variable as a writeback step.
16670 void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTreePtr stmt)
// Expands a statement containing a top-level GT_QMARK into explicit basic
// blocks (see the header comment above for the three shapes). A no-op when
// the statement has no top-level qmark; the castclass/isinst pattern is
// delegated to fgExpandQmarkForCastInstOf.
16672 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
16674 // Retrieve the Qmark node to be expanded.
16675 GenTreePtr dst = nullptr;
16676 GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
16677 if (qmark == nullptr)
16682 if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
16684 fgExpandQmarkForCastInstOf(block, stmt);
16691 printf("\nExpanding top-level qmark in BB%02u (before)\n", block->bbNum);
16692 fgDispBasicBlocks(block, block, true);
16696 // Retrieve the operands.
16697 GenTreePtr condExpr = qmark->gtGetOp1();
16698 GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
16699 GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
16701 assert(condExpr->gtFlags & GTF_RELOP_QMARK);
16702 condExpr->gtFlags &= ~GTF_RELOP_QMARK;
16704 assert(!varTypeIsFloating(condExpr->TypeGet()));
// A GT_NOP arm means that side of the qmark is empty; at least one arm must
// carry a real expression.
16706 bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP);
16707 bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
16708 assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!
16710 // Create remainder, cond and "else" blocks. After this, the blocks are in this order:
16711 // block ... condBlock ... elseBlock ... remainderBlock
16713 // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
16714 // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
16715 // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
16716 // remainderBlock will still be GC safe.
16717 unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
16718 BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
16719 fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
16721 BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true);
16722 BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true);
16724 // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
16725 // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
16726 if ((block->bbFlags & BBF_INTERNAL) == 0)
16728 condBlock->bbFlags &= ~BBF_INTERNAL;
16729 elseBlock->bbFlags &= ~BBF_INTERNAL;
16730 condBlock->bbFlags |= BBF_IMPORTED;
16731 elseBlock->bbFlags |= BBF_IMPORTED;
16734 remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
16736 condBlock->inheritWeight(block);
16738 fgAddRefPred(condBlock, block);
16739 fgAddRefPred(elseBlock, condBlock);
16740 fgAddRefPred(remainderBlock, elseBlock);
16742 BasicBlock* thenBlock = nullptr;
16743 if (hasTrueExpr && hasFalseExpr)
16748 // S0 -->-- ~C -->-- T F -->-- S1
// Full diamond: reverse the condition so the jump skips the then-block.
16753 gtReverseCond(condExpr);
16754 condBlock->bbJumpDest = elseBlock;
16756 thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
16757 thenBlock->bbJumpDest = remainderBlock;
16758 if ((block->bbFlags & BBF_INTERNAL) == 0)
16760 thenBlock->bbFlags &= ~BBF_INTERNAL;
16761 thenBlock->bbFlags |= BBF_IMPORTED;
16764 elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
16766 fgAddRefPred(thenBlock, condBlock);
16767 fgAddRefPred(remainderBlock, thenBlock);
16769 thenBlock->inheritWeightPercentage(condBlock, 50);
16770 elseBlock->inheritWeightPercentage(condBlock, 50);
16772 else if (hasTrueExpr)
16775 // S0 -->-- ~C -->-- T -->-- S1
16777 // +-->-------------+
// Then-only: reverse the condition so taking the branch skips the then code.
16780 gtReverseCond(condExpr);
16781 condBlock->bbJumpDest = remainderBlock;
16782 fgAddRefPred(remainderBlock, condBlock);
16783 // Since we have no false expr, use the one we'd already created.
16784 thenBlock = elseBlock;
16785 elseBlock = nullptr;
16787 thenBlock->inheritWeightPercentage(condBlock, 50);
16789 else if (hasFalseExpr)
16792 // S0 -->-- C -->-- F -->-- S1
16794 // +-->------------+
// Else-only: condition keeps its sense; a taken branch skips the else code.
16797 condBlock->bbJumpDest = remainderBlock;
16798 fgAddRefPred(remainderBlock, condBlock);
16800 elseBlock->inheritWeightPercentage(condBlock, 50);
16803 GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
16804 GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
16805 fgInsertStmtAtEnd(condBlock, jmpStmt);
16807 // Remove the original qmark statement.
16808 fgRemoveStmt(block, stmt);
16810 // Since we have top level qmarks, we either have a dst for it in which case
16811 // we need to create tmps for true and falseExprs, else just don't bother
16813 unsigned lclNum = BAD_VAR_NUM;
16814 if (dst != nullptr)
16816 assert(dst->gtOper == GT_LCL_VAR);
16817 lclNum = dst->gtLclVar.gtLclNum;
16821 assert(qmark->TypeGet() == TYP_VOID);
16826 if (dst != nullptr)
16828 trueExpr = gtNewTempAssign(lclNum, trueExpr);
16830 GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
16831 fgInsertStmtAtEnd(thenBlock, trueStmt);
16834 // Assign the falseExpr into the dst or tmp, insert in elseBlock
16837 if (dst != nullptr)
16839 falseExpr = gtNewTempAssign(lclNum, falseExpr);
16841 GenTreePtr falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx);
16842 fgInsertStmtAtEnd(elseBlock, falseStmt);
16848 printf("\nExpanding top-level qmark in BB%02u (after)\n", block->bbNum);
16849 fgDispBasicBlocks(block, remainderBlock, true);
16854 /*****************************************************************************
16856 * Expand GT_QMARK nodes from the flow graph into basic blocks.
16860 void Compiler::fgExpandQmarkNodes()
// Walks every statement of every block, validates qmark placement (DEBUG),
// expands each top-level qmark into control flow, then verifies none remain.
// NOTE(review): the guard that skips this work (visible elsewhere in the
// original file) is elided in this view.
16864 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
16866 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
16868 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
16870 fgPreExpandQmarkChecks(expr);
16872 fgExpandQmarkStmt(block, stmt);
16876 fgPostExpandQmarkChecks();
// From this point on, no GT_QMARK nodes may appear in the IR.
16879 compQmarkRationalized = true;
16883 /*****************************************************************************
16885 * Make sure we don't have any more GT_QMARK nodes.
16888 void Compiler::fgPostExpandQmarkChecks()
// Debug-only sweep over all statements asserting that qmark expansion left
// no GT_QMARK nodes behind (fgAssertNoQmark fires on any survivor).
16890 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
16892 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
16894 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
16895 fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
16901 /*****************************************************************************
16903 * Transform all basic blocks for codegen.
16906 void Compiler::fgMorph()
// Top-level driver for the morph phase: inserts cctor/GC-check/stack-check
// scaffolding, runs the EH simplification passes, marks implicit-byref args,
// promotes structs, morphs all blocks, then expands qmarks. Each sub-phase
// ends with an EndPhase marker. NOTE(review): interior lines (DEBUG guards,
// some phase calls) are elided in this view.
16908 noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here.
16910 fgOutgoingArgTemps = nullptr;
16915 printf("*************** In fgMorph()\n");
16919 fgDispBasicBlocks(true);
16923 // Insert call to class constructor as the first basic block if
16924 // we were asked to do so.
16925 if (info.compCompHnd->initClass(nullptr /* field */, info.compMethodHnd /* method */,
16926 impTokenLookupContextHandle /* context */) &
16927 CORINFO_INITCLASS_USE_HELPER)
16929 fgEnsureFirstBBisScratch();
16930 fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass());
16934 if (opts.compGcChecks)
16936 for (unsigned i = 0; i < info.compArgsCount; i++)
16938 if (lvaTable[i].TypeGet() == TYP_REF)
16940 // confirm that the argument is a GC pointer (for debugging (GC stress))
16941 GenTreePtr op = gtNewLclvNode(i, TYP_REF);
16942 GenTreeArgList* args = gtNewArgList(op);
16943 op = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, 0, args);
16945 fgEnsureFirstBBisScratch();
16946 fgInsertStmtAtEnd(fgFirstBB, op);
// Stack-check locals: temps that record ESP so prolog/epilog (and call
// sites) can verify the stack pointer was not corrupted.
16951 if (opts.compStackCheckOnRet)
16953 lvaReturnEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnEspCheck"));
16954 lvaTable[lvaReturnEspCheck].lvType = TYP_INT;
16957 if (opts.compStackCheckOnCall)
16959 lvaCallEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("CallEspCheck"));
16960 lvaTable[lvaCallEspCheck].lvType = TYP_INT;
16964 /* Filter out unimported BBs */
16966 fgRemoveEmptyBlocks();
16969 /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
16970 fgDebugCheckBBlist(false, false);
16973 EndPhase(PHASE_MORPH_INIT);
16978 JITDUMP("trees after inlining\n");
16979 DBEXEC(VERBOSE, fgDispBasicBlocks(true));
16982 RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.
16984 EndPhase(PHASE_MORPH_INLINE);
16986 /* Add any internal blocks/trees we may need */
16991 fgMultipleNots = false;
16995 /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
16996 fgDebugCheckBBlist(false, false);
// EH simplification passes: remove trivially-empty try/finally regions and
// merge chains of finallys before the main morph.
16999 fgRemoveEmptyTry();
17001 EndPhase(PHASE_EMPTY_TRY);
17003 fgRemoveEmptyFinally();
17005 EndPhase(PHASE_EMPTY_FINALLY);
17007 fgMergeFinallyChains();
17009 EndPhase(PHASE_MERGE_FINALLY_CHAINS);
17013 EndPhase(PHASE_CLONE_FINALLY);
17015 fgUpdateFinallyTargetFlags();
17017 /* For x64 and ARM64 we need to mark irregular parameters early so that they don't get promoted */
17018 fgMarkImplicitByRefArgs();
17020 EndPhase(PHASE_MORPH_IMPBYREF);
17022 /* Promote struct locals if necessary */
17023 fgPromoteStructs();
17025 /* Now it is the time to figure out what locals have address-taken. */
17026 fgMarkAddressExposedLocals();
17029 /* Now that locals have address-taken marked, we can safely apply stress. */
17031 fgStress64RsltMul();
17034 EndPhase(PHASE_STR_ADRLCL);
17036 /* Morph the trees in all the blocks of the method */
17040 EndPhase(PHASE_MORPH_GLOBAL);
17043 JITDUMP("trees after fgMorphBlocks\n");
17044 DBEXEC(VERBOSE, fgDispBasicBlocks(true));
17047 /* Decide the kind of code we want to generate */
17051 fgExpandQmarkNodes();
// Morph is over; no block should be considered "current" any more.
17054 compCurBB = nullptr;
17058 /*****************************************************************************
17060 * Promoting struct locals
// Walks the original locals (snapshot of lvaCount, since promotion grabs new
// temps and grows lvaTable) and replaces eligible struct locals with one
// promoted local per field. Bails out entirely for varargs methods, methods
// needing a GS cookie, or when the optimization is disabled; stops promoting
// (but keeps scanning for lvRegStruct marking) once the local-count tracking
// limit is hit.
// NOTE(review): subsampled listing — early "return"s, #if/#endif guards and
// braces between the visible lines are missing here.
17062 void Compiler::fgPromoteStructs()
17067 printf("*************** In fgPromoteStructs()\n");
17071 if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
17076 if (fgNoStructPromotion)
17082 // The code in this #if has been useful in debugging struct promotion issues, by
17083 // enabling selective enablement of the struct promotion optimization according to
// Debug aid: restrict promotion to methods whose hash falls in the
// [structpromohashlo, structpromohashhi] env-var range.
17086 unsigned methHash = info.compMethodHash();
17087 char* lostr = getenv("structpromohashlo");
17088 unsigned methHashLo = 0;
17091 sscanf_s(lostr, "%x", &methHashLo);
17093 char* histr = getenv("structpromohashhi");
17094 unsigned methHashHi = UINT32_MAX;
17097 sscanf_s(histr, "%x", &methHashHi);
17099 if (methHash < methHashLo || methHash > methHashHi)
17105 printf("Promoting structs for method %s, hash = 0x%x.\n",
17106 info.compFullName, info.compMethodHash());
17107 printf(""); // in our logic this causes a flush
17112 if (info.compIsVarArgs)
17117 if (getNeedsGSSecurityCookie())
17125 printf("\nlvaTable before fgPromoteStructs\n");
17130 // The lvaTable might grow as we grab temps. Make a local copy here.
17131 unsigned startLvaCount = lvaCount;
17134 // Loop through the original lvaTable. Looking for struct locals to be promoted.
17136 lvaStructPromotionInfo structPromotionInfo;
17137 bool tooManyLocals = false;
17139 for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
17141 // Whether this var got promoted
17142 bool promotedVar = false;
17143 LclVarDsc* varDsc = &lvaTable[lclNum];
17145 // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
17146 // its fields. Instead, we will attempt to enregister the entire struct.
17147 if (varDsc->lvIsSIMDType() && varDsc->lvIsUsedInSIMDIntrinsic())
17149 varDsc->lvRegStruct = true;
17151 // Don't promote if we have reached the tracking limit.
17152 else if (lvaHaveManyLocals())
17154 // Print the message first time when we detected this condition
17155 if (!tooManyLocals)
17157 JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
17159 tooManyLocals = true;
17161 else if (varTypeIsStruct(varDsc))
17163 bool shouldPromote;
// Two-step decision: canPromote (structural feasibility), then
// shouldPromote (profitability heuristic).
17165 lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
17166 if (structPromotionInfo.canPromote)
17168 shouldPromote = lvaShouldPromoteStructVar(lclNum, &structPromotionInfo);
17172 shouldPromote = false;
17176 // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
17177 // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
17178 static int structPromoVarNum = 0;
17179 structPromoVarNum++;
// NOTE(review): atoi(getenv(...)) crashes if the env var is unset — this is
// debug-only code that presumably lives under an #if; confirm before enabling.
17180 if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
17185 // Promote the this struct local var.
17186 lvaPromoteStructVar(lclNum, &structPromotionInfo);
17187 promotedVar = true;
17189 #ifdef _TARGET_ARM_
17190 if (structPromotionInfo.requiresScratchVar)
17192 // Ensure that the scratch variable is allocated, in case we
17193 // pass a promoted struct as an argument.
17194 if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
17196 lvaPromotedStructAssemblyScratchVar =
17197 lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
17198 lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
17201 #endif // _TARGET_ARM_
// Unpromoted SIMD locals whose fields are never accessed individually are
// still candidates for whole-struct enregistration.
17205 if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed)
17207 // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
17208 // we will treat it as a reg struct.
17209 varDsc->lvRegStruct = true;
17216 printf("\nlvaTable after fgPromoteStructs\n");
// Tree-walk callback for a GT_FIELD node. Two rewrites:
//  1) Field of a promoted struct local: replace the GT_FIELD with a GT_LCL_VAR
//     referencing the promoted field local (found by offset).
//  2) Field of a "normed struct" local (see the long comment below): replace
//     the GT_FIELD with a GT_LCL_VAR of the local itself, but only when types
//     match exactly.
// Returns WALK_SKIP_SUBTREES when the tree was (or may have been) replaced,
// WALK_CONTINUE otherwise.
// NOTE(review): subsampled listing — braces and some closing lines are missing.
17222 Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre)
17224 noway_assert(tree->OperGet() == GT_FIELD);
// obj is the local whose field is accessed, i.e. the tree under ADDR(lcl).
17226 GenTreePtr objRef = tree->gtField.gtFldObj;
17227 GenTreePtr obj = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? objRef->gtOp.gtOp1 : nullptr;
17228 noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)));
17230 /* Is this an instance data member? */
17232 if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))
17234 unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
17235 LclVarDsc* varDsc = &lvaTable[lclNum];
17237 if (varTypeIsStruct(obj))
17239 if (varDsc->lvPromoted)
// Case 1: promoted struct — map (local, field offset) to the field local.
17242 unsigned fldOffset = tree->gtField.gtFldOffset;
17243 unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
17244 noway_assert(fieldLclIndex != BAD_VAR_NUM);
17246 tree->SetOper(GT_LCL_VAR);
17247 tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
17248 tree->gtType = lvaTable[fieldLclIndex].TypeGet();
// A local var access cannot be a global reference.
17249 tree->gtFlags &= GTF_NODE_MASK;
17250 tree->gtFlags &= ~GTF_GLOB_REF;
// If this new local is the LHS of an assignment, mark it as a def.
17252 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17253 if (parent->gtOper == GT_ASG)
17255 if (parent->gtOp.gtOp1 == tree)
17257 tree->gtFlags |= GTF_VAR_DEF;
17258 tree->gtFlags |= GTF_DONT_CSE;
17261 // Promotion of struct containing struct fields where the field
17262 // is a struct with a single pointer sized scalar type field: in
17263 // this case struct promotion uses the type of the underlying
17264 // scalar field as the type of struct field instead of recursively
17265 // promoting. This can lead to a case where we have a block-asgn
17266 // with its RHS replaced with a scalar type. Mark RHS value as
17267 // DONT_CSE so that assertion prop will not do const propagation.
17268 // The reason this is required is that if RHS of a block-asg is a
17269 // constant, then it is interpreted as init-block incorrectly.
17271 // TODO - This can also be avoided if we implement recursive struct
17273 if (varTypeIsStruct(parent) && parent->gtOp.gtOp2 == tree && !varTypeIsStruct(tree))
17275 tree->gtFlags |= GTF_DONT_CSE;
17281 printf("Replacing the field in promoted struct with a local var:\n");
17282 fgWalkPre->printModified = true;
17285 return WALK_SKIP_SUBTREES;
17291 // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
17292 // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
17293 // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
17294 // there is one extremely rare case where that won't be true. An enum type is a special value type
17295 // that contains exactly one element of a primitive integer type (that, for CLS programs is named
17296 // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
17297 // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
17298 // ldfld. For example:
17300 // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
17302 // .field public specialname rtspecialname int16 value__
17303 // .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
17305 // .method public hidebysig static void Main() cil managed
17307 // .locals init (valuetype mynamespace.e_t V_0)
17310 // ldflda int16 mynamespace.e_t::value__
17314 // Normally, compilers will not generate the ldflda, since it is superfluous.
17316 // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
17317 // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
17318 // mismatch like this, don't do this morphing. The local var may end up getting marked as
17319 // address taken, and the appropriate SHORT load will be done from memory in that case.
17321 if (tree->TypeGet() == obj->TypeGet())
// Case 2: normed struct — the field IS the local; just reference the local.
17323 tree->ChangeOper(GT_LCL_VAR);
17324 tree->gtLclVarCommon.SetLclNum(lclNum);
17325 tree->gtFlags &= GTF_NODE_MASK;
17327 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17328 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
17330 tree->gtFlags |= GTF_VAR_DEF;
17331 tree->gtFlags |= GTF_DONT_CSE;
17336 printf("Replacing the field in normed struct with the local var:\n");
17337 fgWalkPre->printModified = true;
17340 return WALK_SKIP_SUBTREES;
17345 return WALK_CONTINUE;
// Tree-walk callback for a GT_LCL_FLD node referencing a promoted struct
// local. If a promoted field local exists at the same offset with a
// compatible size (and, on x86, the same float-ness), redirect the node to
// that field local — turning it into a GT_LCL_VAR when the field's type is
// registerable. Otherwise the whole struct must stay addressable in memory
// (DNER_LocalField).
// NOTE(review): subsampled listing — braces/#else/#endif lines are missing.
17348 Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTreePtr tree, fgWalkData* fgWalkPre)
17350 noway_assert(tree->OperGet() == GT_LCL_FLD);
17352 unsigned lclNum = tree->gtLclFld.gtLclNum;
17353 LclVarDsc* varDsc = &lvaTable[lclNum];
17355 if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted))
17358 unsigned fldOffset = tree->gtLclFld.gtLclOffs;
17359 unsigned fieldLclIndex = 0;
17360 LclVarDsc* fldVarDsc = nullptr;
17362 if (fldOffset != BAD_VAR_NUM)
17364 fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
17365 noway_assert(fieldLclIndex != BAD_VAR_NUM);
17366 fldVarDsc = &lvaTable[fieldLclIndex];
// Reuse an existing field local only when its size matches the access size
// (on x86 a float/int size match is not enough — the register file differs).
17369 if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType)
17370 #ifdef _TARGET_X86_
17371 && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType)
17375 // There is an existing sub-field we can use
17376 tree->gtLclFld.SetLclNum(fieldLclIndex);
17378 // We need to keep the types 'compatible'. If we can switch back to a GT_LCL_VAR
17379 CLANG_FORMAT_COMMENT_ANCHOR;
17381 #ifdef _TARGET_ARM_
17382 assert(varTypeIsIntegralOrI(tree->TypeGet()) || varTypeIsFloating(tree->TypeGet()));
17384 assert(varTypeIsIntegralOrI(tree->TypeGet()));
17386 if (varTypeCanReg(fldVarDsc->TypeGet()))
17388 // If the type is integer-ish, then we can use it as-is
17389 tree->ChangeOper(GT_LCL_VAR);
17390 assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex);
17391 tree->gtType = fldVarDsc->TypeGet();
17395 printf("Replacing the GT_LCL_FLD in promoted struct with a local var:\n");
17396 fgWalkPre->printModified = true;
// Mark defs (LHS of an assignment) on the rewritten node.
17401 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17402 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
17404 tree->gtFlags |= GTF_VAR_DEF;
17405 tree->gtFlags |= GTF_DONT_CSE;
17410 // There is no existing field that has all the parts that we need
17411 // So we must ensure that the struct lives in memory.
17412 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
17415 // We can't convert this guy to a float because he really does have his
17417 varDsc->lvKeepType = 1;
17421 return WALK_SKIP_SUBTREES;
17424 return WALK_CONTINUE;
17427 /*****************************************************************************
17429 * Mark irregular parameters. For x64 this is 3, 5, 6, 7, >8 byte structs that are passed by reference.
17430 * For ARM64, this is structs larger than 16 bytes that are also not HFAs that are passed by reference.
// For AMD64 (non-Unix ABI) / ARM64: retype struct parameters that the ABI
// passes by reference to TYP_BYREF, and set lvIsTemp as the "implicit byref"
// marker that lvaIsImplicitByRefLocal / fgMorphImplicitByRefArgs look for.
// Must run before struct promotion (the assert below checks the ordering).
// NOTE(review): subsampled listing — some guards/braces are missing.
17432 void Compiler::fgMarkImplicitByRefArgs()
17434 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
17438 printf("\n*************** In fgMarkImplicitByRefs()\n");
17442 for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++)
17444 LclVarDsc* varDsc = &lvaTable[lclNum];
17446 assert(!varDsc->lvPromoted); // Called in the wrong order?
17448 if (varDsc->lvIsParam && varTypeIsStruct(varDsc))
// Determine the struct's size; fall back to asking the EE when the
// local's recorded size is not larger than a register.
17452 if (varDsc->lvSize() > REGSIZE_BYTES)
17454 size = varDsc->lvSize();
17458 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
17459 size = info.compCompHnd->getClassSize(typeHnd);
17462 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// ABI test: x64 passes by ref anything bigger than a register or whose size
// is not a power of two; ARM64 anything over pointer-size that is not a
// multireg (e.g. HFA) struct.
17463 #if defined(_TARGET_AMD64_)
17464 if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
17465 #elif defined(_TARGET_ARM64_)
17466 if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc))
17469 // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local
17470 // So I am now using it to indicate that this is one of the weird implicit
17472 // The address taken cleanup will look for references to locals marked like
17473 // this, and transform them appropriately.
17474 varDsc->lvIsTemp = 1;
17476 // Also marking them as BYREF will hide them from struct promotion.
17477 varDsc->lvType = TYP_BYREF;
17478 varDsc->lvRefCnt = 0;
17480 // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF
17481 // make sure that the following flag is not set as these will force SSA to
17482 // exclude tracking/enregistering these LclVars. (see fgExcludeFromSsa)
17484 varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it.
17487 // This should not be converted to a double in stress mode,
17488 // because it is really a pointer
17489 varDsc->lvKeepType = 1;
17493 printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum);
17497 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
17501 #endif // _TARGET_AMD64_ || _TARGET_ARM64_
17504 /*****************************************************************************
17506 * Morph irregular parameters
17507 * for x64 and ARM64 this means turning them into byrefs, adding extra indirs.
// Tree-walk helper: rewrites a reference to an implicit-byref parameter.
//   ADDR(lclVar)  -> the lclVar itself, retyped TYP_BYREF (the param already
//                    holds the address).
//   lclVar        -> OBJ(lclVar) — an indirection through the byref param.
// Returns true when the tree was transformed, false otherwise (including on
// targets without implicit byrefs).
// NOTE(review): subsampled listing — return statements/braces between the
// visible lines are missing here.
17509 bool Compiler::fgMorphImplicitByRefArgs(GenTreePtr* pTree, fgWalkData* fgWalkPre)
17511 #if !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
17515 #else // _TARGET_AMD64_ || _TARGET_ARM64_
17517 GenTree* tree = *pTree;
17518 assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)));
17520 bool isAddr = (tree->gtOper == GT_ADDR);
17521 GenTreePtr lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree;
17522 unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
17523 LclVarDsc* lclVarDsc = &lvaTable[lclNum];
17525 if (!lvaIsImplicitByRefLocal(lclNum))
17527 // We only need to tranform the 'marked' implicit by ref parameters
17531 // The SIMD transformation to coalesce contiguous references to SIMD vector fields will
17532 // re-invoke the traversal to mark address-taken locals.
17533 // So, we may encounter a tree that has already been transformed to TYP_BYREF.
17534 // If we do, leave it as-is.
17535 if (!varTypeIsStruct(lclVarTree))
17537 assert(lclVarTree->TypeGet() == TYP_BYREF);
17541 // We are overloading the lvRefCnt field here because real ref counts have not been set.
17542 lclVarDsc->lvRefCnt++;
17544 // This is no longer a def of the lclVar, even if it WAS a def of the struct.
17545 lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK);
17549 // change &X into just plain X
17550 tree->CopyFrom(lclVarTree, this);
17551 tree->gtType = TYP_BYREF;
17556 printf("Replacing address of implicit by ref struct parameter with byref:\n");
17557 fgWalkPre->printModified = true;
17563 // Change X into OBJ(X)
17564 var_types structType = tree->gtType;
17565 tree->gtType = TYP_BYREF;
17566 tree = gtNewObjNode(lclVarDsc->lvVerTypeInfo.GetClassHandle(), tree);
17567 if (structType == TYP_STRUCT)
17569 gtSetObjGcInfo(tree->AsObj());
17572 // TODO-CQ: If the VM ever stops violating the ABI and passing heap references
17573 // we could remove TGTANYWHERE
17574 tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE);
17579 printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n");
17580 gtDispTree(tree, nullptr, nullptr, true);
17581 fgWalkPre->printModified = true;
17589 #endif // _TARGET_AMD64_ || _TARGET_ARM64_
17592 // An "AddrExposedContext" expresses the calling context in which an address expression occurs.
// Pushed/popped on an AXCStack by the fgMarkAddrTakenLocals{Pre,Post}CB pair
// to simulate passing a "context" argument through a recursive tree walk.
17593 enum AddrExposedContext
17595 AXC_None, // None of the below seen yet.
17596 AXC_Ind, // The address being computed is to be dereferenced.
17597 AXC_Addr, // We're computing a raw address (not dereferenced, at least not immediately).
17598 AXC_IndWide, // A block operation dereferenced an address referencing more bytes than the address
17599 // addresses -- if the address addresses a field of a struct local, we need to consider
17600 // the entire local address taken (not just the field).
17601 AXC_AddrWide, // The address being computed will be dereferenced by a block operation that operates
17602 // on more bytes than the width of the storage location addressed. If this is a
17603 // field of a promoted struct local, declare the entire struct local address-taken.
17604 AXC_IndAdd, // A GT_ADD is the immediate parent, and it was evaluated in an IND contxt.
17605 // If one arg is a constant int, evaluate the other in an IND context. Otherwise, none.
17608 typedef ArrayStack<AddrExposedContext> AXCStack;
17610 // We use pre-post to simulate passing an argument in a recursion, via a stack.
// Post-order callback: pops (and discards) the context the pre-order callback
// pushed for this node, keeping the stack balanced.
17611 Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPostCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
17613 AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
17614 (void)axcStack->Pop();
17615 return WALK_CONTINUE;
// Pre-order callback of the address-exposure walk. Computes the
// AddrExposedContext in which the current node is evaluated (top of axcStack,
// possibly adjusted based on the parent), performs the struct-field /
// local-field / implicit-byref rewrites, marks locals address-exposed when
// their address escapes (AXC_Addr / AXC_AddrWide), and pushes the context the
// node's children should be evaluated in (popped by the PostCB).
// NOTE(review): this listing is subsampled — the switch's `case GT_...:`
// labels, breaks and many braces are missing, so the opcode each fragment
// handles is inferred from neighboring comments and is marked as such below.
17618 Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
17620 GenTreePtr tree = *pTree;
17621 Compiler* comp = fgWalkPre->compiler;
17622 AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
17623 AddrExposedContext axc = axcStack->Top();
17625 // In some situations, we have to figure out what the effective context is in which to
17626 // evaluate the current tree, depending on which argument position it is in its parent.
// (Presumably the AXC_IndAdd adjustment:) when the parent GT_ADD was seen in
// an IND context, the non-constant operand is itself in an IND context.
17633 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17634 assert(parent->OperGet() == GT_ADD);
17635 // Is one of the args a constant representing a field offset,
17636 // and is this the other? If so, Ind context.
17637 if (parent->gtOp.gtOp1->IsCnsIntOrI() && parent->gtOp.gtOp2 == tree)
17641 else if (parent->gtOp.gtOp2->IsCnsIntOrI() && parent->gtOp.gtOp1 == tree)
17656 // Now recurse properly for the tree.
17657 switch (tree->gtOper)
// (case label missing — presumably a simple indirection, e.g. GT_IND)
17660 if (axc != AXC_Addr)
17662 axcStack->Push(AXC_Ind);
17666 axcStack->Push(AXC_None);
17668 return WALK_CONTINUE;
// (case label missing — presumably the block-op / GT_OBJ-style cases)
17672 if (axc == AXC_Addr)
17674 axcStack->Push(AXC_None);
17676 else if (tree->TypeGet() == TYP_STRUCT)
17678 // The block operation will derefence its argument(s) -- usually. If the size of the initblk
17679 // or copyblk exceeds the size of a storage location whose address is used as one of the
17680 // arguments, then we have to consider that storage location (indeed, it's underlying containing
17681 // location) to be address taken. So get the width of the initblk or copyblk.
17683 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17684 GenTreeBlk* blk = tree->AsBlk();
17685 unsigned width = blk->gtBlkSize;
17686 noway_assert(width != 0);
17688 GenTree* addr = blk->Addr();
17689 if (addr->OperGet() == GT_ADDR)
17691 if (parent->gtOper == GT_ASG)
// Destination of an assignment wider than the addressed location:
// the whole location is considered address-taken (AXC_*Wide).
17693 if ((tree == parent->gtOp.gtOp1) &&
17694 ((width == 0) || !comp->fgFitsInOrNotLoc(addr->gtGetOp1(), width)))
17701 assert(parent->gtOper == GT_CALL);
17704 axcStack->Push(axc);
17708 // This is like a regular GT_IND.
17709 axcStack->Push(AXC_Ind);
17711 return WALK_CONTINUE;
// (case label missing) — unknown width: conservatively treat as widest.
17714 // Assume maximal width.
17715 axcStack->Push(AXC_IndWide);
17716 return WALK_CONTINUE;
17719 case GT_FIELD_LIST:
17720 axcStack->Push(AXC_None);
17721 return WALK_CONTINUE;
// (case label missing — presumably GT_INDEX / array-element address)
17724 // Taking the address of an array element never takes the address of a local.
17725 axcStack->Push(AXC_None);
17726 return WALK_CONTINUE;
// (case label missing — presumably GT_ADDR)
17729 // If we have ADDR(lcl), and "lcl" is an implicit byref parameter, fgMorphImplicitByRefArgs will
17730 // convert to just "lcl". This is never an address-context use, since the local is already a
17731 // byref after this transformation.
17732 if (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR && comp->fgMorphImplicitByRefArgs(pTree, fgWalkPre))
17734 // Push something to keep the PostCB, which will pop it, happy.
17735 axcStack->Push(AXC_None);
17736 // In the first case, tree may no longer be a leaf, but we're done with it; is a leaf in the second
17738 return WALK_SKIP_SUBTREES;
17740 #ifdef FEATURE_SIMD
17741 if (tree->gtOp.gtOp1->OperGet() == GT_SIMD)
17743 axcStack->Push(AXC_None);
17746 #endif // FEATURE_SIMD
// ADDR under IND cancels out; ADDR under a wide IND becomes a wide ADDR.
17747 if (axc == AXC_Ind)
17749 axcStack->Push(AXC_None);
17751 else if (axc == AXC_IndWide)
17753 axcStack->Push(AXC_AddrWide);
17757 assert(axc == AXC_None);
17758 axcStack->Push(AXC_Addr);
17760 return WALK_CONTINUE;
// (case label missing — presumably GT_FIELD)
17763 // First, handle a couple of special cases: field of promoted struct local, field
17764 // of "normed" struct.
17765 if (comp->fgMorphStructField(tree, fgWalkPre) == WALK_SKIP_SUBTREES)
17767 // It (may have) replaced the field with a local var or local field. If we're in an addr context,
17768 // label it addr-taken.
17769 if (tree->OperIsLocal() && (axc == AXC_Addr || axc == AXC_AddrWide))
17771 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
17772 comp->lvaSetVarAddrExposed(lclNum);
// Wide access through a field local exposes the parent struct too.
17773 if (axc == AXC_AddrWide)
17775 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
17776 if (varDsc->lvIsStructField)
17778 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
17782 // Push something to keep the PostCB, which will pop it, happy.
17783 axcStack->Push(AXC_None);
17784 return WALK_SKIP_SUBTREES;
17788 // GT_FIELD is an implicit deref.
17789 if (axc == AXC_Addr)
17791 axcStack->Push(AXC_None);
17793 else if (axc == AXC_AddrWide)
17795 axcStack->Push(AXC_IndWide);
17799 axcStack->Push(AXC_Ind);
17801 return WALK_CONTINUE;
// (case label missing — presumably GT_LCL_FLD)
17806 assert(axc != AXC_Addr);
17807 // This recognizes certain forms, and does all the work. In that case, returns WALK_SKIP_SUBTREES,
17808 // else WALK_CONTINUE. We do the same here.
17809 fgWalkResult res = comp->fgMorphLocalField(tree, fgWalkPre);
17810 if (res == WALK_SKIP_SUBTREES && tree->OperGet() == GT_LCL_VAR && (axc == AXC_Addr || axc == AXC_AddrWide))
17812 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
17813 comp->lvaSetVarAddrExposed(lclNum);
17814 if (axc == AXC_AddrWide)
17816 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
17817 if (varDsc->lvIsStructField)
17819 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
17823 // Must push something; if res is WALK_SKIP_SUBTREES, doesn't matter
17824 // what, but something to be popped by the post callback. If we're going
17825 // to analyze children, the LCL_FLD creates an Ind context, so use that.
17826 axcStack->Push(AXC_Ind);
// (case label missing — presumably GT_LCL_VAR)
17831 // On some architectures, some arguments are passed implicitly by reference.
17832 // Modify the trees to reflect that, if this local is one of those.
17833 if (comp->fgMorphImplicitByRefArgs(pTree, fgWalkPre))
17835 // We can't be in an address context; the ADDR(lcl), where lcl is an implicit byref param, was
17836 // handled earlier. (And we can't have added anything to this address, since it was implicit.)
17837 assert(axc != AXC_Addr);
17841 if (axc == AXC_Addr || axc == AXC_AddrWide)
17843 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
17844 comp->lvaSetVarAddrExposed(lclNum);
17845 if (axc == AXC_AddrWide)
17847 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
17848 if (varDsc->lvIsStructField)
17850 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
17854 // We may need to Quirk the storage size for this LCL_VAR
17855 // some PInvoke signatures incorrectly specify a ByRef to an INT32
17856 // when they actually write a SIZE_T or INT64
17857 if (axc == AXC_Addr)
17859 comp->gtCheckQuirkAddrExposedLclVar(tree, fgWalkPre->parentStack);
17863 // Push something to keep the PostCB, which will pop it, happy.
17864 axcStack->Push(AXC_None);
17865 // In the first case, tree may no longer be a leaf, but we're done with it; is a leaf in the second case.
17866 return WALK_SKIP_SUBTREES;
// (case label missing — presumably GT_ADD)
17869 assert(axc != AXC_Addr);
17870 // See below about treating pointer operations as wider indirection.
17871 if (tree->gtOp.gtOp1->gtType == TYP_BYREF || tree->gtOp.gtOp2->gtType == TYP_BYREF)
17873 axcStack->Push(AXC_IndWide);
17875 else if (axc == AXC_Ind)
17877 // Let the children know that the parent was a GT_ADD, to be evaluated in an IND context.
17878 // If it's an add of a constant and an address, and the constant represents a field,
17879 // then we'll evaluate the address argument in an Ind context; otherwise, the None context.
17880 axcStack->Push(AXC_IndAdd);
17884 axcStack->Push(axc);
17886 return WALK_CONTINUE;
17888 // !!! Treat Pointer Operations as Wider Indirection
17890 // If we are performing pointer operations, make sure we treat that as equivalent to a wider
17891 // indirection. This is because the pointers could be pointing to the address of struct fields
17892 // and could be used to perform operations on the whole struct or passed to another method.
17894 // When visiting a node in this pre-order walk, we do not know if we would in the future
17895 // encounter a GT_ADDR of a GT_FIELD below.
17897 // Note: GT_ADDR of a GT_FIELD is always a TYP_BYREF.
17898 // So let us be conservative and treat TYP_BYREF operations as AXC_IndWide and propagate a
17899 // wider indirection context down the expr tree.
17901 // Example, in unsafe code,
17903 // IL_000e 12 00 ldloca.s 0x0
17904 // IL_0010 7c 02 00 00 04 ldflda 0x4000002
17905 // IL_0015 12 00 ldloca.s 0x0
17906 // IL_0017 7c 01 00 00 04 ldflda 0x4000001
17909 // When visiting the GT_SUB node, if the types of either of the GT_SUB's operand are BYREF, then
17910 // consider GT_SUB to be equivalent of an AXC_IndWide.
17912 // Similarly for pointer comparisons and pointer escaping as integers through conversions, treat
17913 // them as AXC_IndWide.
// (case labels missing — the pointer-arithmetic/compare/convert opcodes)
17937 if ((tree->gtOp.gtOp1->gtType == TYP_BYREF) ||
17938 (tree->OperIsBinary() && (tree->gtOp.gtOp2->gtType == TYP_BYREF)))
17940 axcStack->Push(AXC_IndWide);
17941 return WALK_CONTINUE;
// (default case, presumably)
17946 // To be safe/conservative: pass Addr through, but not Ind -- otherwise, revert to "None". We must
17947 // handle the "Ind" propogation explicitly above.
17948 if (axc == AXC_Addr || axc == AXC_AddrWide)
17950 axcStack->Push(axc);
17954 axcStack->Push(AXC_None);
17956 return WALK_CONTINUE;
// Returns true if an access of `width` bytes through `tree` is known to fit
// within the storage location `tree` denotes (scalar size, local's exact
// size, field's class size, or array element size). Used by the
// address-exposure walk to decide whether a block op over-reads a location.
// NOTE(review): subsampled listing — the final (conservative) return for
// unrecognized trees is not visible here.
17960 bool Compiler::fgFitsInOrNotLoc(GenTreePtr tree, unsigned width)
17962 if (tree->TypeGet() != TYP_STRUCT)
17964 return width <= genTypeSize(tree->TypeGet());
17966 else if (tree->OperGet() == GT_LCL_VAR)
17968 assert(tree->TypeGet() == TYP_STRUCT);
17969 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
17970 return width <= lvaTable[lclNum].lvExactSize;
17972 else if (tree->OperGet() == GT_FIELD)
17974 CORINFO_CLASS_HANDLE fldClass = info.compCompHnd->getFieldClass(tree->gtField.gtFldHnd);
17975 return width <= info.compCompHnd->getClassSize(fldClass);
17977 else if (tree->OperGet() == GT_INDEX)
17979 return width <= tree->gtIndex.gtIndElemSize;
// Records that the address `op1` also refers to the field sequence `fieldSeq`
// at offset zero. When op1 contains a node that already carries a field
// sequence (a GT_LCL_FLD under it, or a GT_CNS_INT operand/node), the new
// sequence is appended there; otherwise it is recorded in the compiler-wide
// zero-offset field map keyed by the address node.
// NOTE(review): subsampled listing — switch case labels (presumably GT_ADDR,
// GT_ADD, GT_CNS_INT, default) and break statements are missing here.
17987 void Compiler::fgAddFieldSeqForZeroOffset(GenTreePtr op1, FieldSeqNode* fieldSeq)
17989 assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_REF);
17991 switch (op1->OperGet())
// (presumably GT_ADDR:) append to an underlying local field's sequence.
17994 if (op1->gtOp.gtOp1->OperGet() == GT_LCL_FLD)
17996 GenTreeLclFld* lclFld = op1->gtOp.gtOp1->AsLclFld();
17997 lclFld->gtFieldSeq = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq);
// (presumably GT_ADD:) append to whichever operand is a constant offset.
18002 if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT)
18004 FieldSeqNode* op1Fs = op1->gtOp.gtOp1->gtIntCon.gtFieldSeq;
18005 if (op1Fs != nullptr)
18007 op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
18008 op1->gtOp.gtOp1->gtIntCon.gtFieldSeq = op1Fs;
18011 else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT)
18013 FieldSeqNode* op2Fs = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
18014 if (op2Fs != nullptr)
18016 op2Fs = GetFieldSeqStore()->Append(op2Fs, fieldSeq);
18017 op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op2Fs;
// (presumably GT_CNS_INT:) append directly to the constant's sequence.
18024 FieldSeqNode* op1Fs = op1->gtIntCon.gtFieldSeq;
18025 if (op1Fs != nullptr)
18027 op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
18028 op1->gtIntCon.gtFieldSeq = op1Fs;
18034 // Record in the general zero-offset map.
18035 GetZeroOffsetFieldMap()->Set(op1, fieldSeq);
18040 /*****************************************************************************
18042 * Mark address-taken locals.
// Driver for the address-exposure analysis: for every statement in every
// basic block, walks the expression tree with the pre/post callback pair,
// seeded with an AXC_None context. The callbacks also perform the struct
// field / implicit-byref rewrites as a side effect.
18045 void Compiler::fgMarkAddressExposedLocals()
18050 printf("\n*************** In fgMarkAddressExposedLocals()\n");
18054 BasicBlock* block = fgFirstBB;
18055 noway_assert(block);
18059 /* Make the current basic block address available globally */
18065 for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
18067 // Call Compiler::fgMarkAddrTakenLocalsCB on each node
// Fresh context stack per statement; starts outside any addr/ind context.
18068 AXCStack stk(this);
18069 stk.Push(AXC_None); // We start in neither an addr or ind context.
18070 fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
18073 block = block->bbNext;
18078 // fgNodesMayInterfere:
18079 // return true if moving nodes relative to each other can change the result of a computation
// Conservative interference test between a writing node and a reading node:
// address-exposed locals interfere with any indirection, indirections
// interfere with indirections, and two accesses of the same local interfere.
// NOTE(review): subsampled listing — several return statements/braces between
// the visible branches are missing here.
18082 // read: a node which reads
18085 bool Compiler::fgNodesMayInterfere(GenTree* write, GenTree* read)
18087 LclVarDsc* srcVar = nullptr;
18089 bool readIsIndir = read->OperIsIndir() || read->OperIsImplicitIndir();
18090 bool writeIsIndir = write->OperIsIndir() || write->OperIsImplicitIndir();
18092 if (read->OperIsLocal())
18094 srcVar = &lvaTable[read->gtLclVarCommon.gtLclNum];
// An address-exposed local read may alias any write through memory.
18099 if (srcVar && srcVar->lvAddrExposed)
18103 else if (readIsIndir)
18109 else if (write->OperIsLocal())
18111 LclVarDsc* dstVar = &lvaTable[write->gtLclVarCommon.gtLclNum];
18114 return dstVar->lvAddrExposed;
18116 else if (read->OperIsLocal())
18118 if (read->gtLclVarCommon.gtLclNum == write->gtLclVarCommon.gtLclNum)
18135 /** This predicate decides whether we will fold a tree with the structure:
18136 * x = x <op> y where x could be any arbitrary expression into
18139 * This modification is only performed when the target architecture supports
18140 * complex addressing modes. In the case of ARM for example, this transformation
18141 * yields no benefit.
18143 * In case this functions decides we can proceed to fold into an assignment operator
18144 * we need to inspect whether the operator is commutative to tell fgMorph whether we need to
18145 * reverse the tree due to the fact we saw x = y <op> x and we want to fold that into
18146 * x <op>= y because the operator property.
// LEGACY_BACKEND only: on load/store architectures and the RyuJIT backend it
// always declines (the #if arms below); otherwise it matches x on either side
// of the RHS, sets *bReverse for the commutative "x = y <op> x" form, and
// declines the small-typed struct-field case shown in the tree dump below.
// NOTE(review): subsampled listing — "return" statements and braces between
// the visible lines are missing here.
18148 bool Compiler::fgShouldCreateAssignOp(GenTreePtr tree, bool* bReverse)
18150 #if CPU_LOAD_STORE_ARCH
18151 /* In the case of a load/store architecture, there's no gain by doing any of this, we bail. */
18153 #elif !defined(LEGACY_BACKEND)
18155 #else // defined(LEGACY_BACKEND)
18157 GenTreePtr op1 = tree->gtOp.gtOp1;
18158 GenTreePtr op2 = tree->gtGetOp2();
18159 genTreeOps cmop = op2->OperGet();
18161 /* Is the destination identical to the first RHS sub-operand? */
18162 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
18165 Do not transform the following tree
18167 [0024CFA4] ----------- const int 1
18168 [0024CFDC] ----G------ | int
18169 [0024CF5C] ----------- lclVar ubyte V01 tmp0
18170 [0024D05C] -A--G------ = ubyte
18171 [0024D014] D------N--- lclVar ubyte V01 tmp0
18175 [0024CFA4] ----------- const int 1
18176 [0024D05C] -A--G------ |= ubyte
18177 [0024D014] U------N--- lclVar ubyte V01 tmp0
18179 , when V01 is a struct field local.
18182 if (op1->gtOper == GT_LCL_VAR && varTypeIsSmall(op1->TypeGet()) && op1->TypeGet() != op2->gtOp.gtOp2->TypeGet())
18184 unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
18185 LclVarDsc* varDsc = lvaTable + lclNum;
18187 if (varDsc->lvIsStructField)
18196 else if (GenTree::OperIsCommutative(cmop))
18198 /* For commutative ops only, check for "a = x <op> a" */
18200 /* Should we be doing this at all? */
18201 if ((opts.compFlags & CLFLG_TREETRANS) == 0)
18206 /* Can we swap the operands to cmop ... */
18207 if ((op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) && (op2->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT))
18209 // Both sides must have side effects to prevent swap */
18213 /* Is the destination identical to the second RHS sub-operand? */
18214 if (GenTree::Compare(op1, op2->gtOp.gtOp2))
18221 #endif // defined(LEGACY_BACKEND)
18224 #ifdef FEATURE_SIMD
18226 //-----------------------------------------------------------------------------------
18227 // fgMorphCombineSIMDFieldAssignments:
18228 // If the RHS of the input stmt is a read for simd vector X Field, then this function
18229 // will keep reading next few stmts based on the vector size(2, 3, 4).
18230 // If the next stmts LHS are located contiguous and RHS are also located
18231 // contiguous, then we replace those statements with a copyblk.
//
// Arguments:
18234 // block - BasicBlock*. block which stmt belongs to
18235 // stmt - GenTreeStmt*. the stmt node we want to check
//
// Return Value:
18238 // if this function successfully optimized the stmts, then return true. Otherwise
//    return false.
//
// NOTE(review): this excerpt elides a number of original lines (the embedded
// original line numbers are non-contiguous), so some statements, braces,
// early returns, and #ifdef DEBUG regions are not visible here.
18241 bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr stmt)
18244 noway_assert(stmt->gtOper == GT_STMT);
18245 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
18246 assert(tree->OperGet() == GT_ASG);
// Remember the first assignment's LHS; prevLHS/prevRHS advance as we scan.
18248 GenTreePtr originalLHS = tree->gtOp.gtOp1;
18249 GenTreePtr prevLHS = tree->gtOp.gtOp1;
18250 GenTreePtr prevRHS = tree->gtOp.gtOp2;
18251 unsigned index = 0;
18252 var_types baseType = TYP_UNKNOWN;
18253 unsigned simdSize = 0;
// Check whether the RHS reads a field of a SIMD struct; on success this
// fills in baseType, index (field number), and simdSize.
18254 GenTreePtr simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true);
// A candidate sequence must start at field X (index 0) of a float vector.
18256 if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT)
18258 // if the RHS is not from a SIMD vector field X, then there is no need to check further.
18262 var_types simdType = getSIMDTypeForSize(simdSize);
// Number of follow-on per-field assignments expected after the field-X one
// (e.g. 3 more for a 4-float vector).
18263 int assignmentsCount = simdSize / genTypeSize(baseType) - 1;
18264 int remainingAssignments = assignmentsCount;
18265 GenTreePtr curStmt = stmt->gtNext;
18266 GenTreePtr lastStmt = stmt;
// Walk forward while the statements keep the contiguous per-field pattern.
18268 while (curStmt != nullptr && remainingAssignments > 0)
18270 GenTreePtr exp = curStmt->gtStmt.gtStmtExpr;
// The sequence must consist solely of assignments.
18271 if (exp->OperGet() != GT_ASG)
18275 GenTreePtr curLHS = exp->gtGetOp1();
18276 GenTreePtr curRHS = exp->gtGetOp2();
// Both destination and source must be adjacent in memory to the previous pair.
18278 if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
18283 remainingAssignments--;
18287 lastStmt = curStmt;
18288 curStmt = curStmt->gtNext;
18291 if (remainingAssignments > 0)
18293 // if the left assignments number is bigger than zero, then this means
18294 // that the assignments are not assigning to the contiguous memory
18295 // locations from same vector.
18301 printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
18302 printf("From BB%02u, stmt", block->bbNum);
18304 printf(" to stmt");
18305 printTreeID(lastStmt);
// Remove the follow-on per-field assignment statements; they are subsumed
// by the single block copy built below and rooted at 'stmt'.
18310 for (int i = 0; i < assignmentsCount; i++)
18312 fgRemoveStmt(block, stmt->gtNext);
// Build the destination address for the block copy from the first LHS.
18315 GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
18316 if (simdStructNode->OperIsLocal())
18318 setLclRelatedToSIMDIntrinsic(simdStructNode);
// If the destination address is (based on) a local, mark it SIMD-related too;
// strip a GT_LEA wrapper to get at the base address first.
18320 GenTree* copyBlkAddr = copyBlkDst;
18321 if (copyBlkAddr->gtOper == GT_LEA)
18323 copyBlkAddr = copyBlkAddr->AsAddrMode()->Base();
18325 GenTreeLclVarCommon* localDst = nullptr;
18326 if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr))
18328 setLclRelatedToSIMDIntrinsic(localDst);
18331 GenTree* simdStructAddr;
// A TYP_BYREF source here is an implicit-byref SIMD parameter: dereference
// it to get the vector value.
18332 if (simdStructNode->TypeGet() == TYP_BYREF)
18334 assert(simdStructNode->OperIsLocal());
18335 assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum));
18336 simdStructNode = gtNewOperNode(GT_IND, simdType, simdStructNode);
18340 assert(varTypeIsSIMD(simdStructNode));
18346 printf("\nBB%02u stmt", block->bbNum);
18348 printf("(before)\n");
18353 // TODO-1stClassStructs: we should be able to simply use a GT_IND here.
// Wrap the destination in a block node of the SIMD type, then replace the
// original assignment with a single copy-block of the whole vector.
18354 GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize);
18355 blkNode->gtType = simdType;
18356 tree = gtNewBlkOpNode(blkNode, simdStructNode, simdSize,
18357 false, // not volatile
18358 true); // copyBlock
18360 stmt->gtStmt.gtStmtExpr = tree;
18362 // Since we generated a new address node which didn't exist before,
18363 // we should expose this address manually here.
18364 AXCStack stk(this);
18365 stk.Push(AXC_None);
18366 fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
18371 printf("\nReplaced BB%02u stmt", block->bbNum);
18373 printf("(after)\n");
18380 #endif // FEATURE_SIMD