// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                                 Morph                                     XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif

#include "allocacheck.h" // for alloca

// Convert the given node into a call to the specified helper passing
// the given argument list.
//
// Tries to fold constants and also adds an edge for overflow exception.
// Returns the morphed tree.
GenTreePtr Compiler::fgMorphCastIntoHelper(GenTreePtr tree, int helper, GenTreePtr oper)
{
    GenTreePtr result;

    /* If the operand is a constant, we'll try to fold it */
    if (oper->OperIsConst())
    {
        GenTreePtr oldTree = tree;

        tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

        if (tree != oldTree)
        {
            return fgMorphTree(tree);
        }
        else if (tree->OperKind() & GTK_CONST)
        {
            return fgMorphConst(tree);
        }

        // assert that oper is unchanged and that it is still a GT_CAST node
        noway_assert(tree->gtCast.CastOp() == oper);
        noway_assert(tree->gtOper == GT_CAST);
    }
    result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
    assert(result == tree);
    return result;
}

/*****************************************************************************
 *
 *  Convert the given node into a call to the specified helper passing
 *  the given argument list.
 */

GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeArgList* args)
{
    // The helper call ought to be semantically equivalent to the original node, so preserve its VN.
    tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN);

    tree->gtFlags |= GTF_CALL;
    if (args)
    {
        tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);
    }
    tree->gtCall.gtCallType            = CT_HELPER;
    tree->gtCall.gtCallMethHnd         = eeFindHelper(helper);
    tree->gtCall.gtCallArgs            = args;
    tree->gtCall.gtCallObjp            = nullptr;
    tree->gtCall.gtCallLateArgs        = nullptr;
    tree->gtCall.fgArgInfo             = nullptr;
    tree->gtCall.gtRetClsHnd           = nullptr;
    tree->gtCall.gtCallMoreFlags       = 0;
    tree->gtCall.gtInlineCandidateInfo = nullptr;
    tree->gtCall.gtControlExpr         = nullptr;

#ifdef LEGACY_BACKEND
    tree->gtCall.gtCallRegUsedMask = RBM_NONE;
#endif // LEGACY_BACKEND

#ifdef DEBUG
    // Helper calls are never candidates.
    tree->gtCall.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;
#endif // DEBUG

#ifdef FEATURE_READYTORUN_COMPILER
    tree->gtCall.gtEntryPoint.addr = nullptr;
#endif

#if (defined(_TARGET_X86_) || defined(_TARGET_ARM_)) && !defined(LEGACY_BACKEND)
    if (varTypeIsLong(tree))
    {
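        // On these 32-bit targets a TYP_LONG helper result comes back in a
        // register pair (e.g. EDX:EAX on x86), so reinitialize the return type
        // descriptor to describe both return registers.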
        GenTreeCall*    callNode    = tree->AsCall();
        ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
        retTypeDesc->Reset();
        retTypeDesc->InitializeLongReturnType(this);
        callNode->ClearOtherRegs();
    }
#endif // _TARGET_XXX_

    /* Perform the morphing */

    tree = fgMorphArgs(tree->AsCall());

    return tree;
}

/*****************************************************************************
 *
 *  Determine if a relop must be morphed to a qmark to manifest a boolean value.
 *  This is done when code generation can't create straight-line code to do it.
 */
bool Compiler::fgMorphRelopToQmark(GenTreePtr tree)
{
#ifndef LEGACY_BACKEND
    return false;
#else  // LEGACY_BACKEND
    return (genActualType(tree->TypeGet()) == TYP_LONG) || varTypeIsFloating(tree->TypeGet());
#endif // LEGACY_BACKEND
}

/*****************************************************************************
 *
 *  Morph a cast node (we perform some very simple transformations here).
 */

#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
GenTreePtr Compiler::fgMorphCast(GenTreePtr tree)
{
    noway_assert(tree->gtOper == GT_CAST);
    noway_assert(genTypeSize(TYP_I_IMPL) == sizeof(void*));

    /* The first sub-operand is the thing being cast */

    GenTreePtr oper = tree->gtCast.CastOp();

    if (fgGlobalMorph && (oper->gtOper == GT_ADDR))
    {
        // Make sure we've checked if 'oper' is an address of an implicit-byref parameter.
        // If it is, fgMorphImplicitByRefArgs will change its type, and we want the cast
        // morphing code to see that type.
        fgMorphImplicitByRefArgs(oper);
    }

    var_types srcType = genActualType(oper->TypeGet());
    unsigned  srcSize;

    var_types dstType = tree->CastToType();
    unsigned  dstSize = genTypeSize(dstType);

    // See if the cast has to be done in two steps.  R -> I
    if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
    {
        // Only x86 must go through TYP_DOUBLE to get to all
        // integral types; everybody else can get straight there,
        // except when using helpers.
        if (srcType == TYP_FLOAT
#if !FEATURE_STACK_FP_X87

#if defined(_TARGET_ARM64_)
            // Arm64: src = float, dst is overflow conversion.
            // This goes through helper and hence src needs to be converted to double.
            && tree->gtOverflow()
#elif defined(_TARGET_AMD64_)
            // Amd64: src = float, dst = uint64 or overflow conversion.
            // This goes through helper and hence src needs to be converted to double.
            && (tree->gtOverflow() || (dstType == TYP_ULONG))
#elif defined(_TARGET_ARM_)
            // Arm: src = float, dst = int64/uint64 or overflow conversion.
            && (tree->gtOverflow() || varTypeIsLong(dstType))
#endif

#endif // FEATURE_STACK_FP_X87
            )
        {
            oper = gtNewCastNode(TYP_DOUBLE, oper, TYP_DOUBLE);
        }
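
        // If we took the path above, the tree now has the shape
        // CAST<dstType>(CAST<double>(floatSrc)), so any helper that gets used
        // below only ever needs a double overload.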

        // Do we need to do it in two steps: R -> I, then I -> smallType?
        CLANG_FORMAT_COMMENT_ANCHOR;

#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
        if (dstSize < genTypeSize(TYP_INT))
        {
            oper = gtNewCastNodeL(TYP_INT, oper, TYP_INT);
            oper->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
#else
        if (dstSize < sizeof(void*))
        {
            oper = gtNewCastNodeL(TYP_I_IMPL, oper, TYP_I_IMPL);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
        }
#endif
        else
        {
            /* Note that if we need to use a helper call then we can not morph oper */
            if (!tree->gtOverflow())
            {
#ifdef _TARGET_ARM64_ // On ARM64 All non-overflow checking conversions can be optimized
                goto OPTIMIZECAST;
#else
                switch (dstType)
                {
                    case TYP_INT:
#ifdef _TARGET_X86_ // there is no rounding convert to integer instruction on ARM or x64 so skip this
#ifdef LEGACY_BACKEND
                        // the RyuJIT backend does not use the x87 FPU and therefore
                        // does not support folding the cast conv.i4(round.d(d))
                        if ((oper->gtOper == GT_INTRINSIC) &&
                            (oper->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round))
                        {
                            /* optimization: conv.i4(round.d(d)) -> round.i(d) */
                            oper->gtType = dstType;
                            return fgMorphTree(oper);
                        }
                        // if SSE2 is not enabled, we need the helper
                        else
#endif // LEGACY_BACKEND
                            if (!opts.compCanUseSSE2)
                        {
                            return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
                        }
#endif // _TARGET_X86_
                        goto OPTIMIZECAST;

                    case TYP_UINT:
#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
                        goto OPTIMIZECAST;
#else  // _TARGET_ARM_
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
#endif // _TARGET_ARM_

#ifdef _TARGET_AMD64_
                    // SSE2 has instructions to convert a float/double directly to a long
                    case TYP_LONG:
                        goto OPTIMIZECAST;
#else
                    case TYP_LONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
#endif //_TARGET_AMD64_

                    case TYP_ULONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
                    default:
                        break;
                }
#endif // _TARGET_ARM64_
            }
            else
            {
                switch (dstType)
                {
                    case TYP_INT:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
                    case TYP_UINT:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
                    case TYP_LONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
                    case TYP_ULONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
                    default:
                        noway_assert(!"Unexpected dstType");
                }
            }
        }
    }
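    // Note: overflow-checking R -> I casts always become one of the
    // CORINFO_HELP_DBL2*_OVF helpers above; only non-overflow conversions can
    // be expanded inline by the target's code generator.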
#ifndef _TARGET_64BIT_
    // The code generation phase (for x86 & ARM32) does not handle casts
    // directly from [u]long to anything other than [u]int. Insert an
    // intermediate cast to native int.
    else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
    {
        oper = gtNewCastNode(TYP_I_IMPL, oper, TYP_I_IMPL);
        oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
        tree->gtFlags &= ~GTF_UNSIGNED;
    }
#endif // !_TARGET_64BIT_

#ifdef _TARGET_ARM_
    else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) &&
             !varTypeIsLong(oper->gtCast.CastOp()))
    {
        // optimization: conv.r4(conv.r8(?)) -> conv.r4(d)
        // except when the ultimate source is a long because there is no long-to-float helper, so it must be 2 step.
        // This happens semi-frequently because there is no IL 'conv.r4.un'
        oper->gtType       = TYP_FLOAT;
        oper->CastToType() = TYP_FLOAT;
        return fgMorphTree(oper);
    }
    // converts long/ulong --> float/double casts into helper calls.
    else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
    {
        if (dstType == TYP_FLOAT)
        {
            // there is only a double helper, so we
            //  - change the dsttype to double
            //  - insert a cast from double to float
            //  - recurse into the resulting tree
            tree->CastToType() = TYP_DOUBLE;
            tree->gtType       = TYP_DOUBLE;

            tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
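
            // The tree is now CAST<float>(CAST<double>(src)); the recursive
            // morph below rewrites the inner cast into the [U]LNG2DBL helper.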

            return fgMorphTree(tree);
        }
        if (tree->gtFlags & GTF_UNSIGNED)
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif //_TARGET_ARM_

#ifdef _TARGET_AMD64_
    // Do we have to do two step U4/8 -> R4/8 ?
    // Codegen supports the following conversion as one-step operation
    //  a) Long -> R4/R8
    //  b) U8 -> R8
    //
    // The following conversions are performed as two-step operations using above.
    //  U4 -> R4/8 = U4 -> Long -> R4/8
    //  U8 -> R4   = U8 -> R8 -> R4
    else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            if (dstType == TYP_FLOAT)
            {
                // Codegen can handle U8 -> R8 conversion.
                // U8 -> R4 = U8 -> R8 -> R4
                // - change the dsttype to double
                // - insert a cast from double to float
                // - recurse into the resulting tree
                tree->CastToType() = TYP_DOUBLE;
                tree->gtType       = TYP_DOUBLE;
                tree               = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
                return fgMorphTree(tree);
            }
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
    }
#endif // _TARGET_AMD64_

#ifdef _TARGET_X86_
    // Do we have to do two step U4/8 -> R4/8 ?
    else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
            tree->gtFlags &= ~GTF_UNSIGNED;
#ifndef LEGACY_BACKEND
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
#endif
        }
    }
#ifndef LEGACY_BACKEND
    else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType))
    {
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif // !LEGACY_BACKEND
#endif // _TARGET_X86_
    else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
    {
        // We are casting away GC information.  We would like to just
        // change the type to int, but this gives the emitter fits because
        // it believes the variable is a GC variable at the beginning of the
        // instruction group and it is never turned non-GC by the code generator.
        // We fix this by copying the GC pointer to a non-GC pointer temp.
        noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");

        // We generate an assignment to an int and then do the cast from an int. With this we avoid
        // the gc problem and we allow casts to bytes, longs, etc...
        unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
        oper->gtType    = TYP_I_IMPL;
        GenTreePtr asg  = gtNewTempAssign(lclNum, oper);
        oper->gtType    = srcType;

        // do the real cast
        GenTreePtr cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), dstType);

        // Generate the comma tree
        oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);
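
        // The result has the shape COMMA(ASG(tmpI, oper), CAST<dstType>(LCL_VAR tmpI)),
        // where tmpI is a plain TYP_I_IMPL local, so the emitter never sees the
        // retyped GC pointer.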

        return fgMorphTree(oper);
    }

    // Look for narrowing casts ([u]long -> [u]int) and try to push them
    // down into the operand before morphing it.
    //
    // It doesn't matter if this cast is from ulong or long (i.e. if
    // GTF_UNSIGNED is set) because the transformation is only applied to
    // overflow-insensitive narrowing casts, which always silently truncate.
    //
    // Note that casts from [u]long to small integer types are handled above.
    if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT)))
    {
        // As a special case, look for overflow-sensitive casts of an AND
        // expression, and see if the second operand is a small constant. Since
        // the result of an AND is bound by its smaller operand, it may be
        // possible to prove that the cast won't overflow, which will in turn
        // allow the cast's operand to be transformed.
        if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
        {
            GenTreePtr andOp2 = oper->gtOp.gtOp2;

            // Special case to the special case: AND with a casted int.
            if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
            {
                // gtFoldExprConst will deal with whether the cast is signed or
                // unsigned, or overflow-sensitive.
                andOp2 = oper->gtOp.gtOp2 = gtFoldExprConst(andOp2);
            }

            // Look for a constant less than 2^{32} for a cast to uint, or less
            // than 2^{31} for a cast to int.
            int maxWidth = (dstType == TYP_UINT) ? 32 : 31;

            if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
            {
                // This cast can't overflow.
                tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
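                // e.g. checked (uint)(longVal & 0xFFFF): the AND result fits in
                // 16 bits, so the overflow check was dropped.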
            }
        }

        // Only apply this transformation during global morph,
        // when neither the cast node nor the oper node may throw an exception
        // based on the upper 32 bits.
        //
        if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx())
        {
            // For these operations the lower 32 bits of the result only depend
            // upon the lower 32 bits of the operands.
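            // For example, (int)(long1 + long2) becomes (int)long1 + (int)long2,
            // with the add performed as a 32-bit TYP_INT operation.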
            //
            if (oper->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_OR, GT_XOR, GT_NOT, GT_NEG, GT_LSH))
            {
                DEBUG_DESTROY_NODE(tree);

                // Insert narrowing casts for op1 and op2.
                oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, dstType);
                if (oper->gtOp.gtOp2 != nullptr)
                {
                    oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, dstType);
                }

                // Clear the GT_MUL_64RSLT if it is set.
                if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
                {
                    oper->gtFlags &= ~GTF_MUL_64RSLT;
                }

                // The operation now produces a 32-bit result.
                oper->gtType = TYP_INT;

                // Remorph the new tree as the casts that we added may be folded away.
                return fgMorphTree(oper);
            }
        }
    }

OPTIMIZECAST:
    noway_assert(tree->gtOper == GT_CAST);

    /* Morph the operand */
    tree->gtCast.CastOp() = oper = fgMorphTree(oper);

    /* Reset the call flag */
    tree->gtFlags &= ~GTF_CALL;

    /* Unless we have an overflow cast, reset the except flag */
    if (!tree->gtOverflow())
    {
        tree->gtFlags &= ~GTF_EXCEPT;
    }

    /* Just in case new side effects were introduced */
    tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);

    srcType = oper->TypeGet();

    /* If GTF_UNSIGNED is set then force srcType to an unsigned type */
    if (tree->gtFlags & GTF_UNSIGNED)
    {
        srcType = genUnsignedType(srcType);
    }

    srcSize = genTypeSize(srcType);

    if (!gtIsActiveCSE_Candidate(tree)) // tree cannot be a CSE candidate
    {
        /* See if we can discard the cast */
        if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
        {
            if (srcType == dstType)
            { // Certainly if they are identical it is pointless
                goto REMOVE_CAST;
            }

            if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
            {
                unsigned   varNum = oper->gtLclVarCommon.gtLclNum;
                LclVarDsc* varDsc = &lvaTable[varNum];
                if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
                {
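                    // The store that defined this local already normalized (sign-
                    // or zero-extended) the value to this small type, so re-casting
                    // a normalize-on-store local to its own type is redundant.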
                    goto REMOVE_CAST;
                }
            }

            bool unsignedSrc = varTypeIsUnsigned(srcType);
            bool unsignedDst = varTypeIsUnsigned(dstType);
            bool signsDiffer = (unsignedSrc != unsignedDst);

            // For same sized casts with the same signs,
            // or non-overflow casts, we discard them as well
            if (srcSize == dstSize)
            {
                /* This should have been handled above */
                noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));

                if (!signsDiffer)
                {
                    goto REMOVE_CAST;
                }

                if (!tree->gtOverflow())
                {
                    /* For small type casts, when necessary we force
                       the src operand to the dstType and allow the
                       implied load from memory to perform the casting */
                    if (varTypeIsSmall(srcType))
                    {
                        switch (oper->gtOper)
                        {
                            case GT_IND:
                            case GT_CLS_VAR:
                            case GT_LCL_FLD:
                            case GT_ARR_ELEM:
                                oper->gtType = dstType;
                                goto REMOVE_CAST;
                            default:
                                break;
                        }
                    }
                    else
                    {
                        goto REMOVE_CAST;
                    }
                }
            }

            if (srcSize < dstSize) // widening cast
            {
                // Keep any long casts
                if (dstSize == sizeof(int))
                {
                    // Only keep signed to unsigned widening cast with overflow check
                    if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
                    {
                        goto REMOVE_CAST;
                    }
                }

                // Casts from signed->unsigned can never overflow while widening
                if (unsignedSrc || !unsignedDst)
                {
                    tree->gtFlags &= ~GTF_OVERFLOW;
                }
            }
            else
            {
                // Try to narrow the operand of the cast and discard the cast
                // Note: Do not narrow a cast that is marked as a CSE
                // And do not narrow if the oper is marked as a CSE either
                //
                if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) &&
                    optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false))
                {
                    // Call optNarrowTree again with doit = true to actually perform the narrowing.
                    optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true);

                    /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
                    if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
                    {
                        oper = oper->gtCast.CastOp();
                    }
                    goto REMOVE_CAST;
                }
            }
        }
    }

    switch (oper->gtOper)
    {
        /* If the operand is a constant, we'll fold it */
        case GT_CNS_INT:
        case GT_CNS_LNG:
        case GT_CNS_DBL:
        case GT_CNS_STR:
        {
            GenTreePtr oldTree = tree;

            tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

            // Did we get a comma throw as a result of gtFoldExprConst?
            if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
            {
                noway_assert(fgIsCommaThrow(tree));
                tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
                fgMorphTreeDone(tree);
                return tree;
            }
            else if (tree->gtOper != GT_CAST)
            {
                return tree;
            }

            noway_assert(tree->gtCast.CastOp() == oper); // unchanged
        }
        break;

        case GT_CAST:
            /* Check for two consecutive casts into the same dstType */
            if (!tree->gtOverflow())
            {
                var_types dstType2 = oper->CastToType();
                if (dstType == dstType2)
                {
                    goto REMOVE_CAST;
                }
            }
            break;

#ifdef LEGACY_BACKEND

        /* If op1 is a mod node, mark it with the GTF_MOD_INT_RESULT flag
           so that the code generator will know not to convert the result
           of the idiv to a regpair */
        case GT_MOD:
            if (dstType == TYP_INT)
            {
                tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
            }
            break;

        case GT_UMOD:
            if (dstType == TYP_UINT)
            {
                tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
            }
            break;
#endif // LEGACY_BACKEND

        case GT_COMMA:
            // Check for cast of a GT_COMMA with a throw overflow
            // Bug 110829: Since this optimization will bash the types
            // neither oper nor commaOp2 can be CSE candidates
            if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper can not be a CSE candidate
            {
                GenTreePtr commaOp2 = oper->gtOp.gtOp2;

                if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 can not be a CSE candidate
                {
                    // need type of oper to be same as tree
                    if (tree->gtType == TYP_LONG)
                    {
                        commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
                        commaOp2->gtIntConCommon.SetLngValue(0);
                        /* Change the types of oper and commaOp2 to TYP_LONG */
                        oper->gtType = commaOp2->gtType = TYP_LONG;
                    }
                    else if (varTypeIsFloating(tree->gtType))
                    {
                        commaOp2->ChangeOperConst(GT_CNS_DBL);
                        commaOp2->gtDblCon.gtDconVal = 0.0;
                        // Change the types of oper and commaOp2
                        // X87 promotes everything to TYP_DOUBLE
                        // But others are a little more precise
                        const var_types newTyp
#if FEATURE_X87_DOUBLES
                            = TYP_DOUBLE;
#else  // FEATURE_X87_DOUBLES
                            = tree->gtType;
#endif // FEATURE_X87_DOUBLES
                        oper->gtType = commaOp2->gtType = newTyp;
                    }
                    else
                    {
                        commaOp2->ChangeOperConst(GT_CNS_INT);
                        commaOp2->gtIntCon.gtIconVal = 0;
                        /* Change the types of oper and commaOp2 to TYP_INT */
                        oper->gtType = commaOp2->gtType = TYP_INT;
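                        // The comma's op1 is a throw helper call that never returns;
                        // the zero constant in op2 exists only to give the comma the
                        // type that the cast would have produced.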
                    }

                    if (vnStore != nullptr)
                    {
                        fgValueNumberTreeConst(commaOp2);
                    }
                }

                /* Return the GT_COMMA node as the new tree */
                return oper;
            }
            break;

        default:
            break;
    } /* end switch (oper->gtOper) */

    if (tree->gtOverflow())
    {
        fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
    }

    return tree;

REMOVE_CAST:
    /* Here we've eliminated the cast, so just return its operand */
    assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate

    DEBUG_DESTROY_NODE(tree);
    return oper;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif

/*****************************************************************************
 *
 *  Perform an unwrap operation on a Proxy object
 */

GenTreePtr Compiler::fgUnwrapProxy(GenTreePtr objRef)
{
    assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef));

    CORINFO_EE_INFO* pInfo = eeGetEEInfo();
    GenTreePtr       addTree;

    // Perform the unwrap:
    //
    //   This requires two extra indirections.
    //   We mark these indirections as 'invariant' and
    //   the CSE logic will hoist them when appropriate.
    //
    //   Note that each dereference is a GC pointer

    addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL));

    objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
    objRef->gtFlags |= GTF_IND_INVARIANT;

    addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL));

    objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
    objRef->gtFlags |= GTF_IND_INVARIANT;

    // objRef now holds the 'real this' reference (i.e. the unwrapped proxy)

    return objRef;
}

/*****************************************************************************
 *
 *  Morph an argument list; compute the pointer argument count in the process.
 *
 *  NOTE: This function can be called from any place in the JIT to perform re-morphing
 *        due to graph altering modifications such as copy / constant propagation.
 */

unsigned UpdateGT_LISTFlags(GenTreePtr tree)
{
    assert(tree->gtOper == GT_LIST);

    unsigned flags = 0;
    if (tree->gtOp.gtOp2)
    {
        flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2);
    }

    flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);

    tree->gtFlags &= ~GTF_ALL_EFFECT;
    tree->gtFlags |= flags;

    return tree->gtFlags;
}

#ifdef DEBUG
void fgArgTabEntry::Dump()
{
    printf("fgArgTabEntry[arg %u", argNum);
    if (regNum != REG_STK)
    {
        printf(", %s, regs=%u", getRegName(regNum), numRegs);
    }
    if (numSlots > 0)
    {
        printf(", numSlots=%u, slotNum=%u", numSlots, slotNum);
    }
    printf(", align=%u", alignment);
    if (lateArgInx != (unsigned)-1)
    {
        printf(", lateArgInx=%u", lateArgInx);
    }
    if (isSplit)
    {
        printf(", isSplit");
    }
    if (needTmp)
    {
        printf(", tmpNum=V%02u", tmpNum);
    }
    if (needPlace)
    {
        printf(", needPlace");
    }
    if (isTmp)
    {
        printf(", isTmp");
    }
    if (processed)
    {
        printf(", processed");
    }
    if (isHfaRegArg)
    {
        printf(", isHfa");
    }
    if (isBackFilled)
    {
        printf(", isBackFilled");
    }
    if (isNonStandard)
    {
        printf(", isNonStandard");
    }
    printf("]\n");
}
#endif

fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs)
{
    compiler    = comp;
    callTree    = call;
    argCount    = 0; // filled in arg count, starts at zero
    nextSlotNum = INIT_ARG_STACK_SLOT;
    stkLevel    = 0;
#if defined(UNIX_X86_ABI)
    alignmentDone = false;
    stkSizeBytes  = 0;
    padStkAlign   = 0;
#endif
#if FEATURE_FIXED_OUT_ARGS
    outArgSize = 0;
#endif

    argTableSize = numArgs; // the allocated table size

    hasRegArgs   = false;
    hasStackArgs = false;
    argsComplete = false;
    argsSorted   = false;

    if (argTableSize == 0)
    {
        argTable = nullptr;
    }
    else
    {
        argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
    }
}

/*****************************************************************************
 *
 *  fgArgInfo Copy Constructor
 *
 *  This method needs to act like a copy constructor for fgArgInfo.
 *  The newCall needs to have its fgArgInfo initialized such that
 *  we have a newCall that is an exact copy of the oldCall.
 *  We have to take care since the argument information
 *  in the argTable contains pointers that must point to the
 *  new arguments and not the old arguments.
 */
fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall)
{
    fgArgInfoPtr oldArgInfo = oldCall->gtCall.fgArgInfo;

    compiler    = oldArgInfo->compiler;
    callTree    = newCall;
    argCount    = 0; // filled in arg count, starts at zero
    nextSlotNum = INIT_ARG_STACK_SLOT;
    stkLevel    = oldArgInfo->stkLevel;
#if defined(UNIX_X86_ABI)
    alignmentDone = oldArgInfo->alignmentDone;
    stkSizeBytes  = oldArgInfo->stkSizeBytes;
    padStkAlign   = oldArgInfo->padStkAlign;
#endif
#if FEATURE_FIXED_OUT_ARGS
    outArgSize = oldArgInfo->outArgSize;
#endif
    argTableSize = oldArgInfo->argTableSize;
    argsComplete = false;
    argTable     = nullptr;
    if (argTableSize > 0)
    {
        argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
        for (unsigned inx = 0; inx < argTableSize; inx++)
        {
            argTable[inx] = nullptr;
        }
    }

    assert(oldArgInfo->argsComplete);

    // We create local, artificial GenTreeArgLists that includes the gtCallObjp, if that exists, as first argument,
    // so we can iterate over these argument lists more uniformly.
    // Need to provide a temporary non-null first argument to these constructors: if we use them, we'll replace them
    GenTreeArgList* newArgs;
    GenTreeArgList  newArgObjp(newCall, newCall->gtCallArgs);
    GenTreeArgList* oldArgs;
    GenTreeArgList  oldArgObjp(oldCall, oldCall->gtCallArgs);

    if (newCall->gtCallObjp == nullptr)
    {
        assert(oldCall->gtCallObjp == nullptr);
        newArgs = newCall->gtCallArgs;
        oldArgs = oldCall->gtCallArgs;
    }
    else
    {
        assert(oldCall->gtCallObjp != nullptr);
        newArgObjp.Current() = newCall->gtCallObjp;
        newArgs              = &newArgObjp;
        oldArgObjp.Current() = oldCall->gtCallObjp;
        oldArgs              = &oldArgObjp;
    }

    GenTreePtr        newCurr;
    GenTreePtr        oldCurr;
    GenTreeArgList*   newParent   = nullptr;
    GenTreeArgList*   oldParent   = nullptr;
    fgArgTabEntryPtr* oldArgTable = oldArgInfo->argTable;
    bool              scanRegArgs = false;

    while (newArgs)
    {
        /* Get hold of the next argument values for the oldCall and newCall */

        newCurr = newArgs->Current();
        oldCurr = oldArgs->Current();
        if (newArgs != &newArgObjp)
        {
            newParent = newArgs;
            oldParent = oldArgs;
        }
        else
        {
            assert(newParent == nullptr && oldParent == nullptr);
        }
        newArgs = newArgs->Rest();
        oldArgs = oldArgs->Rest();

        fgArgTabEntryPtr oldArgTabEntry = nullptr;
        fgArgTabEntryPtr newArgTabEntry = nullptr;

        for (unsigned inx = 0; inx < argTableSize; inx++)
        {
            oldArgTabEntry = oldArgTable[inx];

            if (oldArgTabEntry->parent == oldParent)
            {
                assert((oldParent == nullptr) == (newParent == nullptr));

                // We have found the matching "parent" field in oldArgTabEntry

                newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

                // First block copy all fields
                //
                *newArgTabEntry = *oldArgTabEntry;

                // Then update all GenTreePtr fields in the newArgTabEntry
                //
                newArgTabEntry->parent = newParent;

                // The node field is likely to have been updated
                // to point at a node in the gtCallLateArgs list
                //
                if (oldArgTabEntry->node == oldCurr)
                {
                    // node is not pointing into the gtCallLateArgs list
                    newArgTabEntry->node = newCurr;
                }
                else
                {
                    // node must be pointing into the gtCallLateArgs list
                    //
                    // We will fix this pointer up in the next loop
                    //
                    newArgTabEntry->node = nullptr; // For now we assign a NULL to this field

                    scanRegArgs = true;
                }

                // Now initialize the proper element in the argTable array
                //
                argTable[inx] = newArgTabEntry;
                break;
            }
        }

        // We should have found the matching oldArgTabEntry and created the newArgTabEntry
        //
        assert(newArgTabEntry != nullptr);
    }

    if (scanRegArgs)
    {
        newArgs = newCall->gtCallLateArgs;
        oldArgs = oldCall->gtCallLateArgs;

        while (newArgs)
        {
            /* Get hold of the next argument values for the oldCall and newCall */

            assert(newArgs->OperIsList());

            newCurr = newArgs->Current();
            newArgs = newArgs->Rest();

            assert(oldArgs->OperIsList());

            oldCurr = oldArgs->Current();
            oldArgs = oldArgs->Rest();

            fgArgTabEntryPtr oldArgTabEntry = nullptr;
            fgArgTabEntryPtr newArgTabEntry = nullptr;

            for (unsigned inx = 0; inx < argTableSize; inx++)
            {
                oldArgTabEntry = oldArgTable[inx];

                if (oldArgTabEntry->node == oldCurr)
                {
                    // We have found the matching "node" field in oldArgTabEntry

                    newArgTabEntry = argTable[inx];
                    assert(newArgTabEntry != nullptr);

                    // update the "node" GenTreePtr fields in the newArgTabEntry
                    //
                    assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field

                    newArgTabEntry->node = newCurr;
                    break;
                }
            }
        }
    }

    argCount     = oldArgInfo->argCount;
    nextSlotNum  = oldArgInfo->nextSlotNum;
    hasRegArgs   = oldArgInfo->hasRegArgs;
    hasStackArgs = oldArgInfo->hasStackArgs;
    argsComplete = true;
    argsSorted   = true;
}

void fgArgInfo::AddArg(fgArgTabEntryPtr curArgTabEntry)
{
    assert(argCount < argTableSize);
    argTable[argCount] = curArgTabEntry;
    argCount++;
}

fgArgTabEntryPtr fgArgInfo::AddRegArg(
    unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
{
    fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->parent        = parent;
    curArgTabEntry->regNum        = regNum;
    curArgTabEntry->slotNum       = 0;
    curArgTabEntry->numRegs       = numRegs;
    curArgTabEntry->numSlots      = 0;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = (unsigned)-1;
    curArgTabEntry->tmpNum        = (unsigned)-1;
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->isHfaRegArg   = false;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;

    hasRegArgs = true;
    AddArg(curArgTabEntry);
    return curArgTabEntry;
}

#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned                                                         argNum,
                                      GenTreePtr                                                       node,
                                      GenTreePtr                                                       parent,
                                      regNumber                                                        regNum,
                                      unsigned                                                         numRegs,
                                      unsigned                                                         alignment,
                                      const bool                                                       isStruct,
                                      const regNumber                                                  otherRegNum,
                                      const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
{
    fgArgTabEntryPtr curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
    assert(curArgTabEntry != nullptr);

    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
    // PlaceHolder node (in case of needed late argument, for example.)
    // This requires the use of an extra flag. At creation time the state is right,
    // and this assert enforces that.
    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
    curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
    curArgTabEntry->isStruct    = isStruct;    // is this a struct arg

    if (isStruct && structDescPtr != nullptr)
    {
        curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
    }

    return curArgTabEntry;
}
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned   argNum,
                                      GenTreePtr node,
                                      GenTreePtr parent,
                                      unsigned   numSlots,
                                      unsigned   alignment
                                          FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct))
{
    fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

    nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
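    // Note: stack slot alignment is expressed in slots; e.g. an argument with
    // alignment == 2 that arrives when nextSlotNum is odd skips one slot so
    // that it starts on an even slot boundary.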

#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
    // PlaceHolder node (in case of needed late argument, for example.)
    // This requires the use of an extra flag. At creation time the state is right,
    // and this assert enforces that.
    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
    curArgTabEntry->isStruct = isStruct; // is this a struct arg
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->parent        = parent;
    curArgTabEntry->regNum        = REG_STK;
    curArgTabEntry->slotNum       = nextSlotNum;
    curArgTabEntry->numRegs       = 0;
    curArgTabEntry->numSlots      = numSlots;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = (unsigned)-1;
    curArgTabEntry->tmpNum        = (unsigned)-1;
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->isHfaRegArg   = false;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;

    hasStackArgs = true;
    AddArg(curArgTabEntry);

    nextSlotNum += numSlots;
    return curArgTabEntry;
}

void fgArgInfo::RemorphReset()
{
    nextSlotNum = INIT_ARG_STACK_SLOT;
}

fgArgTabEntry* fgArgInfo::RemorphRegArg(
    unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    unsigned         regArgInx      = 0;
    unsigned         inx;

    for (inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }

        bool       isRegArg;
        GenTreePtr argx;
        if (curArgTabEntry->parent != nullptr)
        {
            assert(curArgTabEntry->parent->OperIsList());
            argx     = curArgTabEntry->parent->Current();
            isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
        }
        else
        {
            argx     = curArgTabEntry->node;
            isRegArg = true;
        }

        if (isRegArg)
        {
            regArgInx++;
        }
    }

    // if this was a nonstandard arg the table is definitive
    if (curArgTabEntry->isNonStandard)
    {
        regNum = curArgTabEntry->regNum;
    }

    assert(curArgTabEntry->argNum == argNum);
    assert(curArgTabEntry->regNum == regNum);
    assert(curArgTabEntry->alignment == alignment);
    assert(curArgTabEntry->parent == parent);

    if (curArgTabEntry->node != node)
    {
        GenTreePtr argx     = nullptr;
        unsigned   regIndex = 0;

        /* process the register argument list */
        for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; (regIndex++, list = list->Rest()))
        {
            argx = list->Current();
            assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs
            if (regIndex == regArgInx)
            {
                break;
            }
        }
        assert(regIndex == regArgInx);
        assert(regArgInx == curArgTabEntry->lateArgInx);

        if (curArgTabEntry->node != argx)
        {
            curArgTabEntry->node = argx;
        }
    }
    return curArgTabEntry;
}

void fgArgInfo::RemorphStkArg(
    unsigned argNum, GenTreePtr node, GenTreePtr parent, unsigned numSlots, unsigned alignment)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    bool             isRegArg       = false;
    unsigned         regArgInx      = 0;
    GenTreePtr       argx;
    unsigned         inx;

    for (inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];

        if (curArgTabEntry->parent != nullptr)
        {
            assert(curArgTabEntry->parent->OperIsList());
            argx     = curArgTabEntry->parent->Current();
            isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
        }
        else
        {
            argx     = curArgTabEntry->node;
            isRegArg = true;
        }

        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }

        if (isRegArg)
        {
            regArgInx++;
        }
    }

    nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);

    assert(curArgTabEntry->argNum == argNum);
    assert(curArgTabEntry->slotNum == nextSlotNum);
    assert(curArgTabEntry->numSlots == numSlots);
    assert(curArgTabEntry->alignment == alignment);
    assert(curArgTabEntry->parent == parent);
    assert(parent->OperIsList());

#if FEATURE_FIXED_OUT_ARGS
    if (curArgTabEntry->node != node)
    {
        if (isRegArg)
        {
            GenTreePtr argx     = nullptr;
            unsigned   regIndex = 0;

            /* process the register argument list */
            for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), regIndex++)
            {
                argx = list->Current();
                assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs
                if (regIndex == regArgInx)
                {
                    break;
                }
            }
            assert(regIndex == regArgInx);
            assert(regArgInx == curArgTabEntry->lateArgInx);

            if (curArgTabEntry->node != argx)
            {
                curArgTabEntry->node = argx;
            }
        }
        else
        {
            assert(parent->Current() == node);
            curArgTabEntry->node = node;
        }
    }
#else
    curArgTabEntry->node = node;
#endif

    nextSlotNum += numSlots;
}

void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    assert(argNum < argCount);
    for (unsigned inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }
    }

    assert(numRegs > 0);
    assert(numSlots > 0);

    if (argsComplete)
    {
        assert(curArgTabEntry->isSplit == true);
        assert(curArgTabEntry->numRegs == numRegs);
        assert(curArgTabEntry->numSlots == numSlots);
    }
    else
    {
        curArgTabEntry->isSplit  = true;
        curArgTabEntry->numRegs  = numRegs;
        curArgTabEntry->numSlots = numSlots;
    }
    nextSlotNum += numSlots;
}

void fgArgInfo::EvalToTmp(unsigned argNum, unsigned tmpNum, GenTreePtr newNode)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    assert(argNum < argCount);
    for (unsigned inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }
    }
    assert(curArgTabEntry->parent->Current() == newNode);

    curArgTabEntry->node   = newNode;
    curArgTabEntry->tmpNum = tmpNum;
    curArgTabEntry->isTmp  = true;
}

void fgArgInfo::ArgsComplete()
{
    bool hasStackArgs    = false;
    bool hasStructRegArg = false;

    for (unsigned curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
        assert(curArgTabEntry != nullptr);
        GenTreePtr argx = curArgTabEntry->node;

        if (curArgTabEntry->regNum == REG_STK)
        {
            hasStackArgs = true;
#if !FEATURE_FIXED_OUT_ARGS
            // On x86 we use push instructions to pass arguments:
            //   The non-register arguments are evaluated and pushed in order
            //   and they are never evaluated into temps
            //
            continue;
#endif
        }
#if defined(_TARGET_ARM_) && !defined(LEGACY_BACKEND)
        else if (curArgTabEntry->isSplit)
        {
            hasStructRegArg = true;
            hasStackArgs    = true;
        }
#endif
        else // we have a register argument, next we look for a struct type.
        {
            if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
            {
                hasStructRegArg = true;
            }
        }

        /* If the argument tree contains an assignment (GTF_ASG) then the argument and
           every earlier argument (except constants) must be evaluated into temps
           since there may be other arguments that follow and they may use the value being assigned.

           EXAMPLE: ArgTab is "a, a=5, a"
                    -> when we see the second arg "a=5"
                       we know the first two arguments "a, a=5" have to be evaluated into temps

           For the case of an assignment, we only know that there exists some assignment someplace
           in the tree.  We don't know what is being assigned so we are very conservative here
           and assume that any local variable could have been assigned.
         */

        if (argx->gtFlags & GTF_ASG)
        {
            // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
            // a tmp, then we need a temp in the late arg list.
            if ((argCount > 1) || argx->OperIsCopyBlkOp()
#ifdef FEATURE_FIXED_OUT_ARGS
                || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
                                         // that we only have late non-register args when that feature is on.
#endif // FEATURE_FIXED_OUT_ARGS
                )
            {
                curArgTabEntry->needTmp = true;
            }

            // For all previous arguments, unless they are a simple constant
            //  we require that they be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);

                assert(prevArgTabEntry->node);
                if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
                {
                    prevArgTabEntry->needTmp = true;
                }
            }
        }

#if FEATURE_FIXED_OUT_ARGS
        // Like calls, if this argument has a tree that will do an inline throw,
        // a call to a jit helper, then we need to treat it like a call (but only
        // if there are/were any stack args).
        // This means unnesting, sorting, etc.  Technically this is overly
        // conservative, but I want to avoid as much special-case debug-only code
        // as possible, so leveraging the GTF_CALL flag is the easiest.
        //
        if (!(argx->gtFlags & GTF_CALL) && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) &&
            compiler->opts.compDbgCode &&
            (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
        {
            for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
            {
                if (otherInx == curInx)
                {
                    continue;
                }

                if (argTable[otherInx]->regNum == REG_STK)
                {
                    argx->gtFlags |= GTF_CALL;
                    break;
                }
            }
        }
#endif // FEATURE_FIXED_OUT_ARGS

        /* If it contains a call (GTF_CALL) then itself and everything before the call
           with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT
           has to be kept in the right order since we will move the call to the first position)

           For calls we don't have to be quite as conservative as we are with an assignment
           since the call won't be modifying any non-address taken LclVars.
         */

        if (argx->gtFlags & GTF_CALL)
        {
            if (argCount > 1) // If this is not the only argument
            {
                curArgTabEntry->needTmp = true;
            }
            else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
            {
                // Spill all arguments that are floating point calls
                curArgTabEntry->needTmp = true;
            }

            // All previous arguments may need to be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
                assert(prevArgTabEntry->node);

                // For all previous arguments, if they have any GTF_ALL_EFFECT
                //  we require that they be evaluated into a temp
                if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
                {
                    prevArgTabEntry->needTmp = true;
                }
#if FEATURE_FIXED_OUT_ARGS
                // Or, if they are stored into the FIXED_OUT_ARG area
                // we require that they be moved to the gtCallLateArgs
                // and replaced with a placeholder node
                else if (prevArgTabEntry->regNum == REG_STK)
                {
                    prevArgTabEntry->needPlace = true;
                }
#if defined(_TARGET_ARM_) && !defined(LEGACY_BACKEND)
                else if (prevArgTabEntry->isSplit)
                {
                    prevArgTabEntry->needPlace = true;
                }
#endif // _TARGET_ARM_
#endif // FEATURE_FIXED_OUT_ARGS
            }
        }

#ifndef LEGACY_BACKEND
#if FEATURE_MULTIREG_ARGS
        // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST
        // with multiple indirections, so here we consider spilling it into a tmp LclVar.
        //
        CLANG_FORMAT_COMMENT_ANCHOR;

        bool isMultiRegArg = (curArgTabEntry->numRegs > 1);

        if ((argx->TypeGet() == TYP_STRUCT) && (curArgTabEntry->needTmp == false))
        {
            if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
            {
                // Spill multireg struct arguments that have Assignments or Calls embedded in them
                curArgTabEntry->needTmp = true;
            }
#ifndef _TARGET_ARM_
            // TODO-Arm: This optimization is not implemented for ARM32
            // so we skip this for ARM32 until it is ported to use RyuJIT backend
            //
            else
            {
                // We call gtPrepareCost to measure the cost of evaluating this tree
                compiler->gtPrepareCost(argx);

                if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX)))
                {
                    // Spill multireg struct arguments that are expensive to evaluate twice
                    curArgTabEntry->needTmp = true;
                }
                else if (argx->OperGet() == GT_OBJ)
                {
                    GenTreeObj*          argObj     = argx->AsObj();
                    CORINFO_CLASS_HANDLE objClass   = argObj->gtClass;
                    unsigned             structSize = compiler->info.compCompHnd->getClassSize(objClass);
                    switch (structSize)
                    {
                        case 3:
                        case 5:
                        case 6:
                        case 7:
                            // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
                            //
                            if (argObj->gtObj.gtOp1->IsVarAddr() == false) // Is the source not a LclVar?
                            {
                                // If we don't have a LclVar we need to read exactly 3,5,6 or 7 bytes
                                // For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
                                //
                                curArgTabEntry->needTmp = true;
                            }
                            break;

                        case 11:
                        case 13:
                        case 14:
                            // Spill any GT_OBJ multireg structs that are difficult to extract
                            //
                            // When we have a GT_OBJ of a struct with the above sizes we would need
                            // to use 3 or 4 load instructions to load the exact size of this struct.
                            // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
                            // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
                            // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
                            // the argument.
                            //
                            curArgTabEntry->needTmp = true;
                            break;

                        default:
                            break;
                    }
                }
            }
#endif // !_TARGET_ARM_
        }
#endif // FEATURE_MULTIREG_ARGS
#endif // LEGACY_BACKEND
    }

    // We only care because we can't spill structs and qmarks involve a lot of spilling, but
    // if we don't have qmarks, then it doesn't matter.
    // So check for Qmark's globally once here, instead of inside the loop.
    //
    const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);

#if FEATURE_FIXED_OUT_ARGS

    // For Arm/x64 we only care because we can't reorder a register
    // argument that uses GT_LCLHEAP.  This is an optimization to
    // save a check inside the below loop.
    //
    const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);

#else

    const bool hasStackArgsWeCareAbout = hasStackArgs;

#endif // FEATURE_FIXED_OUT_ARGS

    // If we have any stack args we have to force the evaluation
    // of any arguments passed in registers that might throw an exception
    //
    // Technically we are only required to handle the following two cases:
    //     a GT_IND with GTF_IND_RNGCHK (only on x86) or
    //     a GT_LCLHEAP node that allocates stuff on the stack
    //
    if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
    {
        for (unsigned curInx = 0; curInx < argCount; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
            assert(curArgTabEntry != nullptr);
            GenTreePtr argx = curArgTabEntry->node;

            // Examine the register args that are currently not marked needTmp
            //
            if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
            {
                if (hasStackArgsWeCareAbout)
                {
#if !FEATURE_FIXED_OUT_ARGS
                    // On x86 we previously recorded a stack depth of zero when
                    // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag
                    // Thus we can not reorder the argument after any stack based argument
                    // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
                    //  check for it explicitly.)
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        curArgTabEntry->needTmp = true;
                    }
#else
                    // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        assert(compiler->compLocallocUsed);

                        // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
                        //
                        if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
                        {
                            curArgTabEntry->needTmp = true;
                        }
                    }
#endif
                }
                if (hasStructRegArgWeCareAbout)
                {
                    // Returns true if a GT_QMARK node is encountered in the argx tree
                    //
                    if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
                    {
                        curArgTabEntry->needTmp = true;
                    }
                }
            }
        }
    }

    argsComplete = true;
}

void fgArgInfo::SortArgs()
{
    assert(argsComplete == true);

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("\nSorting the arguments:\n");
    }
#endif

    /* Shuffle the arguments around before we build the gtCallLateArgs list.
       The idea is to move all "simple" arguments like constants and local vars
       to the end of the table, and move the complex arguments towards the beginning
       of the table. This will help prevent registers from being spilled by
       allowing us to evaluate the more complex arguments before the simpler arguments.
       The argTable ends up looking like:
           +------------------------------------+  <--- argTable[argCount - 1]
           |          constants                 |
           +------------------------------------+
           |    local var / local field         |
           +------------------------------------+
           | remaining arguments sorted by cost |
           +------------------------------------+
           | temps (argTable[].needTmp = true)  |
           +------------------------------------+
           |  args with calls (GTF_CALL)        |
           +------------------------------------+  <--- argTable[0]
     */

    /* Set the beginning and end for the new argument table */
    unsigned curInx;
    int      regCount      = 0;
    unsigned begTab        = 0;
    unsigned endTab        = argCount - 1;
    unsigned argsRemaining = argCount;

    // First take care of arguments that are constants.
    // [We use a backward iterator pattern]
    //
    curInx = argCount;
    do
    {
        curInx--;

        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
        {
            regCount++;
        }

        // Skip any already processed args
        //
        if (!curArgTabEntry->processed)
        {
            GenTreePtr argx = curArgTabEntry->node;

            // put constants at the end of the table
            //
            if (argx->gtOper == GT_CNS_INT)
            {
                noway_assert(curInx <= endTab);

                curArgTabEntry->processed = true;

                // place curArgTabEntry at the endTab position by performing a swap
                //
                if (curInx != endTab)
                {
                    argTable[curInx] = argTable[endTab];
                    argTable[endTab] = curArgTabEntry;
                }

                endTab--;
                argsRemaining--;
            }
        }
    } while (curInx > 0);

    if (argsRemaining > 0)
    {
        // Next take care of arguments that are calls.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                // put calls at the beginning of the table
                //
                if (argx->gtFlags & GTF_CALL)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }

    if (argsRemaining > 0)
    {
        // Next take care of arguments that are temps.
        // These temps come before the arguments that are
        // ordinary local vars or local fields
        // since this will give them a better chance to become
        // enregistered into their actual argument register.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                if (curArgTabEntry->needTmp)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }

    if (argsRemaining > 0)
    {
        // Next take care of local var and local field arguments.
        // These are moved towards the end of the argument evaluation.
        // [We use a backward iterator pattern]
        //
        curInx = endTab + 1;
        do
        {
            curInx--;

            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
                {
                    noway_assert(curInx <= endTab);

                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the endTab position by performing a swap
                    //
                    if (curInx != endTab)
                    {
                        argTable[curInx] = argTable[endTab];
                        argTable[endTab] = curArgTabEntry;
                    }

                    endTab--;
                    argsRemaining--;
                }
            }
        } while (curInx > begTab);
    }

    // Finally, take care of all the remaining arguments.
    // Note that we fill in one arg at a time using a while loop.
    bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
    while (argsRemaining > 0)
    {
        /* Find the most expensive arg remaining and evaluate it next */

        fgArgTabEntryPtr expensiveArgTabEntry = nullptr;
        unsigned         expensiveArg         = UINT_MAX;
        unsigned         expensiveArgCost     = 0;

        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                // We should have already handled these kinds of args
                assert(argx->gtOper != GT_LCL_VAR);
                assert(argx->gtOper != GT_LCL_FLD);
                assert(argx->gtOper != GT_CNS_INT);

                // This arg should either have no persistent side effects or be the last one in our table
                // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));

                if (argsRemaining == 1)
                {
                    // This is the last arg to place
                    expensiveArg         = curInx;
                    expensiveArgTabEntry = curArgTabEntry;
                    assert(begTab == endTab);
                    break;
                }
                else
                {
                    if (!costsPrepared)
                    {
                        /* We call gtPrepareCost to measure the cost of evaluating this tree */
                        compiler->gtPrepareCost(argx);
                    }

                    if (argx->gtCostEx > expensiveArgCost)
                    {
                        // Remember this arg as the most expensive one that we have yet seen
                        expensiveArgCost     = argx->gtCostEx;
                        expensiveArg         = curInx;
                        expensiveArgTabEntry = curArgTabEntry;
                    }
                }
            }
        }

        noway_assert(expensiveArg != UINT_MAX);

        // put the most expensive arg towards the beginning of the table
        //
        expensiveArgTabEntry->processed = true;

        // place expensiveArgTabEntry at the begTab position by performing a swap
        //
        if (expensiveArg != begTab)
        {
            argTable[expensiveArg] = argTable[begTab];
            argTable[begTab]       = expensiveArgTabEntry;
        }

        begTab++;
        argsRemaining--;

        costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop
    }

    // The table should now be completely filled and thus begTab should now be adjacent to endTab
    // and argsRemaining should be zero
    assert(begTab == (endTab + 1));
    assert(argsRemaining == 0);

#if !FEATURE_FIXED_OUT_ARGS
    // Finally build the regArgList
    //
    callTree->gtCall.regArgList      = NULL;
    callTree->gtCall.regArgListCount = regCount;

    unsigned regInx = 0;
    for (curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
        {
            // Encode the argument register in the register mask
            //
            callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
            regInx++;
        }
    }
#endif // !FEATURE_FIXED_OUT_ARGS

    argsSorted = true;
}

//------------------------------------------------------------------------------
// fgMakeTmpArgNode : This function creates a tmp var only if needed.
//                    We need this to be done in order to enforce ordering
//                    of the evaluation of arguments.
//
// Arguments:
//    tmpVarNum  - the var num which we clone into the newly created temp var.
//
// Return Value:
//    the newly created temp var tree.

GenTreePtr Compiler::fgMakeTmpArgNode(
    unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters))
{
    LclVarDsc* varDsc = &lvaTable[tmpVarNum];
    assert(varDsc->lvIsTemp);
    var_types type = varDsc->TypeGet();

    // Create a copy of the temp to go into the late argument list
    GenTreePtr arg      = gtNewLclvNode(tmpVarNum, type);
    GenTreePtr addrNode = nullptr;

    if (varTypeIsStruct(type))
    {

#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) || (!defined(LEGACY_BACKEND) && defined(_TARGET_ARM_))

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING

        arg->gtFlags |= GTF_DONT_CSE;

#else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING
        // Can this type be passed in a single register?
        // If so, the following call will return the corresponding primitive type.
        // Otherwise, it will return TYP_UNKNOWN and we will pass by reference.
        bool                 passedInRegisters = false;

        structPassingKind    kind;
        CORINFO_CLASS_HANDLE clsHnd         = varDsc->lvVerTypeInfo.GetClassHandle();
        var_types            structBaseType = getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd);

        if (structBaseType != TYP_UNKNOWN)
        {
            passedInRegisters = true;
            type              = structBaseType;
        }
#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING

        // If it is passed in registers, don't get the address of the var. Make it a
        // field instead. It will be loaded in registers with putarg_reg tree in lower.
        if (passedInRegisters)
        {
            arg->ChangeOper(GT_LCL_FLD);
            arg->gtType = type;
        }
        else
        {
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
            // TODO-Cleanup: Fix this - we should never have an address that is TYP_STRUCT.
            var_types addrType = type;
#else
            var_types addrType = TYP_BYREF;
#endif
            arg      = gtNewOperNode(GT_ADDR, addrType, arg);
            addrNode = arg;

#if FEATURE_MULTIREG_ARGS
#ifdef _TARGET_ARM64_
            assert(varTypeIsStruct(type));
            if (lvaIsMultiregStruct(varDsc))
            {
                // ToDo-ARM64: Consider using:  arg->ChangeOper(GT_LCL_FLD);
                // as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works.
                // We will create a GT_OBJ for the argument below.
                // This will be passed by value in two registers.
                assert(addrNode != nullptr);

                // Create an Obj of the temp to use it as a call argument.
                arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);

                // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here;
                // this is only to preserve former behavior (though some CSE'ing of struct
                // values can be pessimizing, so enabling this may require some additional tuning).
                arg->gtFlags |= GTF_DONT_CSE;
            }
#endif // _TARGET_ARM64_
#endif // FEATURE_MULTIREG_ARGS
        }

#else // not (_TARGET_AMD64_ or _TARGET_ARM64_ or (!LEGACY_BACKEND and _TARGET_ARM_))

        // On other targets, we pass the struct by value
        assert(varTypeIsStruct(type));

        addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);

        // Get a new Obj node temp to use it as a call argument.
        // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object.
        arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode);

#endif // not (_TARGET_AMD64_ or _TARGET_ARM64_ or (!LEGACY_BACKEND and _TARGET_ARM_))

    } // (varTypeIsStruct(type))

    if (addrNode != nullptr)
    {
        assert(addrNode->gtOper == GT_ADDR);

        // This will prevent this LclVar from being optimized away
        lvaSetVarAddrExposed(tmpVarNum);

        // the child of a GT_ADDR is required to have this flag set
        addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE;
    }

    return arg;
}
2158 void fgArgInfo::EvalArgsToTemps()
2160 assert(argsSorted == true);
2162 unsigned regArgInx = 0;
2163 // Now go through the argument table and perform the necessary evaluation into temps
2164 GenTreeArgList* tmpRegArgNext = nullptr;
2165 for (unsigned curInx = 0; curInx < argCount; curInx++)
2167 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
2169 GenTreePtr argx = curArgTabEntry->node;
2170 GenTreePtr setupArg = nullptr;
2173 #if !FEATURE_FIXED_OUT_ARGS
2174 // Only ever set for FEATURE_FIXED_OUT_ARGS
2175 assert(curArgTabEntry->needPlace == false);
2177 // On x86 and other archs that use push instructions to pass arguments:
2178 // Only the register arguments need to be replaced with placeholder nodes.
2179 // Stacked arguments are evaluated and pushed (or stored into the stack) in order.
2181 if (curArgTabEntry->regNum == REG_STK)
2185 if (curArgTabEntry->needTmp)
2189 if (curArgTabEntry->isTmp == true)
2191 // Create a copy of the temp to go into the late argument list
2192 tmpVarNum = curArgTabEntry->tmpNum;
2193 defArg = compiler->fgMakeTmpArgNode(tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
2194 argTable[curInx]->structDesc.passedInRegisters));
2196 // mark the original node as a late argument
2197 argx->gtFlags |= GTF_LATE_ARG;
2201 // Create a temp assignment for the argument
2202 // Put the temp in the gtCallLateArgs list
2203 CLANG_FORMAT_COMMENT_ANCHOR;
2206 if (compiler->verbose)
2208 printf("Argument with 'side effect'...\n");
2209 compiler->gtDispTree(argx);
2213 #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2214 noway_assert(argx->gtType != TYP_STRUCT);
2217 tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
2218 if (argx->gtOper == GT_MKREFANY)
2220 // For GT_MKREFANY, typically the actual struct copying does
2221 // not have any side-effects and can be delayed. So instead
2222 // of using a temp for the whole struct, we can just use a temp
2223 // for the operand that has a side-effect.
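// Sketch of the transformation when only op1 has side-effects ('tmpN' is an
// illustrative temp number):
//     early list: ASG(tmpN, op1)               // evaluates the side-effect in order
//     late list:  MKREFANY(LCL_VAR tmpN, op2)  // the mkrefany is now side-effect free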
2225 if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
2227 operand = argx->gtOp.gtOp1;
2229 // In the early argument evaluation, place an assignment to the temp
2230 // from the source operand of the mkrefany
2231 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2233 // Replace the operand for the mkrefany with the new temp.
2234 argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2236 else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
2238 operand = argx->gtOp.gtOp2;
2240 // In the early argument evaluation, place an assignment to the temp
2241 // from the source operand of the mkrefany
2242 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2244 // Replace the operand for the mkrefany with the new temp.
2245 argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2249 if (setupArg != nullptr)
2251 // Now keep the mkrefany for the late argument list
2254 // Clear the side-effect flags because now both op1 and op2 have no side-effects
2255 defArg->gtFlags &= ~GTF_ALL_EFFECT;
2259 setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);
2261 LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;
2263 #ifndef LEGACY_BACKEND
2264 if (compiler->fgOrder == Compiler::FGOrderLinear)
2266 // We'll reference this temporary variable just once
2267 // when we perform the function call after
2268 // setting up this argument.
2269 varDsc->lvRefCnt = 1;
2271 #endif // !LEGACY_BACKEND
2273 var_types lclVarType = genActualType(argx->gtType);
2274 var_types scalarType = TYP_UNKNOWN;
2276 if (setupArg->OperIsCopyBlkOp())
2278 setupArg = compiler->fgMorphCopyBlock(setupArg);
2279 #if defined(_TARGET_ARM64_) || (!defined(LEGACY_BACKEND) && defined(_TARGET_ARM_))
2280 // This scalar LclVar widening step is only performed for ARM architectures.
2282 CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum);
2283 unsigned structSize = varDsc->lvExactSize;
2285 scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd);
2286 #endif // _TARGET_ARM*_
2289 // scalarType can be set to a wider type for ARM architectures: (3 => 4) or (5,6,7 => 8)
2290 if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
2292 // Create a GT_LCL_FLD using the wider type to go to the late argument list
2293 defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0);
2297 // Create a copy of the temp to go to the late argument list
2298 defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType);
2301 curArgTabEntry->isTmp = true;
2302 curArgTabEntry->tmpNum = tmpVarNum;
2305 // Previously we might have thought the local was promoted, and thus the 'COPYBLK'
2306 // might have left holes in the used registers (see
2307 // fgAddSkippedRegsInPromotedStructArg).
2308 // Too bad we're not that smart for these intermediate temps...
2309 if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
2311 regNumber argReg = curArgTabEntry->regNum;
2312 regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
2313 for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
2315 argReg = genRegArgNext(argReg);
2316 allUsedRegs |= genRegMask(argReg);
2318 #ifdef LEGACY_BACKEND
2319 callTree->gtCall.gtCallRegUsedMask |= allUsedRegs;
2320 #endif // LEGACY_BACKEND
2322 #endif // _TARGET_ARM_
2325 /* mark the assignment as a late argument */
2326 setupArg->gtFlags |= GTF_LATE_ARG;
2329 if (compiler->verbose)
2331 printf("\n Evaluate to a temp:\n");
2332 compiler->gtDispTree(setupArg);
2337 else // curArgTabEntry->needTmp == false
2340 // Only register args are replaced with placeholder nodes
2341 // and the stack based arguments are evaluated and pushed in order.
2343 // On Arm/x64 - When needTmp is false and needPlace is false,
2344 // the non-register arguments are evaluated and stored in order.
2345 // When needPlace is true we have a nested call that comes after
2346 // this argument so we have to replace it in the gtCallArgs list
2347 // (the initial argument evaluation list) with a placeholder.
2349 if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
2354 /* No temp needed - move the whole node to the gtCallLateArgs list */
2356 /* The argument is deferred and put in the late argument list */
2360 // Create a placeholder node to put in its place in the gtCallArgs list.
2362 // For a struct type we also need to record the class handle of the arg.
2363 CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
2365 #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2367 // All structs are either passed (and retyped) as integral types, OR they
2368 // are passed by reference.
2369 noway_assert(argx->gtType != TYP_STRUCT);
2371 #else // !defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2373 if (varTypeIsStruct(defArg))
2375 // Need a temp to walk any GT_COMMA nodes when searching for the clsHnd
2376 GenTreePtr defArgTmp = defArg;
2378 // The GT_OBJ may be a child of a GT_COMMA.
2379 while (defArgTmp->gtOper == GT_COMMA)
2381 defArgTmp = defArgTmp->gtOp.gtOp2;
2383 assert(varTypeIsStruct(defArgTmp));
2385 // We handle two opcodes: GT_MKREFANY and GT_OBJ.
2386 if (defArgTmp->gtOper == GT_MKREFANY)
2388 clsHnd = compiler->impGetRefAnyClass();
2390 else if (defArgTmp->gtOper == GT_OBJ)
2392 clsHnd = defArgTmp->AsObj()->gtClass;
2396 BADCODE("Unhandled struct argument tree in fgMorphArgs");
2400 #endif // !(defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
2402 setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);
2404 /* mark the placeholder node as a late argument */
2405 setupArg->gtFlags |= GTF_LATE_ARG;
2408 if (compiler->verbose)
2410 if (curArgTabEntry->regNum == REG_STK)
2412 printf("Deferred stack argument :\n");
2416 printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
2419 compiler->gtDispTree(argx);
2420 printf("Replaced with placeholder node:\n");
2421 compiler->gtDispTree(setupArg);
2426 if (setupArg != nullptr)
2428 if (curArgTabEntry->parent)
2430 GenTreePtr parent = curArgTabEntry->parent;
2431 /* a normal argument from the list */
2432 noway_assert(parent->OperIsList());
2433 noway_assert(parent->gtOp.gtOp1 == argx);
2435 parent->gtOp.gtOp1 = setupArg;
2439 /* must be the gtCallObjp */
2440 noway_assert(callTree->gtCall.gtCallObjp == argx);
2442 callTree->gtCall.gtCallObjp = setupArg;
2446 /* deferred arg goes into the late argument list */
2448 if (tmpRegArgNext == nullptr)
2450 tmpRegArgNext = compiler->gtNewArgList(defArg);
2451 callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
2455 noway_assert(tmpRegArgNext->OperIsList());
2456 noway_assert(tmpRegArgNext->Current());
2457 tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
2458 tmpRegArgNext = tmpRegArgNext->Rest();
2461 curArgTabEntry->node = defArg;
2462 curArgTabEntry->lateArgInx = regArgInx++;
2466 if (compiler->verbose)
2468 printf("\nShuffled argument table: ");
2469 for (unsigned curInx = 0; curInx < argCount; curInx++)
2471 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
2473 if (curArgTabEntry->regNum != REG_STK)
2475 printf("%s ", getRegName(curArgTabEntry->regNum));
2483 // Get the late arg for arg at position argIndex.
2484 // argIndex - 0-based position to get late arg for.
2485 // Caller must ensure this position has a late arg.
2486 GenTreePtr fgArgInfo::GetLateArg(unsigned argIndex)
2488 for (unsigned j = 0; j < this->ArgCount(); j++)
2490 if (this->ArgTable()[j]->argNum == argIndex)
2492 return this->ArgTable()[j]->node;
2495 // Caller must ensure late arg exists.
2499 void fgArgInfo::RecordStkLevel(unsigned stkLvl)
2501 assert(!IsUninitialized(stkLvl));
2502 this->stkLevel = stkLvl;
2505 unsigned fgArgInfo::RetrieveStkLevel()
2507 assert(!IsUninitialized(stkLevel));
2511 // Return a conservative estimate of the stack size in bytes.
2512 // It will be used only on the intercepted-for-host code path to copy the arguments.
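// For example (a sketch): on a target where MAX_REG_ARG is 4, a call with six
// arguments gives numStkArgs == 2, so the estimate is 2 * REGSIZE_BYTES.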
2513 int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
2517 for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
2523 if (numArgs > MAX_REG_ARG)
2525 numStkArgs = numArgs - MAX_REG_ARG;
2532 return numStkArgs * REGSIZE_BYTES;
2535 //------------------------------------------------------------------------------
2536 // fgMakeMultiUse : If the node is a local, clone it and increase the ref count;
2537 // otherwise insert a comma-form temp.
2540 // pOp - a pointer to the child node we will be replacing with the comma expression that
2541 // evaluates *pOp to a temp and returns the result
2544 // A fresh GT_LCL_VAR node referencing the temp which has not been used
2547 // The result tree MUST be added to the tree structure since the ref counts are
2548 // already incremented.
2550 GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
2552 GenTree* tree = *pOp;
2553 if (tree->IsLocal())
2555 auto result = gtClone(tree);
2556 if (lvaLocalVarRefCounted)
2558 lvaTable[tree->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2564 GenTree* result = fgInsertCommaFormTemp(pOp);
2566 // At this point, *pOp is GT_COMMA(GT_ASG(V01, *pOp), V01) and result = V01.
2567 // Therefore, the ref count has to be incremented 3 times: twice for the two
2568 // V01 references in *pOp, and once for 'result', which the caller is expected to add to a tree.
2569 if (lvaLocalVarRefCounted)
2571 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2572 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2573 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2580 //------------------------------------------------------------------------------
2581 // fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
2582 // and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl)
2585 // ppTree - a pointer to the child node we will be replacing with the comma expression that
2586 // evaluates ppTree to a temp and returns the result
2588 // structType - value type handle if the temp created is of TYP_STRUCT.
2591 // A fresh GT_LCL_VAR node referencing the temp which has not been used
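// For example (a sketch, 'V01' being an illustrative temp): if *ppTree is a
// GT_CALL returning TYP_INT, then afterwards *ppTree is
// GT_COMMA(GT_ASG(V01, call), V01) and the returned node is a fresh
// GT_LCL_VAR V01 for the caller to link into the tree elsewhere.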
2594 GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
2596 GenTree* subTree = *ppTree;
2598 unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));
2600 if (varTypeIsStruct(subTree))
2602 assert(structType != nullptr);
2603 lvaSetStruct(lclNum, structType, false);
2606 // If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
2607 // The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() for
2608 // setting the type of the lclVars we create.
2609 GenTree* asg = gtNewTempAssign(lclNum, subTree);
2611 GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
2613 GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load);
2617 return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
2620 //------------------------------------------------------------------------
2621 // fgMorphArgs: Walk and transform (morph) the arguments of a call
2624 // callNode - the call for which we are doing the argument morphing
2627 // Like most morph methods, this method returns the morphed node,
2628 // though in this case there are currently no scenarios where the
2629 // node itself is re-created.
2632 // This method is even less idempotent than most morph methods.
2633 // That is, it makes changes that should not be redone. It uses the existence
2634 // of gtCallLateArgs (the late arguments list) to determine if it has
2635 // already done that work.
2637 // The first time it is called (i.e. during global morphing), this method
2638 // computes the "late arguments". This is when it determines which arguments
2639 // need to be evaluated to temps prior to the main argument setup, and which
2640 // can be directly evaluated into the argument location. It also creates a
2641 // second argument list (gtCallLateArgs) that does the final placement of the
2642 // arguments, e.g. into registers or onto the stack.
2644 // The "non-late arguments", aka the gtCallArgs, are doing the in-order
2645 // evaluation of the arguments that might have side-effects, such as embedded
2646 // assignments, calls or possible throws. In these cases, it and earlier
2647 // arguments must be evaluated to temps.
2649 // On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
2650 // if we have any nested calls, we need to defer the copying of the argument
2651 // into the fixed argument area until after the call. If the argument did not
2652 // otherwise need to be computed into a temp, it is moved to gtCallLateArgs and
2653 // replaced in the "early" arg list (gtCallArgs) with a placeholder node.
2656 #pragma warning(push)
2657 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2659 GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
2664 unsigned flagsSummary = 0;
2665 unsigned genPtrArgCntSav = fgPtrArgCntCur;
2667 unsigned argIndex = 0;
2669 unsigned intArgRegNum = 0;
2670 unsigned fltArgRegNum = 0;
2673 regMaskTP argSkippedRegMask = RBM_NONE;
2674 regMaskTP fltArgSkippedRegMask = RBM_NONE;
2675 #endif // _TARGET_ARM_
2677 #if defined(_TARGET_X86_)
2678 unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated
2680 const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number
2683 unsigned argSlots = 0;
2684 unsigned nonRegPassedStructSlots = 0;
2685 bool reMorphing = call->AreArgsComplete();
2686 bool callHasRetBuffArg = call->HasRetBufArg();
2688 #ifndef _TARGET_X86_ // i.e. _TARGET_AMD64_ or _TARGET_ARM_
2689 bool callIsVararg = call->IsVarargs();
2692 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2693 // If fgMakeOutgoingStructArgCopy is called and copies are generated, hasStackArgCopy is set
2694 // to make sure to call EvalArgsToTemp. fgMakeOutgoingStructArgCopy just marks the argument
2695 // to need a temp variable, and EvalArgsToTemp actually creates the temp variable node.
2696 bool hasStackArgCopy = false;
2699 #ifndef LEGACY_BACKEND
2700 // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
2701 // following the normal calling convention or in the normal argument registers. We either mark existing
2702 // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
2703 // non-standard arguments into the argument list, below.
2704 class NonStandardArgs
2706 struct NonStandardArg
2708 regNumber reg; // The register to be assigned to this non-standard argument.
2709 GenTree* node; // The tree node representing this non-standard argument.
2710 // Note that this must be updated if the tree node changes due to morphing!
2713 ArrayStack<NonStandardArg> args;
2716 NonStandardArgs(Compiler* compiler) : args(compiler, 3) // We will have at most 3 non-standard arguments
2720 //-----------------------------------------------------------------------------
2721 // Add: add a non-standard argument to the table of non-standard arguments
2724 // node - a GenTree node that is a non-standard argument.
2725 // reg - the register to assign to this node.
2730 void Add(GenTree* node, regNumber reg)
2732 NonStandardArg nsa = {reg, node};
2736 //-----------------------------------------------------------------------------
2737 // Find: Look for a GenTree* in the set of non-standard args.
2740 // node - a GenTree node to look for
2743 // The index of the non-standard argument (a non-negative, unique, stable number).
2744 // If the node is not a non-standard argument, return -1.
2746 int Find(GenTree* node)
2748 for (int i = 0; i < args.Height(); i++)
2750 if (node == args.Index(i).node)
2758 //-----------------------------------------------------------------------------
2759 // FindReg: Look for a GenTree node in the non-standard arguments set. If found,
2760 // set the register to use for the node.
2763 // node - a GenTree node to look for
2764 // pReg - an OUT argument. *pReg is set to the non-standard register to use if
2765 // 'node' is found in the non-standard argument set.
2768 // 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the
2769 //     register to use.
2770 // 'false' otherwise (in this case, *pReg is unmodified).
2772 bool FindReg(GenTree* node, regNumber* pReg)
2774 for (int i = 0; i < args.Height(); i++)
2776 NonStandardArg& nsa = args.IndexRef(i);
2777 if (node == nsa.node)
2786 //-----------------------------------------------------------------------------
2787 // Replace: Replace the non-standard argument node at a given index. This is done when
2788 // the original node was replaced via morphing, but we need to continue to assign a
2789 // particular non-standard arg to it.
2792 // index - the index of the non-standard arg. It must exist.
2793 // node - the new GenTree node.
2798 void Replace(int index, GenTree* node)
2800 args.IndexRef(index).node = node;
2803 } nonStandardArgs(this);
2804 #endif // !LEGACY_BACKEND
2806 // Count of args. On first morph, this is counted before we've filled in the arg table.
2807 // On remorph, we grab it from the arg table.
2808 unsigned numArgs = 0;
2810 // Process the late arguments (which were determined by a previous caller).
2811 // Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
2812 // may need to refer to it.
2815 // We need to reMorph the gtCallLateArgs early since that is what triggers
2816 // the expression folding and we need to have the final folded gtCallLateArgs
2817 // available when we call RemorphRegArg so that we correctly update the fgArgInfo
2818 // with the folded tree that represents the final optimized argument nodes.
2820 // However if a range-check needs to be generated for any of these late
2821 // arguments we also need to "know" what the stack depth will be when we generate
2822 // code to branch to the throw range check failure block as that is part of the
2823 // GC information contract for that block.
2825 // Since the late arguments are evaluated last we have pushed all of the
2826 // other arguments on the stack before we evaluate these late arguments,
2827 // so we record the stack depth on the first morph call when reMorphing
2828 // was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
2830 if (call->gtCallLateArgs != nullptr)
2832 unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
2833 fgPtrArgCntCur += callStkLevel;
2834 call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
2835 flagsSummary |= call->gtCallLateArgs->gtFlags;
2836 fgPtrArgCntCur -= callStkLevel;
2838 assert(call->fgArgInfo != nullptr);
2839 call->fgArgInfo->RemorphReset();
2841 numArgs = call->fgArgInfo->ArgCount();
2845 // First we need to count the args
2846 if (call->gtCallObjp)
2850 for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
2855 // Insert or mark non-standard args. These are either outside the normal calling convention, or
2856 // arguments registers that don't follow the normal progression of argument registers in the calling
2857 // convention (such as for the ARM64 fixed return buffer argument x8).
2859 // *********** NOTE *************
2860 // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
2861 // in the implementation of fast tail call.
2862 // *********** END NOTE *********
2863 CLANG_FORMAT_COMMENT_ANCHOR;
2865 #if !defined(LEGACY_BACKEND)
2866 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
2867 // The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers have custom calling conventions.
2868 // Set the argument registers correctly here.
2869 if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
2871 GenTreeArgList* args = call->gtCallArgs;
2872 GenTree* arg1 = args->Current();
2873 assert(arg1 != nullptr);
2874 nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
2876 #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
2877 #if defined(_TARGET_X86_)
2878 // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
2879 // hi part to be in EDX. This sets the argument registers up correctly.
2880 else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
2881 call->IsHelperCall(this, CORINFO_HELP_LRSZ))
2883 GenTreeArgList* args = call->gtCallArgs;
2884 GenTree* arg1 = args->Current();
2885 assert(arg1 != nullptr);
2886 nonStandardArgs.Add(arg1, REG_LNGARG_LO);
2888 args = args->Rest();
2889 GenTree* arg2 = args->Current();
2890 assert(arg2 != nullptr);
2891 nonStandardArgs.Add(arg2, REG_LNGARG_HI);
2893 #else // !defined(_TARGET_X86_)
2894 // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
2895 // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
2896 // convention for x86/SSE.
2898 // If we have a Fixed Return Buffer argument register then we set up a non-standard argument for it.
2900 if (hasFixedRetBuffReg() && call->HasRetBufArg())
2902 args = call->gtCallArgs;
2903 assert(args != nullptr);
2904 assert(args->OperIsList());
2906 argx = call->gtCallArgs->Current();
2908 // We don't increment numArgs here, since we already counted this argument above.
2910 nonStandardArgs.Add(argx, theFixedRetBuffReg());
2913 // We are allowed to have a Fixed Return Buffer argument combined
2914 // with any of the remaining non-standard arguments
2916 if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
2918 assert(!call->gtCallCookie);
2919 // Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
2920 // It will be used only on the intercepted-for-host code path to copy the arguments.
2922 GenTree* cns = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
2923 call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
2926 nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM);
2928 else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT) && !call->IsTailCallViaHelper())
2930 // indirect VSD stubs need the base of the indirection cell to be
2931 // passed in addition. At this point that is the value in gtCallAddr.
2932 // The actual call target will be derived from gtCallAddr in call
2933 // lowering.
2935 // If it is a VSD call getting dispatched via tail call helper,
2936 // fgMorphTailCall() would materialize stub addr as an additional
2937 // parameter added to the original arg list, and hence there is no need to
2938 // add it as a non-standard arg.
2940 GenTree* arg = call->gtCallAddr;
2941 if (arg->OperIsLocal())
2943 arg = gtClone(arg, true);
2947 call->gtCallAddr = fgInsertCommaFormTemp(&arg);
2948 call->gtFlags |= GTF_ASG;
2950 noway_assert(arg != nullptr);
2952 // And push the stub address onto the list of arguments
2953 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2956 nonStandardArgs.Add(arg, virtualStubParamInfo->GetReg());
2959 #endif // defined(_TARGET_X86_)
2960 if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr))
2962 assert(!call->IsUnmanaged());
2964 GenTree* arg = call->gtCallCookie;
2965 noway_assert(arg != nullptr);
2966 call->gtCallCookie = nullptr;
2968 #if defined(_TARGET_X86_)
2969 // x86 passes the cookie on the stack as the final argument to the call.
2970 GenTreeArgList** insertionPoint = &call->gtCallArgs;
2971 for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
2974 *insertionPoint = gtNewListNode(arg, nullptr);
2975 #else // !defined(_TARGET_X86_)
2976 // All other architectures pass the cookie in a register.
2977 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2978 #endif // defined(_TARGET_X86_)
2980 nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
2983 // put destination into R10/EAX
2984 arg = gtClone(call->gtCallAddr, true);
2985 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2988 nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM);
2990 // finally change this call to a helper call
2991 call->gtCallType = CT_HELPER;
2992 call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
2994 #endif // !defined(LEGACY_BACKEND)
2996 // Allocate the fgArgInfo for the call node;
2998 call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
3001 if (varTypeIsStruct(call))
3003 fgFixupStructReturn(call);
3006 /* First we morph the argument subtrees ('this' pointer, arguments, etc.).
3007 * During the first call to fgMorphArgs we also record the
3008 * information about late arguments we have in 'fgArgInfo'.
3009 * This information is used later to construct the gtCallLateArgs */
3011 /* Process the 'this' argument value, if present */
3013 argx = call->gtCallObjp;
3017 argx = fgMorphTree(argx);
3018 call->gtCallObjp = argx;
3019 flagsSummary |= argx->gtFlags;
3021 assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT);
3023 assert(argIndex == 0);
3025 /* We must fill in or update the argInfo table */
3029 /* this is a register argument - possibly update it in the table */
3030 call->fgArgInfo->RemorphRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
3034 assert(varTypeIsGC(call->gtCallObjp->gtType) || (call->gtCallObjp->gtType == TYP_I_IMPL));
3036 /* this is a register argument - put it in the table */
3037 call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1
3038 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3040 false, REG_STK, nullptr
3041 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3044 // this can't be a struct.
3045 assert(argx->gtType != TYP_STRUCT);
3047 /* Increment the argument register count and argument index */
3048 if (!varTypeIsFloating(argx->gtType) || opts.compUseSoftFP)
3051 #ifdef WINDOWS_AMD64_ABI
3052 // Whenever we pass an integer register argument
3053 // we skip the corresponding floating point register argument
3055 #endif // WINDOWS_AMD64_ABI
3059 noway_assert(!"the 'this' pointer can not be a floating point type");
3066 // Compute the maximum number of arguments that can be passed in registers.
3067 // For X86 we handle the varargs and unmanaged calling conventions
3069 if (call->gtFlags & GTF_CALL_POP_ARGS)
3071 noway_assert(intArgRegNum < MAX_REG_ARG);
3072 // No more register arguments for varargs (CALL_POP_ARGS)
3073 maxRegArgs = intArgRegNum;
3075 // Add in the ret buff arg
3076 if (callHasRetBuffArg)
3080 if (call->IsUnmanaged())
3082 noway_assert(intArgRegNum == 0);
3084 if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
3086 noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
3087 call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
3088 call->gtCallArgs->gtOp.gtOp1->gtOper ==
3089 GT_NOP); // the arg was already morphed to a register (fgMorph called twice)
3097 // Add in the ret buff arg
3098 if (callHasRetBuffArg)
3101 #endif // _TARGET_X86_
3103 /* Morph the user arguments */
3104 CLANG_FORMAT_COMMENT_ANCHOR;
3106 #if defined(_TARGET_ARM_)
3108 // The ARM ABI has a concept of back-filling of floating-point argument registers, according
3109 // to the "Procedure Call Standard for the ARM Architecture" document, especially
3110 // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
3111 // appear in a lower-numbered register than floating point argument N. That is, argument
3112 // register allocation is not strictly increasing. To support this, we need to keep track of unused
3113 // floating-point argument registers that we can back-fill. We only support 4-byte float and
3114 // 8-byte double types, and one to four element HFAs composed of these types. With this, we will
3115 // only back-fill single registers, since there is no way with these types to create
3116 // an alignment hole greater than one register. However, there can be up to 3 back-fill slots
3117 // available (with 16 FP argument registers). Consider this code:
3119 // struct HFA { float x, y, z; }; // a three element HFA
3120 // void bar(float a1, // passed in f0
3121 // double a2, // passed in f2/f3; skip f1 for alignment
3122 // HFA a3, // passed in f4/f5/f6
3123 // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
3124 // HFA a5, // passed in f10/f11/f12
3125 // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill
3127 // float a7, // passed in f1 (back-filled)
3128 // float a8, // passed in f7 (back-filled)
3129 // float a9, // passed in f13 (back-filled)
3130 // float a10) // passed on the stack in [OutArg+0]
3132 // Note that if we ever support FP types with larger alignment requirements, then there could
3133 // be more than single register back-fills.
3135 // Once we assign a floating-point argument to the stack, all subsequent FP arguments must be on the stack.
3136 // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
3137 // continues only so long as no VFP CPRC has been allocated to a slot on the stack."
3138 // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
3139 // and prevent any additional floating-point arguments from going in registers.
3141 bool anyFloatStackArgs = false;
3143 #endif // _TARGET_ARM_
3145 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3146 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3147 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3149 bool hasStructArgument = false; // @TODO-ARM64-UNIX: Remove this bool during a future refactoring
3150 bool hasMultiregStructArgs = false;
3151 for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
3153 GenTreePtr* parentArgx = &args->gtOp.gtOp1;
3155 #if FEATURE_MULTIREG_ARGS
3156 if (!hasStructArgument)
3158 hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1);
3160 #endif // FEATURE_MULTIREG_ARGS
3162 #ifndef LEGACY_BACKEND
3163 // Record the index of any nonStandard arg that we may be processing here, as we are
3164 // about to call fgMorphTree on it and fgMorphTree may replace it with a new tree.
3165 GenTreePtr orig_argx = *parentArgx;
3166 int nonStandard_index = nonStandardArgs.Find(orig_argx);
3167 #endif // !LEGACY_BACKEND
3169 argx = fgMorphTree(*parentArgx);
3171 flagsSummary |= argx->gtFlags;
3173 assert(args->OperIsList());
3174 assert(argx == args->Current());
3176 #ifndef LEGACY_BACKEND
3177 if ((nonStandard_index != -1) && (argx != orig_argx))
3179 // We need to update the node field for this nonStandard arg here
3180 // as it was changed by the call to fgMorphTree
3181 nonStandardArgs.Replace(nonStandard_index, argx);
3183 #endif // !LEGACY_BACKEND
3185 /* Change the node to TYP_I_IMPL so we don't report GC info
3186 * NOTE: We deferred this from the importer because of the inliner */
3188 if (argx->IsVarAddr())
3190 argx->gtType = TYP_I_IMPL;
3193 bool passUsingFloatRegs;
3194 unsigned argAlign = 1;
3195 // Setup any HFA information about 'argx'
3196 var_types hfaType = GetHfaType(argx);
3197 bool isHfaArg = varTypeIsFloating(hfaType);
3198 unsigned hfaSlots = 0;
3202 hfaSlots = GetHfaCount(argx);
3204 // If we have an HFA struct, it's possible that a method which originally had
3205 // only integer types now starts having FP types. We have to communicate this
3206 // through this flag, since LSRA will later use it to determine whether
3207 // or not to track the FP register set.
3209 compFloatingPointUsed = true;
3213 CORINFO_CLASS_HANDLE copyBlkClass = nullptr;
3214 bool isRegArg = false;
3215 bool isNonStandard = false;
3216 regNumber nonStdRegNum = REG_NA;
3218 fgArgTabEntryPtr argEntry = nullptr;
3222 argEntry = gtArgEntryByArgNum(call, argIndex);
3227 bool passUsingIntRegs;
3230 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3231 passUsingIntRegs = isValidIntArgReg(argEntry->regNum);
3235 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP;
3236 passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
3239 GenTreePtr curArg = argx;
3240 // If late args have already been computed, use the node in the argument table.
3241 if ((argEntry != nullptr) && argEntry->isTmp)
3243 curArg = argEntry->node;
3248 argAlign = argEntry->alignment;
3252 // We don't use the "size" return value from InferOpSizeAlign().
3253 codeGen->InferOpSizeAlign(curArg, &argAlign);
3255 argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
3256 argAlign /= TARGET_POINTER_SIZE;
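// For example, a TYP_DOUBLE argument reports 8-byte alignment, so with ARM's
// 4-byte pointers argAlign becomes 2 (pointer-sized slots).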
3261 if (passUsingFloatRegs)
3263 if (fltArgRegNum % 2 == 1)
3265 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3269 else if (passUsingIntRegs)
3271 if (intArgRegNum % 2 == 1)
3273 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3278 if (argSlots % 2 == 1)
3284 #elif defined(_TARGET_ARM64_)
3288 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3292 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
3295 #elif defined(_TARGET_AMD64_)
3298 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3302 passUsingFloatRegs = varTypeIsFloating(argx);
3304 #elif defined(_TARGET_X86_)
3306 passUsingFloatRegs = false;
3309 #error Unsupported or unset target architecture
3312 bool isBackFilled = false;
3313 unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
3314 var_types structBaseType = TYP_STRUCT;
3315 unsigned structSize = 0;
3317 bool isStructArg = varTypeIsStruct(argx);
3321 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3322 // Get the struct description for the already completed struct argument.
3323 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, argx);
3324 assert(fgEntryPtr != nullptr);
3326 // As described in a few other places, this can happen when argx was morphed
3327 // into an arg setup node - COPYBLK. The COPYBLK always has type TYP_VOID.
3328 // In such a case the fgArgTabEntry keeps track of whether the original node (before
3329 // morphing) was a struct, and of the struct classification.
3330 isStructArg = fgEntryPtr->isStruct;
3334 structDesc.CopyFrom(fgEntryPtr->structDesc);
3336 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3338 assert(argEntry != nullptr);
3339 if (argEntry->IsBackFilled())
3342 size = argEntry->numRegs;
3343 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(argEntry->regNum);
3345 isBackFilled = true;
3347 else if (argEntry->regNum == REG_STK)
3350 assert(argEntry->numRegs == 0);
3351 size = argEntry->numSlots;
3356 assert(argEntry->numRegs > 0);
3357 size = argEntry->numRegs + argEntry->numSlots;
3360 // This size has now been computed
3366 // Figure out the size of the argument. This is either in number of registers, or number of
3367 // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and
3368 // the stack.
3370 if (argx->IsArgPlaceHolderNode() || (!isStructArg))
3372 #if defined(_TARGET_AMD64_)
3373 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3376 size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3380 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3381 TARGET_POINTER_SIZE)) /
3382 TARGET_POINTER_SIZE;
3383 eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
3386 hasMultiregStructArgs = true;
3389 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3390 size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3391 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3392 #elif defined(_TARGET_ARM64_)
3397 size = GetHfaCount(argx);
3398 // HFA structs are passed by value in multiple registers
3399 hasMultiregStructArgs = true;
3403 // Structs are either passed in 1 or 2 (64-bit) slots
3404 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3405 TARGET_POINTER_SIZE)) /
3406 TARGET_POINTER_SIZE;
3410 // Structs that are the size of 2 pointers are passed by value in multiple registers
3411 hasMultiregStructArgs = true;
3415 size = 1; // Structs that are larger than 2 pointers (except for HFAs) are passed by
3416 // reference (to a copy)
3419 // Note that there are some additional rules for multireg structs.
3420 // (i.e. they cannot be split between registers and the stack)
3424 size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot'
3426 #elif defined(_TARGET_ARM_)
3429 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3430 TARGET_POINTER_SIZE)) /
3431 TARGET_POINTER_SIZE;
3432 if (isHfaArg || size > 1)
3434 hasMultiregStructArgs = true;
3440 // long/double type argument(s) will be changed to GT_FIELD_LIST in the Lowering phase
3441 size = genTypeStSz(argx->gtType);
3443 #elif defined(_TARGET_X86_)
3444 size = genTypeStSz(argx->gtType);
3446 #error Unsupported or unset target architecture
3447 #endif // _TARGET_XXX_
3452 size = GetHfaCount(argx);
3453 hasMultiregStructArgs = true;
3455 #endif // _TARGET_ARM_
3458 // We handle two opcodes: GT_MKREFANY and GT_OBJ
3459 if (argx->gtOper == GT_MKREFANY)
3461 if (varTypeIsStruct(argx))
3465 #ifdef _TARGET_AMD64_
3466 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3467 if (varTypeIsStruct(argx))
3469 size = info.compCompHnd->getClassSize(impGetRefAnyClass());
3470 unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE);
3471 size = roundupSize / TARGET_POINTER_SIZE;
3472 eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc);
3475 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3483 else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be a child of a GT_COMMA
3485 GenTreePtr argObj = argx;
3486 GenTreePtr* parentOfArgObj = parentArgx;
3488 assert(args->OperIsList());
3489 assert(argx == args->Current());
3491 /* The GT_OBJ may be a child of a GT_COMMA */
3492 while (argObj->gtOper == GT_COMMA)
3494 parentOfArgObj = &argObj->gtOp.gtOp2;
3495 argObj = argObj->gtOp.gtOp2;
3498 // TODO-1stClassStructs: An OBJ node should not be required for lclVars.
3499 if (argObj->gtOper != GT_OBJ)
3501 BADCODE("illegal argument tree in fgMorphArgs");
3504 CORINFO_CLASS_HANDLE objClass = argObj->gtObj.gtClass;
3505 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3506 eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
3507 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3509 unsigned originalSize = info.compCompHnd->getClassSize(objClass);
3510 originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
3511 unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
3513 structSize = originalSize;
3515 structPassingKind howToPassStruct;
3516 structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, originalSize);
3518 #ifdef _TARGET_ARM64_
3519 if ((howToPassStruct == SPK_PrimitiveType) && // Passed in a single register
3520 !isPow2(originalSize)) // size is 3,5,6 or 7 bytes
3522 if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
3524 // For ARM64, for structs that are 3, 5, 6 or 7 bytes in size
3525 // we can read 4 or 8 bytes from the LclVar to pass this arg.
3526 originalSize = genTypeSize(structBaseType);
3529 #endif // _TARGET_ARM64_
3531 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3532 // On System V OS-es a struct is never passed by reference.
3533 // It is either passed by value on the stack or in registers.
3534 bool passStructInRegisters = false;
3535 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3536 bool passStructByRef = false;
3537 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3539 // The following if-then-else needs to be carefully refactored.
3540 // Basically the else portion wants to turn a struct load (a GT_OBJ)
3541 // into a GT_IND of the appropriate size.
3542 // It can do this with structs sizes that are 1, 2, 4, or 8 bytes.
3543 // It can't do this when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined (Why?)
3544 // TODO-Cleanup: Remove the #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING below.
3545 // It also can't do this if we have a HFA arg,
3546 // unless we have a 1-elem HFA in which case we want to do the optimization.
3547 CLANG_FORMAT_COMMENT_ANCHOR;
3549 #ifndef _TARGET_X86_
3550 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3551 // Check for a struct argument with size 1, 2, 4 or 8 bytes,
3552 // as we can optimize these by turning them into a GT_IND of the correct type.
3554 // Check for cases that we cannot optimize:
3556 if ((originalSize > TARGET_POINTER_SIZE) || // it is a struct that is larger than a pointer
3557     !isPow2(originalSize) ||                // its size is not a power of two (1, 2, 4 or 8)
3558     (isHfaArg && (hfaSlots != 1)))          // it is an HFA with more than one element
3559 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3561 // Normalize 'size' to the number of pointer sized items
3562 // 'size' is the number of register slots that we will use to pass the argument
3563 size = roundupSize / TARGET_POINTER_SIZE;
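// For example, on a 64-bit target a 12-byte struct has roundupSize == 16,
// giving size == 2 pointer-sized slots.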
3564 #if defined(_TARGET_AMD64_)
3565 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3566 size = 1; // This must be copied to a temp and passed by address
3567 passStructByRef = true;
3568 copyBlkClass = objClass;
3569 #else // FEATURE_UNIX_AMD64_STRUCT_PASSING
3570 if (!structDesc.passedInRegisters)
3572 GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
3573 bool needCpyBlk = false;
3574 if (lclVar != nullptr)
3576 // If the struct is promoted to registers, it has to be materialized
3577 // on the stack. We may want to support promoted structs in the
3578 // codegen for putarg_stk instead of creating a copy here.
3579 LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3580 needCpyBlk = varDsc->lvPromoted;
3584 // If a simd16 comes from Vector<T>, eeGetSystemVAmd64PassStructInRegisterDescriptor
3585 // sets structDesc.passedInRegisters to false.
3587 // GT_ADDR(GT_SIMD) is not a rationalized IR form and is not handled
3588 // by the rationalizer. For now we will let the SIMD struct arg be copied to
3589 // a local. As part of the cpblk rewrite, the rationalizer will handle GT_ADDR(GT_SIMD).
3592 // | \--* addr byref
3593 // | | /--* lclVar simd16 V05 loc4
3594 // | \--* simd simd16 int -
3595 // | \--* lclVar simd16 V08 tmp1
3597 // TODO-Amd64-Unix: The rationalizer can be updated to handle this pattern,
3598 // so that we don't need to generate a copy here.
3599 GenTree* addr = argObj->gtOp.gtOp1;
3600 if (addr->OperGet() == GT_ADDR)
3602 GenTree* addrChild = addr->gtOp.gtOp1;
3603 if (addrChild->OperGet() == GT_SIMD)
3609 passStructInRegisters = false;
3612 copyBlkClass = objClass;
3616 copyBlkClass = NO_CLASS_HANDLE;
3621 // The objClass is used to materialize the struct on the stack.
3622 // For SystemV, the code below generates copies for struct arguments classified
3623 // as register arguments.
3624 // TODO-Amd64-Unix: We don't always need copies for this case. Struct arguments
3625 // can be passed in registers or copied directly to the outgoing area.
3626 passStructInRegisters = true;
3627 copyBlkClass = objClass;
3630 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3631 #elif defined(_TARGET_ARM64_)
3632 if ((size > 2) && !isHfaArg)
3634 size = 1; // This must be copied to a temp and passed by address
3635 passStructByRef = true;
3636 copyBlkClass = objClass;
3641 // If we're passing a promoted struct local var,
3642 // we may need to skip some registers due to alignment; record those.
3643 GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
3646 LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3647 if (varDsc->lvPromoted)
3649 assert(argObj->OperGet() == GT_OBJ);
3650 if (lvaGetPromotionType(varDsc) == PROMOTION_TYPE_INDEPENDENT)
3652 fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3656 #endif // _TARGET_ARM_
3658 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3659 // TODO-Amd64-Unix: Since the else part below is disabled for UNIX_AMD64, copies are always
3660 // generated for structs of size 1, 2, 4, or 8.
3661 else // We have a struct argument with size 1, 2, 4 or 8 bytes
3663 // change our GT_OBJ into a GT_IND of the correct type.
3664 // We've already ensured above that size is a power of 2, and less than or equal to pointer
3665 // size.
3667 assert(howToPassStruct == SPK_PrimitiveType);
3669 // ToDo: remove this block as getArgTypeForStruct properly handles turning one element HFAs into
3670 // their underlying floating point type.
3673 // If we reach here with an HFA arg it has to be a one element HFA
3674 assert(hfaSlots == 1);
3675 structBaseType = hfaType; // change the indirection type to a floating point type
3678 noway_assert(structBaseType != TYP_UNKNOWN);
3680 argObj->ChangeOper(GT_IND);
3682 // Now see if we can fold *(&X) into X
3683 if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
3685 GenTreePtr temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
3687 // Keep the DONT_CSE flag in sync
3688 // (as the addr always marks it for its op1)
3689 temp->gtFlags &= ~GTF_DONT_CSE;
3690 temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
3691 DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
3692 DEBUG_DESTROY_NODE(argObj); // GT_IND
3695 *parentOfArgObj = temp;
3697 // If the OBJ had been the top level node, we've now changed argx.
3698 if (parentOfArgObj == parentArgx)
3703 if (argObj->gtOper == GT_LCL_VAR)
3705 unsigned lclNum = argObj->gtLclVarCommon.gtLclNum;
3706 LclVarDsc* varDsc = &lvaTable[lclNum];
3708 if (varDsc->lvPromoted)
3710 if (varDsc->lvFieldCnt == 1)
3712 // get the first and only promoted field
3713 LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
3714 if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
3716 // we will use the first and only promoted field
3717 argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);
3719 if (varTypeCanReg(fieldVarDsc->TypeGet()) &&
3720 (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
3722 // Just use the existing field's type
3723 argObj->gtType = fieldVarDsc->TypeGet();
3727 // Can't use the existing field's type, so use GT_LCL_FLD to swizzle
3729 argObj->ChangeOper(GT_LCL_FLD);
3730 argObj->gtType = structBaseType;
3732 assert(varTypeCanReg(argObj->TypeGet()));
3733 assert(copyBlkClass == NO_CLASS_HANDLE);
3737 // use GT_LCL_FLD to swizzle the single field struct to a new type
3738 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
3739 argObj->ChangeOper(GT_LCL_FLD);
3740 argObj->gtType = structBaseType;
3745 // The struct fits into a single register, but it has been promoted into its
3746 // constituent fields, and so we have to re-assemble it
3747 copyBlkClass = objClass;
3749 // Alignment constraints may cause us not to use (to "skip") some argument
3750 // registers. Add those, if any, to the skipped (int) arg reg mask.
3751 fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3752 #endif // _TARGET_ARM_
3755 else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
3757 // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
3758 argObj->ChangeOper(GT_LCL_FLD);
3759 argObj->gtType = structBaseType;
3764 // Not a GT_LCL_VAR, so we can just change the type on the node
3765 argObj->gtType = structBaseType;
3767 assert(varTypeCanReg(argObj->TypeGet()) ||
3768 ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsIntegral(structBaseType)));
3772 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3774 #endif // not _TARGET_X86_
3775 // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
3776 if ((structBaseType == TYP_STRUCT) &&
3777 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3778 !passStructInRegisters
3779 #else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3781 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3784 if (isHfaArg && passUsingFloatRegs)
3786 size = GetHfaCount(argx); // GetHfaCount returns number of elements in the HFA
3790 // If the valuetype size is not a multiple of sizeof(void*),
3791 // we must copyblk to a temp before doing the obj to avoid
3792 // the obj reading memory past the end of the valuetype
3793 CLANG_FORMAT_COMMENT_ANCHOR;
3795 if (roundupSize > originalSize)
3797 copyBlkClass = objClass;
3799 // There are a few special cases where we can omit using a CopyBlk
3800 // where we normally would need to use one.
3802 if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
3804 copyBlkClass = NO_CLASS_HANDLE;
3808 size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
3813 #ifdef _TARGET_64BIT_
3816 hasMultiregStructArgs = true;
3818 #elif defined(_TARGET_ARM_)
3819 // Build the mkrefany as a GT_FIELD_LIST in this function
3820 if (size > 1 && argx->gtOper != GT_MKREFANY)
3822 hasMultiregStructArgs = true;
3824 #endif // _TARGET_ARM_
3827 // The 'size' value must have been set by now. (The original value of zero is an invalid value.)
3831 // Figure out if the argument will be passed in a register.
3834 if (isRegParamType(genActualType(argx->TypeGet()))
3835 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3836 && (!isStructArg || structDesc.passedInRegisters)
3841 if (passUsingFloatRegs)
3843 // First, see if it can be back-filled
3844 if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
3845 (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot?
3846 (size == 1)) // The size to back-fill is one float register
3848 // Back-fill the register.
3849 isBackFilled = true;
3850 regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
3851 fltArgSkippedRegMask &=
3852 ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
3853 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
3854 assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG);
3857 // Does the entire float, double, or HFA fit in the FP arg registers?
3858 // Check if the last register needed is still in the argument register range.
3859 isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG;
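// For example (ARM, where MAX_FLOAT_REG_ARG is 16): a 3-element HFA with
// nextFltArgRegNum == 14 would need f14-f16, and 14 + 3 - 1 == 16 is not
// less than 16, so the HFA does not go in registers.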
3863 anyFloatStackArgs = true;
3868 isRegArg = intArgRegNum < MAX_REG_ARG;
3870 #elif defined(_TARGET_ARM64_)
3871 if (passUsingFloatRegs)
3873 // Check if the last register needed is still in the fp argument register range.
3874 isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG;
3876 // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers?
3877 if (isHfaArg && !isRegArg)
3879 // recompute the 'size' so that it represents the number of stack slots rather than the number of
3880 // registers.
3882 unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
3883 size = roundupSize / TARGET_POINTER_SIZE;
3885 // We also must update fltArgRegNum so that we no longer try to
3886 // allocate any new floating point registers for args
3887 // This prevents us from backfilling a subsequent arg into d7
3889 fltArgRegNum = MAX_FLOAT_REG_ARG;
3894 // Check if the last register needed is still in the int argument register range.
3895 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
3897 // Did we run out of registers when we had a 16-byte struct (size == 2)?
3898 // (i.e. we only have one register remaining but we needed two registers to pass this arg)
3899 // This prevents us from backfilling a subsequent arg into x7
3901 if (!isRegArg && (size > 1))
3903 // We also must update intArgRegNum so that we no longer try to
3904 // allocate any new general purpose registers for args
3906 intArgRegNum = maxRegArgs;
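// For example (ARM64, where maxRegArgs is 8): with intArgRegNum == 7 and a
// 16-byte struct (size == 2), 7 + (2 - 1) < 8 fails, so the struct goes to
// the stack, and setting intArgRegNum to maxRegArgs keeps any later integer
// arg from back-filling x7.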
3909 #else // not _TARGET_ARM_ or _TARGET_ARM64_
3911 #if defined(UNIX_AMD64_ABI)
3913 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3914 // Here a struct can be passed in registers following the classification of its members and size.
3915 // Now make sure there are actually enough registers to do so.
3918 unsigned int structFloatRegs = 0;
3919 unsigned int structIntRegs = 0;
3920 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
3922 if (structDesc.IsIntegralSlot(i))
3926 else if (structDesc.IsSseSlot(i))
3932 isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) &&
3933 ((intArgRegNum + structIntRegs) <= MAX_REG_ARG);
3936 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3938 if (passUsingFloatRegs)
3940 isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
3944 isRegArg = intArgRegNum < MAX_REG_ARG;
3947 #else // !defined(UNIX_AMD64_ABI)
3948 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
3949 #endif // !defined(UNIX_AMD64_ABI)
3950 #endif // _TARGET_ARM_
3957 #ifndef LEGACY_BACKEND
3958 // If there are nonstandard args (outside the calling convention), they were inserted above
3959 // and noted in a table so we can recognize them here and build their argInfo.
3961 // They should not affect the placement of any other args or stack space required.
3962 // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
3963 isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum);
3964 if (isNonStandard && (nonStdRegNum == REG_STK))
3968 #if defined(_TARGET_X86_)
3969 else if (call->IsTailCallViaHelper())
3971 // We have already (before calling fgMorphArgs()) appended the 4 special args
3972 // required by the x86 tailcall helper. These args are required to go on the
3973 // stack. Force them to the stack here.
3974 assert(numArgs >= 4);
3975 if (argIndex >= numArgs - 4)
3980 #endif // defined(_TARGET_X86_)
3981 #endif // !LEGACY_BACKEND
3982 } // end !reMorphing
3985 // Now we know if the argument goes in registers or not, and how big it is;
3986 // we either just computed this, or this is a re-morph call and we looked it up.
3988 CLANG_FORMAT_COMMENT_ANCHOR;
3991 // If we ever allocate a floating point argument to the stack, then all
3992 // subsequent HFA/float/double arguments go on the stack.
3993 if (!isRegArg && passUsingFloatRegs)
3995 for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum)
3997 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
4001 // If we think we're going to split a struct between integer registers and the stack, check to
4002 // see if we've already assigned a floating-point arg to the stack.
4003 if (isRegArg && // We decided above to use a register for the argument
4004 !passUsingFloatRegs && // We're using integer registers
4005 (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack
4006 anyFloatStackArgs) // We've already used the stack for a floating-point argument
4008 isRegArg = false; // Change our mind; don't pass this struct partially in registers
4010 // Skip the rest of the integer argument registers
4011 for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum)
4013 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
4017 #endif // _TARGET_ARM_
4021 regNumber nextRegNum = REG_STK;
4022 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4023 regNumber nextOtherRegNum = REG_STK;
4024 unsigned int structFloatRegs = 0;
4025 unsigned int structIntRegs = 0;
4027 if (isStructArg && structDesc.passedInRegisters)
4029 // It is a struct passed in registers. Assign the next available register.
4030 assert((structDesc.eightByteCount <= 2) && "Too many eightbytes.");
4031 regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum};
4032 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
4034 if (structDesc.IsIntegralSlot(i))
4036 *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs);
4039 else if (structDesc.IsSseSlot(i))
4041 *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs);
4047 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
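// Illustrative of the struct case above (hypothetical register numbers): for a
// struct classified as {INTEGER, SSE} with intArgRegNum == 2 and
// nextFltArgRegNum == 0, the loop would set nextRegNum to RDX and
// nextOtherRegNum to XMM0.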
4049 // fill in or update the argInfo table
4050 nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum)
4051 : genMapIntRegArgNumToRegNum(intArgRegNum);
4054 #ifdef _TARGET_AMD64_
4055 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
4060 fgArgTabEntryPtr newArgEntry;
4063 // This is a register argument - possibly update it in the table
4064 newArgEntry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
4070 nextRegNum = nonStdRegNum;
4073 // This is a register argument - put it in the table
4074 newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign
4075 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4077 isStructArg, nextOtherRegNum, &structDesc
4078 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4081 newArgEntry->SetIsHfaRegArg(passUsingFloatRegs &&
4082 isHfaArg); // Note that on Arm32 an HFA is passed in int regs for varargs
4083 newArgEntry->SetIsBackFilled(isBackFilled);
4084 newArgEntry->isNonStandard = isNonStandard;
4087 if (newArgEntry->isNonStandard)
4092 // Set up the next intArgRegNum and fltArgRegNum values.
4095 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4098 intArgRegNum += structIntRegs;
4099 fltArgRegNum += structFloatRegs;
4102 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4104 if (passUsingFloatRegs)
4106 fltArgRegNum += size;
4108 #ifdef WINDOWS_AMD64_ABI
4109 // Whenever we pass an integer register argument
4110 // we skip the corresponding floating point register argument
4111 intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
4112 #endif // WINDOWS_AMD64_ABI
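// E.g. on the Windows x64 ABI the two register files share argument positions:
// for f(int a, double b), 'a' goes in RCX and 'b' in XMM1, because position 0
// was consumed by the integer argument.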
4114 if (fltArgRegNum > MAX_FLOAT_REG_ARG)
4116 // This indicates a partial enregistration of a struct type
4117 assert(varTypeIsStruct(argx));
4118 unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
4119 assert((unsigned char)numRegsPartial == numRegsPartial);
4120 call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
4121 fltArgRegNum = MAX_FLOAT_REG_ARG;
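// Illustrative (ARM32, where s0-s15 give MAX_FLOAT_REG_ARG == 16 float slots):
// an HFA of four floats starting at s14 gets numRegsPartial == 2, i.e. s14/s15
// in registers and the remaining two elements on the stack.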
4123 #endif // _TARGET_ARM_
4127 if (hasFixedRetBuffReg() && (nextRegNum == theFixedRetBuffReg()))
4129 // We are setting up the fixed return buffer register argument,
4130 // so don't increment intArgRegNum.
4135 // Increment intArgRegNum by 'size' registers
4136 intArgRegNum += size;
4139 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
4140 fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
4141 #endif // _TARGET_AMD64_
4143 if (intArgRegNum > MAX_REG_ARG)
4145 // This indicates a partial enregistration of a struct type
4146 assert((isStructArg) || argx->OperIsFieldList() || argx->OperIsCopyBlkOp() ||
4147 (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
4148 unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
4149 assert((unsigned char)numRegsPartial == numRegsPartial);
4150 call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
4151 intArgRegNum = MAX_REG_ARG;
4152 fgPtrArgCntCur += size - numRegsPartial;
4154 #endif // _TARGET_ARM_
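// Illustrative (ARM32, four integer argument registers r0-r3): a 16-byte struct
// starting at intArgRegNum == 2 yields numRegsPartial == 2, so r2/r3 hold the
// first half and fgPtrArgCntCur grows by the two stack slots for the rest.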
4159 else // We have an argument that is not passed in a register
4161 fgPtrArgCntCur += size;
4163 // If the register arguments have not been determined then we must fill in the argInfo
4167 // This is a stack argument - possibly update it in the table
4168 call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
4172 // This is a stack argument - put it in the table
4173 call->fgArgInfo->AddStkArg(argIndex, argx, args, size,
4174 argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg));
4178 if (copyBlkClass != NO_CLASS_HANDLE)
4180 noway_assert(!reMorphing);
4181 fgMakeOutgoingStructArgCopy(call, args, argIndex,
4182 copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
4184 // This can cause a GTF_EXCEPT flag to be set.
4185 // TODO-CQ: Fix the cases where this happens. We shouldn't be adding any new flags.
4186 // This currently occurs in the case where we are re-morphing the args on x86/RyuJIT, and
4187 // there are no register arguments. Then reMorphing is never true, so we keep re-copying
4188 // any struct arguments.
4189 // i.e. assert(((call->gtFlags & GTF_EXCEPT) != 0) || ((args->Current()->gtFlags & GTF_EXCEPT) == 0)
4190 flagsSummary |= (args->Current()->gtFlags & GTF_EXCEPT);
4192 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4193 hasStackArgCopy = true;
4197 #ifndef LEGACY_BACKEND
4198 if (argx->gtOper == GT_MKREFANY)
4200 // 'Lower' the MKREFANY tree and insert it.
4201 noway_assert(!reMorphing);
4203 #ifndef _TARGET_64BIT_
4205 // Build the mkrefany as a GT_FIELD_LIST
4206 GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST)
4207 GenTreeFieldList(argx->gtOp.gtOp1, offsetof(CORINFO_RefAny, dataPtr), TYP_BYREF, nullptr);
4208 (void)new (this, GT_FIELD_LIST)
4209 GenTreeFieldList(argx->gtOp.gtOp2, offsetof(CORINFO_RefAny, type), TYP_I_IMPL, fieldList);
4210 fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
4211 fp->node = fieldList;
4212 args->gtOp.gtOp1 = fieldList;
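// The resulting arg has the following illustrative shape (32-bit offsets):
//   FIELD_LIST(dataPtr [+0] TYP_BYREF, FIELD_LIST(type [+4] TYP_I_IMPL, nullptr))
// i.e. the two halves of the TypedReference are passed as separate fields.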
4214 #else // _TARGET_64BIT_
4217 // Here we don't need the unsafe-value-class check, since the addr of the temp is used only in the mkrefany.
4218 unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
4219 lvaSetStruct(tmp, impGetRefAnyClass(), false);
4221 // Build the mkrefany as a comma node:
4222 // (tmp.ptr=argx),(tmp.type=handle)
4223 GenTreeLclFld* destPtrSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, dataPtr));
4224 GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, type));
4225 destPtrSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField());
4226 destPtrSlot->gtFlags |= GTF_VAR_DEF;
4227 destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
4228 destTypeSlot->gtFlags |= GTF_VAR_DEF;
4230 GenTreePtr asgPtrSlot = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1);
4231 GenTreePtr asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2);
4232 GenTreePtr asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot);
4234 // Change the expression to "(tmp=val)"
4235 args->gtOp.gtOp1 = asg;
4237 // EvalArgsToTemps will cause tmp to actually get loaded as the argument
4238 call->fgArgInfo->EvalToTmp(argIndex, tmp, asg);
4239 lvaSetVarAddrExposed(tmp);
4240 #endif // _TARGET_64BIT_
4242 #endif // !LEGACY_BACKEND
4244 #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
4247 GenTree* lclNode = fgIsIndirOfAddrOfLocal(argx);
4248 if ((lclNode != nullptr) &&
4249 (lvaGetPromotionType(lclNode->AsLclVarCommon()->gtLclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT))
4251 // Make a GT_FIELD_LIST of the field lclVars.
4252 GenTreeLclVarCommon* lcl = lclNode->AsLclVarCommon();
4253 LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]);
4254 GenTreeFieldList* fieldList = nullptr;
4255 for (unsigned fieldLclNum = varDsc->lvFieldLclStart;
4256 fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum)
4258 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
4259 if (fieldList == nullptr)
4261 lcl->SetLclNum(fieldLclNum);
4262 lcl->ChangeOper(GT_LCL_VAR);
4263 lcl->gtType = fieldVarDsc->lvType;
4264 fieldList = new (this, GT_FIELD_LIST)
4265 GenTreeFieldList(lcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, nullptr);
4266 fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
4267 fp->node = fieldList;
4268 args->gtOp.gtOp1 = fieldList;
4272 GenTree* fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType);
4273 fieldList = new (this, GT_FIELD_LIST)
4274 GenTreeFieldList(fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, fieldList);
4279 #endif // _TARGET_X86_ && !LEGACY_BACKEND
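// Illustrative (hypothetical local numbers): an independently promoted struct
// { int a; int b; } whose fields were promoted to V03 and V04 becomes
// FIELD_LIST(LCL_VAR V03 [+0] TYP_INT, FIELD_LIST(LCL_VAR V04 [+4] TYP_INT, nullptr)).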
4281 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4282 if (isStructArg && !isRegArg)
4284 nonRegPassedStructSlots += size;
4287 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4291 } // end foreach argument loop
4295 call->fgArgInfo->ArgsComplete();
4297 #ifdef LEGACY_BACKEND
4298 call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
4299 #if defined(_TARGET_ARM_)
4300 call->gtCallRegUsedMask &= ~argSkippedRegMask;
4302 if (fltArgRegNum > 0)
4304 #if defined(_TARGET_ARM_)
4305 call->gtCallRegUsedMask |= genFltAllRegArgMask(fltArgRegNum) & ~fltArgSkippedRegMask;
4308 #endif // LEGACY_BACKEND
4311 if (call->gtCallArgs)
4313 UpdateGT_LISTFlags(call->gtCallArgs);
4316 /* Process the function address, if indirect call */
4318 if (call->gtCallType == CT_INDIRECT)
4320 call->gtCallAddr = fgMorphTree(call->gtCallAddr);
4323 call->fgArgInfo->RecordStkLevel(fgPtrArgCntCur);
4325 if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr))
4330 /* Remember the maximum value we ever see */
4332 if (fgPtrArgCntMax < fgPtrArgCntCur)
4334 JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
4335 fgPtrArgCntMax = fgPtrArgCntCur;
4338 assert(fgPtrArgCntCur >= genPtrArgCntSav);
4339 call->fgArgInfo->SetStkSizeBytes((fgPtrArgCntCur - genPtrArgCntSav) * TARGET_POINTER_SIZE);
4341 /* The call will pop all the arguments we pushed */
4343 fgPtrArgCntCur = genPtrArgCntSav;
4345 #if FEATURE_FIXED_OUT_ARGS
4347 // Record the outgoing argument size. If the call is a fast tail
4348 // call, it will set up its arguments in the incoming arg area instead
4349 // of the outgoing arg area, so we don't need to track the
4350 // outgoing arg size.
4351 if (!call->IsFastTailCall())
4353 unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
4355 #if defined(UNIX_AMD64_ABI)
4356 opts.compNeedToAlignFrame = true; // this is currently required for the UNIX ABI to work correctly
4358 // ToDo: Remove this re-calculation of preallocatedArgCount and use the value assigned above.
4360 // First slots go in registers only, no stack needed.
4361 // TODO-Amd64-Unix-CQ This calculation is only accurate for integer arguments,
4362 // and ignores floating point args (it is overly conservative in that case).
4363 preallocatedArgCount = nonRegPassedStructSlots;
4364 if (argSlots > MAX_REG_ARG)
4366 preallocatedArgCount += argSlots - MAX_REG_ARG;
4368 #endif // UNIX_AMD64_ABI
4370 const unsigned outgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
4371 call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL));
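// Illustrative: with 6 outgoing stack slots on a 64-bit target this reserves
// 6 * 8 = 48 bytes; MIN_ARG_AREA_FOR_CALL keeps the area at least as large as
// callees may assume (e.g. the 4-slot register home area on Windows x64).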
4376 printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, outgoingArgSpaceSize=%d\n", argSlots,
4377 preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), outgoingArgSpaceSize);
4381 #endif // FEATURE_FIXED_OUT_ARGS
4383 /* Update the 'side effect' flags value for the call */
4385 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4387 // If the register arguments have already been determined
4388 // or we have no register arguments then we don't need to
4389 // call SortArgs() and EvalArgsToTemps()
4391 // For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
4392 // all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
4393 // is added to make sure we call EvalArgsToTemps.
4394 if (!reMorphing && (call->fgArgInfo->HasRegArgs()
4395 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4397 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4400 // This is the first time that we morph this call AND it has register arguments.
4401 // Follow into the code below and do the 'defer or eval to temp' analysis.
4403 call->fgArgInfo->SortArgs();
4405 call->fgArgInfo->EvalArgsToTemps();
4407 // We may have updated the arguments
4408 if (call->gtCallArgs)
4410 UpdateGT_LISTFlags(call->gtCallArgs);
4414 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4416 // Rewrite the struct args to be passed by value on stack or in registers.
4417 fgMorphSystemVStructArgs(call, hasStructArgument);
4419 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4421 #ifndef LEGACY_BACKEND
4422 // In the future we can migrate UNIX_AMD64 to use this
4423 // method instead of fgMorphSystemVStructArgs
4425 // We only build GT_FIELD_LISTs for MultiReg structs for the RyuJIT backend
4426 if (hasMultiregStructArgs)
4428 fgMorphMultiregStructArgs(call);
4430 #endif // LEGACY_BACKEND
4432 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4437 fgArgInfoPtr argInfo = call->fgArgInfo;
4438 for (unsigned curInx = 0; curInx < argInfo->ArgCount(); curInx++)
4440 fgArgTabEntryPtr curArgEntry = argInfo->ArgTable()[curInx];
4441 curArgEntry->Dump();
4449 #pragma warning(pop)
4452 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4453 // fgMorphSystemVStructArgs:
4454 // Rewrite the struct args to be passed by value on stack or in registers.
4457 // call: The call whose arguments need to be morphed.
4458 // hasStructArgument: Whether this call has struct arguments.
4460 void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
4462 unsigned flagsSummary = 0;
4466 if (hasStructArgument)
4468 fgArgInfoPtr allArgInfo = call->fgArgInfo;
4470 for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4472 // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
4473 // For such late args the gtCallArgList contains the setup arg node (which evaluates the arg.)
4474 // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
4475 // between the nodes in both lists. If the arg is not a late arg, the fgArgEntry->node points to itself,
4476 // otherwise it points to the node in the late args list.
4477 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4478 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4479 assert(fgEntryPtr != nullptr);
4480 GenTreePtr argx = fgEntryPtr->node;
4481 GenTreePtr lateList = nullptr;
4482 GenTreePtr lateNode = nullptr;
4486 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4488 assert(list->OperIsList());
4490 GenTreePtr argNode = list->Current();
4491 if (argx == argNode)
4498 assert(lateList != nullptr && lateNode != nullptr);
4500 GenTreePtr arg = argx;
4501 bool argListCreated = false;
4503 var_types type = arg->TypeGet();
4505 if (varTypeIsStruct(type))
4507 var_types originalType = type;
4508 // If we have already processed the arg...
4509 if (arg->OperGet() == GT_FIELD_LIST && varTypeIsStruct(arg))
4514 // If it is already an OBJ, it has already been set up properly.
4515 if (arg->OperGet() == GT_OBJ)
4517 assert(!fgEntryPtr->structDesc.passedInRegisters);
4521 assert(arg->OperGet() == GT_LCL_VAR || arg->OperGet() == GT_LCL_FLD ||
4522 (arg->OperGet() == GT_ADDR &&
4523 (arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD || arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR)));
4525 GenTreeLclVarCommon* lclCommon =
4526 arg->OperGet() == GT_ADDR ? arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
4527 if (fgEntryPtr->structDesc.passedInRegisters)
4529 if (fgEntryPtr->structDesc.eightByteCount == 1)
4531 // Change the type; the code below will change the LclVar to a LCL_FLD
4532 type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
4533 fgEntryPtr->structDesc.eightByteSizes[0]);
4535 else if (fgEntryPtr->structDesc.eightByteCount == 2)
4537 // Create LCL_FLD for each eightbyte.
4538 argListCreated = true;
4541 arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
4543 GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
4544 fgEntryPtr->structDesc.eightByteSizes[0]);
4545 GenTreeFieldList* fieldList =
4546 new (this, GT_FIELD_LIST) GenTreeFieldList(arg, 0, originalType, nullptr);
4547 fieldList->gtType = originalType; // Preserve the type. It is a special case.
4550 // Second eightbyte.
4551 GenTreeLclFld* newLclField = new (this, GT_LCL_FLD)
4552 GenTreeLclFld(GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc
4553 .eightByteClassifications[1],
4554 fgEntryPtr->structDesc.eightByteSizes[1]),
4555 lclCommon->gtLclNum, fgEntryPtr->structDesc.eightByteOffsets[1]);
4557 fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(newLclField, 0, originalType, fieldList);
4558 fieldList->gtType = originalType; // Preserve the type. It is a special case.
4559 newLclField->gtFieldSeq = FieldSeqStore::NotAField();
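// Illustrative result for a struct { long a; double b; } local V00:
// FIELD_LIST(LCL_FLD long V00 [+0], FIELD_LIST(LCL_FLD double V00 [+8], nullptr)),
// with the list node itself keeping the original struct type.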
4563 assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes
4568 // If we didn't change the type of the struct, it means its
4569 // classification doesn't allow it to be passed directly in
4570 // registers, so we need to pass a pointer to the destination
4571 // where we copied the struct.
4572 if (!argListCreated)
4574 if (fgEntryPtr->structDesc.passedInRegisters)
4580 // Make sure this is an addr node.
4581 if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
4583 arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
4586 assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);
4588 // Create an Obj of the temp to use it as a call argument.
4589 arg = gtNewObjNode(lvaGetStruct(lclCommon->gtLclNum), arg);
4596 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4597 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4598 assert(fgEntryPtr != nullptr);
4599 GenTreePtr argx = fgEntryPtr->node;
4600 GenTreePtr lateList = nullptr;
4601 GenTreePtr lateNode = nullptr;
4604 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4606 assert(list->OperIsList());
4608 GenTreePtr argNode = list->Current();
4609 if (argx == argNode)
4616 assert(lateList != nullptr && lateNode != nullptr);
4619 fgEntryPtr->node = arg;
4622 lateList->gtOp.gtOp1 = arg;
4626 args->gtOp.gtOp1 = arg;
4633 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4635 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4637 //-----------------------------------------------------------------------------
4638 // fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and
4639 // call fgMorphMultiregStructArg on each of them.
4642 // call: a GenTreeCall node that has one or more TYP_STRUCT arguments
4645 // We only call fgMorphMultiregStructArg for the register-passed TYP_STRUCT arguments.
4646 // The call to fgMorphMultiregStructArg will mutate the argument into the GT_FIELD_LIST form
4647 // which is only used for struct arguments.
4648 // If this method fails to find any TYP_STRUCT arguments it will assert.
4650 void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
4654 bool foundStructArg = false;
4655 unsigned initialFlags = call->gtFlags;
4656 unsigned flagsSummary = 0;
4657 fgArgInfoPtr allArgInfo = call->fgArgInfo;
4659 // Currently ARM64/ARM use this method to morph the MultiReg struct args;
4660 // in the future AMD64_UNIX will also use this method.
4661 CLANG_FORMAT_COMMENT_ANCHOR;
4664 assert(!"Logic error: no MultiregStructArgs for X86");
4666 #ifdef _TARGET_AMD64_
4667 #if defined(UNIX_AMD64_ABI)
4668 NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)");
4669 #else // WINDOWS_AMD64_ABI
4670 assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
4671 #endif // !UNIX_AMD64_ABI
4674 for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4676 // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
4677 // For such late args the gtCallArgList contains the setup arg node (which evaluates the arg.)
4678 // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
4679 // between the nodes in both lists. If the arg is not a late arg, the fgArgEntry->node points to itself,
4680 // otherwise it points to the node in the late args list.
4681 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4682 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4683 assert(fgEntryPtr != nullptr);
4684 GenTreePtr argx = fgEntryPtr->node;
4685 GenTreePtr lateList = nullptr;
4686 GenTreePtr lateNode = nullptr;
4690 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4692 assert(list->OperIsList());
4694 GenTreePtr argNode = list->Current();
4695 if (argx == argNode)
4702 assert(lateList != nullptr && lateNode != nullptr);
4705 GenTreePtr arg = argx;
4707 if (arg->TypeGet() == TYP_STRUCT)
4709 foundStructArg = true;
4711 arg = fgMorphMultiregStructArg(arg, fgEntryPtr);
4713 // Did we replace 'argx' with a new tree?
4716 fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node
4718 // link the new arg node into either the late arg list or the gtCallArgs list
4721 lateList->gtOp.gtOp1 = arg;
4725 args->gtOp.gtOp1 = arg;
4731 // We should only call this method when we actually have one or more multireg struct args
4732 assert(foundStructArg);
4735 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4738 //-----------------------------------------------------------------------------
4739 // fgMorphMultiregStructArg: Given a multireg TYP_STRUCT arg from a call argument list
4740 // Morph the argument into a set of GT_FIELD_LIST nodes.
4743 // arg - A GenTree node containing a TYP_STRUCT arg that
4744 // is to be passed in multiple registers
4745 // fgEntryPtr - the fgArgTabEntry information for the current 'arg'
4748 // arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT that is suitable
4749 // for passing in multiple registers.
4750 // If arg is a LclVar we check if it is struct promoted and has the right number of fields,
4751 // and if they are at the appropriate offsets we will use the struct promoted fields
4752 // in the GT_FIELD_LIST nodes that we create.
4753 // If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements,
4754 // we will use a set of GT_LCL_FLD nodes to access the various portions of the struct;
4755 // this also forces the struct to be stack allocated into the local frame.
4756 // For the GT_OBJ case we will clone the address expression and generate two (or more) indirections.
4758 // Currently the implementation handles ARM64/ARM and will NYI for other architectures.
4760 GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr fgEntryPtr)
4762 assert(arg->TypeGet() == TYP_STRUCT);
4764 #ifndef _TARGET_ARMARCH_
4765 NYI("fgMorphMultiregStructArg requires implementation for this target");
4769 if (fgEntryPtr->isSplit)
4771 if (fgEntryPtr->isHfaRegArg)
4773 // We cannot yet handle an HFA split struct morphed to a GT_FIELD_LIST
4774 NYI_ARM("Struct split between float registers and stack");
4776 else if (fgEntryPtr->numSlots + fgEntryPtr->numRegs > 4)
4781 else if (!fgEntryPtr->isHfaRegArg && fgEntryPtr->numSlots > 4)
4787 #if FEATURE_MULTIREG_ARGS
4788 // Examine 'arg' and set up argValue, objClass and structSize
4790 CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE;
4791 GenTreePtr argValue = arg; // normally argValue will be arg, but see right below
4792 unsigned structSize = 0;
4794 if (arg->OperGet() == GT_OBJ)
4796 GenTreeObj* argObj = arg->AsObj();
4797 objClass = argObj->gtClass;
4798 structSize = info.compCompHnd->getClassSize(objClass);
4800 // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR
4802 if (argObj->gtOp1->OperGet() == GT_ADDR)
4804 argValue = argObj->gtOp1->gtOp.gtOp1;
4807 else if (arg->OperGet() == GT_LCL_VAR)
4809 GenTreeLclVarCommon* varNode = arg->AsLclVarCommon();
4810 unsigned varNum = varNode->gtLclNum;
4811 assert(varNum < lvaCount);
4812 LclVarDsc* varDsc = &lvaTable[varNum];
4814 objClass = lvaGetStruct(varNum);
4815 structSize = varDsc->lvExactSize;
4817 noway_assert(objClass != nullptr);
4819 var_types hfaType = TYP_UNDEF;
4820 var_types elemType = TYP_UNDEF;
4821 unsigned elemCount = 0;
4822 unsigned elemSize = 0;
4823 var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0
4825 hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
4826 if (varTypeIsFloating(hfaType))
4829 elemSize = genTypeSize(elemType);
4830 elemCount = structSize / elemSize;
4831 assert(elemSize * elemCount == structSize);
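// Illustrative: an HFA struct { float x, y, z; } gives elemType TYP_FLOAT,
// elemSize == 4 and elemCount == 3, so each register slot below is a float.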
4832 for (unsigned inx = 0; inx < elemCount; inx++)
4834 type[inx] = elemType;
4839 #ifdef _TARGET_ARM64_
4840 assert(structSize <= 2 * TARGET_POINTER_SIZE);
4841 #elif defined(_TARGET_ARM_)
4842 assert(structSize <= 4 * TARGET_POINTER_SIZE);
4845 #ifdef _TARGET_ARM64_
4846 BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
4847 info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
4849 type[0] = getJitGCType(gcPtrs[0]);
4850 type[1] = getJitGCType(gcPtrs[1]);
4851 #elif defined(_TARGET_ARM_)
4852 BYTE gcPtrs[4] = {TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE};
4853 elemCount = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;
4854 info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
4855 for (unsigned inx = 0; inx < elemCount; inx++)
4857 type[inx] = getJitGCType(gcPtrs[inx]);
4859 #endif // _TARGET_ARM_
4861 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
4863 elemSize = TARGET_POINTER_SIZE;
4864 // We can safely widen the load to a multiple of TARGET_POINTER_SIZE bytes since we are loading from
4865 // a GT_LCL_VAR or a GT_LCL_FLD, which is properly padded and
4866 // lives in the stack frame or will be a promoted field.
4868 structSize = elemCount * TARGET_POINTER_SIZE;
4870 else // we must have a GT_OBJ
4872 assert(argValue->OperGet() == GT_OBJ);
4874 // We need to load the struct from an arbitrary address
4875 // and we can't read past the end of the struct,
4876 // so we adjust the last load type here.
4878 unsigned remainingBytes = structSize % TARGET_POINTER_SIZE;
4879 unsigned lastElem = elemCount - 1;
4880 if (remainingBytes != 0)
4882 switch (remainingBytes)
4885 type[lastElem] = TYP_BYTE;
4888 type[lastElem] = TYP_SHORT;
4890 #ifdef _TARGET_ARM64_
4892 type[lastElem] = TYP_INT;
4894 #endif // _TARGET_ARM64_
4896 noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
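// Illustrative of the adjustment above: a 10-byte struct on ARM64 has
// remainingBytes == 2, so its final element is loaded as a TYP_SHORT
// rather than reading a full 8-byte slot past the end of the struct.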
4902 // We should still have a TYP_STRUCT
4903 assert(argValue->TypeGet() == TYP_STRUCT);
4905 GenTreeFieldList* newArg = nullptr;
4907 // Are we passing a struct LclVar?
4909 if (argValue->OperGet() == GT_LCL_VAR)
4911 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
4912 unsigned varNum = varNode->gtLclNum;
4913 assert(varNum < lvaCount);
4914 LclVarDsc* varDsc = &lvaTable[varNum];
4916 // At this point any TYP_STRUCT LclVar must be an aligned struct
4917 // or an HFA struct, both of which are passed by value.
4919 assert((varDsc->lvSize() == elemCount * TARGET_POINTER_SIZE) || varDsc->lvIsHfa());
4921 varDsc->lvIsMultiRegArg = true;
4926 JITDUMP("Multireg struct argument V%02u : ", varNum);
4931 // This local variable must match the layout of the 'objClass' type exactly
4932 if (varDsc->lvIsHfa())
4934 // We have a HFA struct
4935 noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE));
4936 noway_assert(elemSize == genTypeSize(elemType));
4937 noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
4938 noway_assert(elemSize * elemCount == varDsc->lvExactSize);
4940 for (unsigned inx = 0; (inx < elemCount); inx++)
4942 noway_assert(type[inx] == elemType);
4947 #ifdef _TARGET_ARM64_
4948 // We must have a 16-byte struct (non-HFA)
4949 noway_assert(elemCount == 2);
4950 #elif defined(_TARGET_ARM_)
4951 noway_assert(elemCount <= 4);
4954 for (unsigned inx = 0; inx < elemCount; inx++)
4956 CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx];
4958 // We set up the type[inx] value above using the GC info from 'objClass'.
4959 // This GT_LCL_VAR must have the same GC layout info.
4961 if (currentGcLayoutType != TYPE_GC_NONE)
4963 noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType));
4967 // We may have used a small type when we set up the type[inx] values above.
4968 // We can safely widen this to TYP_I_IMPL.
4969 type[inx] = TYP_I_IMPL;
4974 #ifdef _TARGET_ARM64_
4975 // Is this LclVar a promoted struct with exactly 2 fields?
4976 // TODO-ARM64-CQ: Support struct promoted HFA types here
4977 if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa())
4979 // See if we have two promoted fields that start at offsets 0 and 8.
4980 unsigned loVarNum = lvaGetFieldLocal(varDsc, 0);
4981 unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE);
4983 // Did we find the promoted fields at the necessary offsets?
4984 if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM))
4986 LclVarDsc* loVarDsc = &lvaTable[loVarNum];
4987 LclVarDsc* hiVarDsc = &lvaTable[hiVarNum];
4989 var_types loType = loVarDsc->lvType;
4990 var_types hiType = hiVarDsc->lvType;
4992 if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
4994 // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer
4995 // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
4997 JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
5000 // we call lvaSetVarDoNotEnregister and do the proper transformation below.
5005 // We can use the struct's promoted fields as the two arguments
5007 GenTreePtr loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum);
5008 GenTreePtr hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum);
5010 // Create a new tree for 'arg'
5011 // replace the existing LDOBJ(ADDR(LCLVAR))
5012 // with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr))
5014 newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(loLclVar, 0, loType, nullptr);
5015 (void)new (this, GT_FIELD_LIST) GenTreeFieldList(hiLclVar, TARGET_POINTER_SIZE, hiType, newArg);
5022 // We will create a list of GT_LCL_FLD nodes to pass this struct
5024 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5026 #elif defined(_TARGET_ARM_)
5027 // Is this LclVar a promoted struct with exactly the same size?
5028 if (varDsc->lvPromoted && (varDsc->lvFieldCnt == elemCount) && !varDsc->lvIsHfa())
5030 // See if we have promoted fields at each pointer-sized offset.
5031 unsigned varNums[4];
5032 bool hasBadVarNum = false;
5033 for (unsigned inx = 0; inx < elemCount; inx++)
5035 varNums[inx] = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE * inx);
5036 if (varNums[inx] == BAD_VAR_NUM)
5038 hasBadVarNum = true;
5043 // Did we find the promoted fields at the necessary offsets?
5046 LclVarDsc* varDscs[4];
5047 var_types varType[4];
5048 bool varIsFloat = false;
5050 for (unsigned inx = 0; inx < elemCount; inx++)
5052 varDscs[inx] = &lvaTable[varNums[inx]];
5053 varType[inx] = varDscs[inx]->lvType;
5054 if (varTypeIsFloating(varType[inx]))
5056 // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the
5058 // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
5060 JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
5063 // we call lvaSetVarDoNotEnregister and do the proper transformation below.
5072 unsigned offset = 0;
5073 GenTreeFieldList* listEntry = nullptr;
5074 // We can use the struct's promoted fields as arguments
5075 for (unsigned inx = 0; inx < elemCount; inx++)
5077 GenTreePtr lclVar = gtNewLclvNode(varNums[inx], varType[inx], varNums[inx]);
5078 // Create a new tree for 'arg'
5079 // replace the existing LDOBJ(ADDR(LCLVAR))
5080 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(lclVar, offset, varType[inx], listEntry);
5081 if (newArg == nullptr)
5085 offset += TARGET_POINTER_SIZE;
5093 // We will create a list of GT_LCL_FLD nodes to pass this struct
5095 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5097 #endif // _TARGET_ARM_
5100 // If we didn't set newArg to a new FIELD_LIST tree
5102 if (newArg == nullptr)
5104 if (fgEntryPtr->regNum == REG_STK)
5106 // We leave this stack-passed argument alone.
5110 // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted)?
5111 // A GT_LCL_FLD could also contain a 16-byte struct or an HFA struct inside it.
5113 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
5115 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
5116 unsigned varNum = varNode->gtLclNum;
5117 assert(varNum < lvaCount);
5118 LclVarDsc* varDsc = &lvaTable[varNum];
5120 unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
5121 unsigned lastOffset = baseOffset + (elemCount * elemSize);
5123 // The allocated size of our LocalVar must be at least as big as lastOffset
5124 assert(varDsc->lvSize() >= lastOffset);
5126 if (varDsc->lvStructGcCount > 0)
5128 // alignment of the baseOffset is required
5129 noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
5130 noway_assert(elemSize == TARGET_POINTER_SIZE);
5131 unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE;
5132 const BYTE* gcPtrs = varDsc->lvGcLayout; // Get the GC layout for the local variable
5133 for (unsigned inx = 0; (inx < elemCount); inx++)
5135 // The GC information must match what we set up using 'objClass'
5136 noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
5139 else // this varDsc contains no GC pointers
5141 for (unsigned inx = 0; inx < elemCount; inx++)
5143 // The GC information must match what we set up using 'objClass'
5144 noway_assert(!varTypeIsGC(type[inx]));
5149 // We create a list of GT_LCL_FLD nodes to pass this struct
5151 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5153 // Create a new tree for 'arg'
5154 // replace the existing LDOBJ(ADDR(LCLVAR))
5155 // with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...)
5157 unsigned offset = baseOffset;
5158 GenTreeFieldList* listEntry = nullptr;
5159 for (unsigned inx = 0; inx < elemCount; inx++)
5161 elemSize = genTypeSize(type[inx]);
5162 GenTreePtr nextLclFld = gtNewLclFldNode(varNum, type[inx], offset);
5163 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(nextLclFld, offset, type[inx], listEntry);
5164 if (newArg == nullptr)
5171 // Are we passing a GT_OBJ struct?
5173 else if (argValue->OperGet() == GT_OBJ)
5175 GenTreeObj* argObj = argValue->AsObj();
5176 GenTreePtr baseAddr = argObj->gtOp1;
5177 var_types addrType = baseAddr->TypeGet();
5179 // Create a new tree for 'arg'
5180 // replace the existing LDOBJ(EXPR)
5181 // with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...)
5184 unsigned offset = 0;
5185 GenTreeFieldList* listEntry = nullptr;
5186 for (unsigned inx = 0; inx < elemCount; inx++)
5188 elemSize = genTypeSize(type[inx]);
5189 GenTreePtr curAddr = baseAddr;
5192 GenTreePtr baseAddrDup = gtCloneExpr(baseAddr);
5193 noway_assert(baseAddrDup != nullptr);
5194 curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL));
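// Illustrative: for a 16-byte struct the first field uses baseAddr directly
// (offset 0) and the second uses GT_ADD(clone-of-baseAddr, 8), so each GT_IND
// built below reads from its own address tree.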
5200 GenTreePtr curItem = gtNewOperNode(GT_IND, type[inx], curAddr);
5202 // For safety all GT_IND should have at least GTF_GLOB_REF set.
5203 curItem->gtFlags |= GTF_GLOB_REF;
5204 if (fgAddrCouldBeNull(curItem))
5206 // This indirection can cause a GPF if the address could be null.
5207 curItem->gtFlags |= GTF_EXCEPT;
5210 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(curItem, offset, type[inx], listEntry);
5211 if (newArg == nullptr)
5221 // If we reach here we should have set newArg to something
5222 if (newArg == nullptr)
5224 gtDispTree(argValue);
5225 assert(!"Missing case in fgMorphMultiregStructArg");
5230 printf("fgMorphMultiregStructArg created tree:\n");
5235 arg = newArg; // consider calling fgMorphTree(newArg);
5237 #endif // FEATURE_MULTIREG_ARGS
5242 // Make a copy of a struct variable if necessary, to pass to a callee.
5243 // The tree that computes the outgoing arg is stored back into the 'args' list entry (the function returns nothing).
5244 void Compiler::fgMakeOutgoingStructArgCopy(
5248 CORINFO_CLASS_HANDLE copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
5249 const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
5251 GenTree* argx = args->Current();
5252 noway_assert(argx->gtOper != GT_MKREFANY);
5253 // See if we need to insert a copy at all
5254 // Case 1: don't need a copy if it is the last use of a local. We can't determine that all of the time
5255 // but if there is only one use and no loops, the use must be last.
5256 GenTreeLclVarCommon* lcl = nullptr;
5257 if (argx->OperIsLocal())
5259 lcl = argx->AsLclVarCommon();
5261 else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal())
5263 lcl = argx->AsObj()->Addr()->AsLclVarCommon();
5267 unsigned varNum = lcl->AsLclVarCommon()->GetLclNum();
5268 if (lvaIsImplicitByRefLocal(varNum))
5270 LclVarDsc* varDsc = &lvaTable[varNum];
5271 // JIT_TailCall helper has an implicit assumption that all tail call arguments live
5272 // on the caller's frame. If an argument lives on the caller's caller's frame, it may get
5273 // overwritten if that frame is reused for the tail call. Therefore, we should always copy
5274 // struct parameters if they are passed as arguments to a tail call.
5275 if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt == 1) && !fgMightHaveLoop())
5277 varDsc->lvRefCnt = 0;
5278 args->gtOp.gtOp1 = lcl;
5279 fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
5282 JITDUMP("did not have to make outgoing copy for V%2d", varNum);
5288 if (fgOutgoingArgTemps == nullptr)
5290 fgOutgoingArgTemps = hashBv::Create(this);
5296 // Attempt to find a local we have already used for an outgoing struct and reuse it.
5297 // We do not reuse within a statement.
5298 if (!opts.MinOpts())
5301 FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps)
5303 LclVarDsc* varDsc = &lvaTable[lclNum];
5304 if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) &&
5305 !fgCurrentlyInUseArgTemps->testBit(lclNum))
5307 tmp = (unsigned)lclNum;
5309 JITDUMP("reusing outgoing struct arg");
5316 // Create the CopyBlk tree and insert it.
5320 // Here we don't need the unsafe-value-class check, since the addr of this temp is used only in the copyblk.
5321 tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
5322 lvaSetStruct(tmp, copyBlkClass, false);
5323 fgOutgoingArgTemps->setBit(tmp);
5326 fgCurrentlyInUseArgTemps->setBit(tmp);
5328 // TYP_SIMD structs should not be enregistered, since the ABI requires them to be
5329 // allocated on the stack and their address to be passed.
5330 if (lclVarIsSIMDType(tmp))
5332 lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct));
5335 // Create a reference to the temp
5336 GenTreePtr dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType);
5337 dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction.
5339 // TODO-Cleanup: This probably shouldn't be done here because arg morphing is done prior
5340 // to ref counting of the lclVars.
5341 lvaTable[tmp].incRefCnts(compCurBB->getBBWeight(this), this);
5344 if (argx->gtOper == GT_OBJ)
5346 argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT);
5350 argx->gtFlags |= GTF_DONT_CSE;
5353 // Copy the valuetype to the temp
5354 unsigned size = info.compCompHnd->getClassSize(copyBlkClass);
5355 GenTreePtr copyBlk = gtNewBlkOpNode(dest, argx, size, false /* not volatile */, true /* copyBlock */);
5356 copyBlk = fgMorphCopyBlock(copyBlk);
5358 #if FEATURE_FIXED_OUT_ARGS
5360 // Do the copy early, and evaluate the temp later (see EvalArgsToTemps)
5361 // When on Unix create LCL_FLD for structs passed in more than one registers. See fgMakeTmpArgNode
5362 GenTreePtr arg = copyBlk;
5364 #else // FEATURE_FIXED_OUT_ARGS
5366 // Structs are always on the stack, and thus never need temps
5367 // so we have to put the copy and temp all into one expression
5368 GenTreePtr arg = fgMakeTmpArgNode(tmp FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters));
5370 // Change the expression to "(tmp=val),tmp"
5371 arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
5373 #endif // FEATURE_FIXED_OUT_ARGS
5375 args->gtOp.gtOp1 = arg;
5376 call->fgArgInfo->EvalToTmp(argIndex, tmp, arg);
5382 // See declaration for specification comment.
5383 void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
5384 unsigned firstArgRegNum,
5385 regMaskTP* pArgSkippedRegMask)
5387 assert(varDsc->lvPromoted);
5388 // There's no way to do these calculations without breaking abstraction and assuming that
5389 // integer register arguments are consecutive ints. They are on ARM.
5391 // To start, figure out what register contains the last byte of the first argument.
5392 LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
5393 unsigned lastFldRegOfLastByte =
5394 (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
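// Illustrative (ARM32, 4-byte registers, firstArgRegNum == 0): a first field at
// offset 0 with size 8 ends in register (0 + 8 - 1) / 4 == 1; if the next field
// starts at offset 12 (register 3), the loop below records register 2 as skipped.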
5397 // Now we're keeping track of the register that the last field ended in; see what registers
5398 // subsequent fields start in, and whether any are skipped.
5399 // (We assume here the invariant that the fields are sorted in offset order.)
5400 for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++)
5402 unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset;
5403 LclVarDsc* fldVarDsc = &lvaTable[fldVarNum];
5404 unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
5405 assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields.
5406 // This loop enumerates the offsets of any registers skipped:
5407 // start at the first register after the one that contains the last byte of the previous field,
5408 // and stop before the first register of the current field.
5409 for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset;
5410 skippedRegOffsets++)
5412 // If the register number would not be an arg reg, we're done.
5413 if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG)
5415 *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets));
5417 lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5421 #endif // _TARGET_ARM_
5423 //****************************************************************************
5424 // fgFixupStructReturn:
5425 // The companion to impFixupCallStructReturn. Now that the importer is done,
5426 // change the gtType to the precomputed native return type.
5427 // Requires that callNode currently has a struct type.
5429 void Compiler::fgFixupStructReturn(GenTreePtr callNode)
5431 assert(varTypeIsStruct(callNode));
5433 GenTreeCall* call = callNode->AsCall();
5434 bool callHasRetBuffArg = call->HasRetBufArg();
5435 bool isHelperCall = call->IsHelperCall();
5437 // Decide on the proper return type for this call that currently returns a struct
5439 CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd;
5440 Compiler::structPassingKind howToReturnStruct;
5441 var_types returnType;
5443 // There are a couple of Helper Calls that say they return a TYP_STRUCT but they
5444 // expect this method to re-type this to a TYP_REF (what is in call->gtReturnType)
5446 // CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD
5447 // CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD
5448 // CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL
5452 assert(!callHasRetBuffArg);
5453 assert(retClsHnd == NO_CLASS_HANDLE);
5455 // Now that we are past the importer, re-type this node
5456 howToReturnStruct = SPK_PrimitiveType;
5457 returnType = (var_types)call->gtReturnType;
5461 returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
5464 if (howToReturnStruct == SPK_ByReference)
5466 assert(returnType == TYP_UNKNOWN);
5467 assert(callHasRetBuffArg);
5471 assert(returnType != TYP_UNKNOWN);
5473 if (returnType != TYP_STRUCT)
5475 // Widen the primitive type if necessary
5476 returnType = genActualType(returnType);
5478 call->gtType = returnType;
5481 #if FEATURE_MULTIREG_RET
5482 // Either we don't have a struct now or if struct, then it is a struct returned in regs or in return buffer.
5483 assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg);
5484 #else // !FEATURE_MULTIREG_RET
5485 // No more struct returns
5486 assert(call->TypeGet() != TYP_STRUCT);
5489 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5490 // If it was a struct return, it has been transformed into a call
5491 // with a return buffer (that returns TYP_VOID) or into a return
5492 // of a primitive/enregisterable type
5493 assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
5497 /*****************************************************************************
5499 * A little helper used to rearrange nested commutative operations. The
5500 * effect is that nested associative, commutative operations are transformed
5501 * into a 'left-deep' tree, i.e. into something like this:
5503 * (((a op b) op c) op d) op...
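 *  For example, "a + (b + (c + d))" is rewritten to "((a + b) + c) + d",
 *  provided no overflow checks, CSE marks, or GC-pointer typing rules block
 *  the reordering (see the bail-out checks below).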
5508 void Compiler::fgMoveOpsLeft(GenTreePtr tree)
5516 op1 = tree->gtOp.gtOp1;
5517 op2 = tree->gtOp.gtOp2;
5518 oper = tree->OperGet();
5520 noway_assert(GenTree::OperIsCommutative(oper));
5521 noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL);
5522 noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
5523 noway_assert(oper == op2->gtOper);
5525 // Commutativity doesn't hold if overflow checks are needed
5527 if (tree->gtOverflowEx() || op2->gtOverflowEx())
5532 if (gtIsActiveCSE_Candidate(op2))
5534 // If we have marked op2 as a CSE candidate,
5535 // we can't perform a commutative reordering
5536 // because any value numbers that we computed for op2
5537 // will be incorrect after performing a commutative reordering
5542 if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
5547 // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
5548 if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
5553 if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN)
5555 // We could deal with this, but we were always broken and just hit the assert
5556 // below regarding flags, which means it's not frequent, so we will just bail out.
5561 noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());
5563 GenTreePtr ad1 = op2->gtOp.gtOp1;
5564 GenTreePtr ad2 = op2->gtOp.gtOp2;
5566 // Compiler::optOptimizeBools() can create a GT_OR of two GC pointers yielding a TYP_INT
5567 // We cannot reorder such GT_OR trees
5569 if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
5574 /* Change "(x op (y op z))" to "(x op y) op z" */
5575 /* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */
5577 GenTreePtr new_op1 = op2;
5579 new_op1->gtOp.gtOp1 = op1;
5580 new_op1->gtOp.gtOp2 = ad1;
5582 /* Change the flags. */
5584 // Make sure we aren't throwing away any flags
5585 noway_assert((new_op1->gtFlags &
5586 ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
5587 GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated
5588 GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0);
5591 (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
5592 (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT);
5594 /* Retype new_op1 if it has become (or is no longer) a GC ptr. */
5596 if (varTypeIsGC(op1->TypeGet()))
5598 noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5599 oper == GT_ADD) || // byref(ref + (int+int))
5600 (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5601 oper == GT_OR)); // int(gcref | int(gcref|intval))
5603 new_op1->gtType = tree->gtType;
5605 else if (varTypeIsGC(ad2->TypeGet()))
5607 // Neither ad1 nor op1 is a GC pointer, so new_op1 isn't one either.
5608 noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
5609 new_op1->gtType = TYP_I_IMPL;
5612 // If new_op1 is a new expression, assign it a new unique value number.
5613 // (vnStore is null before the ValueNumber phase has run.)
5614 if (vnStore != nullptr)
5616 // We can only keep the old value number on new_op1 if both op1 and ad2
5617 // have the same non-NoVN value numbers. Since op is commutative, comparing
5618 // only ad2 and op1 is enough.
5619 if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5620 (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5621 (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal()))
5623 new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet()));
5627 tree->gtOp.gtOp1 = new_op1;
5628 tree->gtOp.gtOp2 = ad2;
5630 /* If 'new_op1' is now the same nested op, process it recursively */
5632 if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
5634 fgMoveOpsLeft(new_op1);
5637 /* If 'ad2' is now the same nested op, process it as well.
5638 * Instead of recursing, we set up op1 and op2 for the next loop iteration.
5643 } while ((op2->gtOper == oper) && !op2->gtOverflowEx());
5650 /*****************************************************************************/
5652 void Compiler::fgSetRngChkTarget(GenTreePtr tree, bool delay)
5654 GenTreeBoundsChk* bndsChk = nullptr;
5655 SpecialCodeKind kind = SCK_RNGCHK_FAIL;
5658 if ((tree->gtOper == GT_ARR_BOUNDS_CHECK) || (tree->gtOper == GT_SIMD_CHK))
5659 #else // FEATURE_SIMD
5660 if (tree->gtOper == GT_ARR_BOUNDS_CHECK)
5661 #endif // FEATURE_SIMD
5663 bndsChk = tree->AsBoundsChk();
5664 kind = tree->gtBoundsChk.gtThrowKind;
5668 noway_assert((tree->gtOper == GT_ARR_ELEM) || (tree->gtOper == GT_ARR_INDEX));
5672 unsigned callStkDepth = fgPtrArgCntCur;
5674 // only x86 pushes args
5675 const unsigned callStkDepth = 0;
5682 // we need to initialize this field
5683 if (fgGlobalMorph && bndsChk != nullptr)
5685 bndsChk->gtStkDepth = callStkDepth;
5689 if (!opts.compDbgCode)
5691 if (delay || compIsForInlining())
5693 /* We delay this until after loop-oriented range check
5694 analysis. For now we merely store the current stack
5695 level in the tree node.
5697 if (bndsChk != nullptr)
5699 noway_assert(!bndsChk->gtIndRngFailBB || previousCompletedPhase >= PHASE_OPTIMIZE_LOOPS);
5700 bndsChk->gtStkDepth = callStkDepth;
5705 /* Create/find the appropriate "range-fail" label */
5707 // fgPtrArgCntCur is only valid for global morph or if we walk full stmt.
5708 noway_assert((bndsChk != nullptr) || fgGlobalMorph);
5710 unsigned stkDepth = (bndsChk != nullptr) ? bndsChk->gtStkDepth : callStkDepth;
5712 BasicBlock* rngErrBlk = fgRngChkTarget(compCurBB, stkDepth, kind);
5714 /* Add the label to the indirection node */
5716 if (bndsChk != nullptr)
5718 bndsChk->gtIndRngFailBB = gtNewCodeRef(rngErrBlk);
5724 /*****************************************************************************
5726 * Expand a GT_INDEX node and fully morph the child operands
5728 * The original GT_INDEX node is bashed into the GT_IND node that accesses
5729 * the array element. We expand the GT_INDEX node into a larger tree that
5730 * evaluates the array base and index. The simplest expansion is a GT_COMMA
5731 * with a GT_ARR_BOUNDS_CHECK and a GT_IND with a GTF_INX_RNGCHK flag.
5732 * For complex array or index expressions one or more GT_COMMA assignments
5733 * are inserted so that we only evaluate the array or index expressions once.
5735 * The fully expanded tree is then morphed. This causes gtFoldExpr to
5736 * perform local constant prop, reorder the constants in the tree, and fold them.
5739 * We then parse the resulting array element expression in order to locate
5740 * and label the constants and variables that occur in the tree.
5743 const int MAX_ARR_COMPLEXITY = 4;
5744 const int MAX_INDEX_COMPLEXITY = 4;
5746 GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
5748 noway_assert(tree->gtOper == GT_INDEX);
5749 GenTreeIndex* asIndex = tree->AsIndex();
5751 var_types elemTyp = tree->TypeGet();
5752 unsigned elemSize = tree->gtIndex.gtIndElemSize;
5753 CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass;
5755 noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);
5758 if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= getSIMDVectorRegisterByteLength())
5760 // If this is a SIMD type, this is the point at which we lose the type information,
5761 // so we need to set the correct type on the GT_IND.
5762 // (We don't care about the base type here, so we only check, but don't retain, the return value).
5763 unsigned simdElemSize = 0;
5764 if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN)
5766 assert(simdElemSize == elemSize);
5767 elemTyp = getSIMDTypeForSize(elemSize);
5768 // This is the new type of the node.
5769 tree->gtType = elemTyp;
5770 // Now set elemStructType to null so that we don't confuse value numbering.
5771 elemStructType = nullptr;
5774 #endif // FEATURE_SIMD
5776 GenTreePtr arrRef = asIndex->Arr();
5777 GenTreePtr index = asIndex->Index();
5779 // Set up the array length's offset into lenOffs
5780 // and the first element's offset into elemOffs
5783 if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
5785 lenOffs = offsetof(CORINFO_String, stringLen);
5786 elemOffs = offsetof(CORINFO_String, chars);
5787 tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE
5789 else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
5791 lenOffs = offsetof(CORINFO_RefArray, length);
5792 elemOffs = eeGetEEInfo()->offsetOfObjArrayData;
5794 else // We have a standard array
5796 lenOffs = offsetof(CORINFO_Array, length);
5797 elemOffs = offsetof(CORINFO_Array, u1Elems);
5800 bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
5801 bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0);
5803 GenTreePtr arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
5804 GenTreePtr indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression
5805 GenTreePtr bndsChk = nullptr;
5807 // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
5810 GenTreePtr arrRef2 = nullptr; // The second copy will be used in array address expression
5811 GenTreePtr index2 = nullptr;
5813 // If the arrRef expression involves an assignment, a call or reads from global memory,
5814 // then we *must* allocate a temporary in which to "localize" those values,
5815 // to ensure that the same values are used in the bounds check and the actual address computation.
5817 // Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
5818 // Note that if 'arrRef' is a GT_FIELD, it has not yet been morphed so its true
5819 // complexity is not exposed. (Without that condition there are cases of local struct
5820 // fields that were previously, needlessly, marked as GTF_GLOB_REF, and when that was
5821 // fixed, there were some regressions that were mostly ameliorated by adding this condition.)
5823 if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
5824 gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD))
5826 unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5827 arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef);
5828 arrRef = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5829 arrRef2 = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5833 arrRef2 = gtCloneExpr(arrRef);
5834 noway_assert(arrRef2 != nullptr);
5837 // If the index expression involves an assignment, a call or reads from global memory,
5838 // we *must* allocate a temporary in which to "localize" those values,
5839 // to ensure that the same values are used in the bounds check and the actual address computation.
5841 // Also we allocate the temporary when the index is sufficiently complex/expensive.
5843 if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) ||
5844 (arrRef->OperGet() == GT_FIELD))
5846 unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5847 indexDefn = gtNewTempAssign(indexTmpNum, index);
5848 index = gtNewLclvNode(indexTmpNum, index->TypeGet());
5849 index2 = gtNewLclvNode(indexTmpNum, index->TypeGet());
5853 index2 = gtCloneExpr(index);
5854 noway_assert(index2 != nullptr);
5857 // Next introduce a GT_ARR_BOUNDS_CHECK node
5858 var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.
5860 #ifdef _TARGET_64BIT_
5861 // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case
5862 // of a 64 bit architecture this means the array index can potentially be a TYP_LONG, so for this case,
5863 // the comparison will have to be widened to 64 bits.
5864 if (index->TypeGet() == TYP_I_IMPL)
5866 bndsChkType = TYP_I_IMPL;
5868 #endif // _TARGET_64BIT_
5870 GenTree* arrLen = new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arrRef, (int)lenOffs);
5872 if (bndsChkType != TYP_INT)
5874 arrLen = gtNewCastNode(bndsChkType, arrLen, bndsChkType);
5877 GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
5878 GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL);
5880 bndsChk = arrBndsChk;
5882 // Make sure to increment ref-counts if already ref-counted.
5883 if (lvaLocalVarRefCounted)
5885 lvaRecursiveIncRefCounts(index);
5886 lvaRecursiveIncRefCounts(arrRef);
5889 // Now we'll switch to using the second copies for arrRef and index
5890 // to compute the address expression
5896 // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"
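// Illustrative shape of the address tree built below (a sketch, assuming elemSize > 1
// so the multiply is emitted, and before any constant folding):
//
//                GT_ADD (TYP_BYREF)               addr
//                /                \
//         GT_ADD (TYP_BYREF)     GT_CNS_INT (elemOffs)
//         /              \
//     arrRef        GT_MUL (TYP_I_IMPL)
//                   /              \
//                index        GT_CNS_INT (elemSize)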
5900 #ifdef _TARGET_64BIT_
5901 // Widen 'index' on 64-bit targets
5902 if (index->TypeGet() != TYP_I_IMPL)
5904 if (index->OperGet() == GT_CNS_INT)
5906 index->gtType = TYP_I_IMPL;
5910 index = gtNewCastNode(TYP_I_IMPL, index, TYP_I_IMPL);
5913 #endif // _TARGET_64BIT_
5915 /* Scale the index value if necessary */
5918 GenTreePtr size = gtNewIconNode(elemSize, TYP_I_IMPL);
5920 // Fix 392756 WP7 Crossgen
5922 // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
5923 // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
5924 // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE.
5926 size->gtFlags |= GTF_DONT_CSE;
5928 /* Multiply by the array element size */
5929 addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
5936 /* Add the object ref to the element's offset */
5938 addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
5940 /* Add the first element's offset */
5942 GenTreePtr cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
5944 addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, cns);
5946 #if SMALL_TREE_NODES
5947 assert((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL);
5950 // Change the original GT_INDEX node into a GT_IND node
5951 tree->SetOper(GT_IND);
5953 // If the indexed element is a floating-point type, notify the compiler that
5954 // we'll potentially use floating point registers at the time of codegen.
5955 if (varTypeIsFloating(tree->gtType))
5957 this->compFloatingPointUsed = true;
5960 // We've now consumed the GTF_INX_RNGCHK, and the node
5961 // is no longer a GT_INDEX node.
5962 tree->gtFlags &= ~GTF_INX_RNGCHK;
5964 tree->gtOp.gtOp1 = addr;
5966 // This is an array index expression.
5967 tree->gtFlags |= GTF_IND_ARR_INDEX;
5969 /* An indirection will cause a GPF if the address is null */
5970 tree->gtFlags |= GTF_EXCEPT;
5974 tree->gtFlags |= GTF_DONT_CSE;
5977 // Store information about it.
5978 GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType));
5980 // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it.
5982 GenTreePtr indTree = tree;
5984 // Did we create a bndsChk tree?
5987 // Use a GT_COMMA node to prepend the array bound check
5989 tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree);
5991 /* Mark the indirection node as needing a range check */
5992 fgSetRngChkTarget(bndsChk);
5995 if (indexDefn != nullptr)
5997 // Use a GT_COMMA node to prepend the index assignment
5999 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
6001 if (arrRefDefn != nullptr)
6003 // Use a GT_COMMA node to prepend the arrRef assignment
6005 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
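// Illustrative final shape (a sketch, for the case where both temps and a bounds check
// were created):
//
//   COMMA(arrRefDefn, COMMA(indexDefn, COMMA(GT_ARR_BOUNDS_CHECK, GT_IND(addr))))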
6008 // Currently we morph the tree to perform some folding operations prior
6009 // to attaching fieldSeq info and labeling constant array index contributions
6013 // Ideally we just want to proceed to attaching fieldSeq info and labeling the
6014 // constant array index contributions, but the morphing operation may have changed
6015 // the 'tree' into something that now unconditionally throws an exception.
6017 // In such a case the gtEffectiveVal could be a new tree, or its gtOper could be modified,
6018 // or it could be left unchanged. If it is unchanged then we should not return;
6019 // instead we should proceed to attaching fieldSeq info, etc...
6021 GenTreePtr arrElem = tree->gtEffectiveVal();
6023 if (fgIsCommaThrow(tree))
6025 if ((arrElem != indTree) || // A new tree node may have been created
6026 (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
6028 return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
6032 assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
6034 addr = arrElem->gtOp.gtOp1;
6036 assert(addr->TypeGet() == TYP_BYREF);
6038 GenTreePtr cnsOff = nullptr;
6039 if (addr->OperGet() == GT_ADD)
6041 if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
6043 cnsOff = addr->gtOp.gtOp2;
6044 addr = addr->gtOp.gtOp1;
6047 while ((addr->OperGet() == GT_ADD) || (addr->OperGet() == GT_SUB))
6049 assert(addr->TypeGet() == TYP_BYREF);
6050 GenTreePtr index = addr->gtOp.gtOp2;
6052 // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
6053 index->LabelIndex(this);
6055 addr = addr->gtOp.gtOp1;
6057 assert(addr->TypeGet() == TYP_REF);
6059 else if (addr->OperGet() == GT_CNS_INT)
6064 FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
6066 if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs))
6068 // Assign it the [#FirstElem] field sequence
6070 cnsOff->gtIntCon.gtFieldSeq = firstElemFseq;
6072 else // We have folded the first element's offset with the index expression
6074 // Build the [#ConstantIndex, #FirstElem] field sequence
6076 FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
6077 FieldSeqNode* fieldSeq = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq);
6079 if (cnsOff == nullptr) // It must have folded into a zero offset
6081 // Record in the general zero-offset map.
6082 GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
6086 cnsOff->gtIntCon.gtFieldSeq = fieldSeq;
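// Worked example (assuming elemOffs == 16 and elemSize == 4): for 'a[2]' the offset may
// fold to the constant 16 + 2*4 == 24, which no longer equals elemOffs, so the constant
// gets the [#ConstantIndex, #FirstElem] sequence; an unfolded '+16' node would get just
// [#FirstElem].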
6094 /*****************************************************************************
6096 * Wrap fixed stack arguments for varargs functions to go through varargs
6097 * cookie to access them, except for the cookie itself.
6099 * Non-x86 platforms are allowed to access all arguments directly
6100 * so we don't need this code.
6103 GenTreePtr Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs)
6105 /* For the fixed stack arguments of a varargs function, we need to go
6106 through the varargs cookie to access them, except for the
6109 LclVarDsc* varDsc = &lvaTable[lclNum];
6111 if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg)
6113 // Create a node representing the local pointing to the base of the args
6114 GenTreePtr ptrArg =
6115 gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL),
6116 gtNewIconNode(varDsc->lvStkOffs - codeGen->intRegState.rsCalleeRegArgCount * sizeof(void*) +
6117 lclOffs));
6119 // Access the argument through the local
6121 if (varType == TYP_STRUCT)
6123 tree = gtNewBlockVal(ptrArg, varDsc->lvExactSize);
6127 tree = gtNewOperNode(GT_IND, varType, ptrArg);
6129 tree->gtFlags |= GTF_IND_TGTANYWHERE;
6131 if (varDsc->lvAddrExposed)
6133 tree->gtFlags |= GTF_GLOB_REF;
6136 return fgMorphTree(tree);
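// Illustrative result for a non-struct fixed arg (a sketch):
//   IND(varType, GT_SUB(LCL_VAR(lvaVarargsBaseOfStkArgs), cns))
// i.e. the argument is reached relative to the varargs base pointer rather than
// directly off the frame.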
6143 /*****************************************************************************
6145 * Transform the given GT_LCL_VAR tree for code generation.
6148 GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree, bool forceRemorph)
6150 assert(tree->gtOper == GT_LCL_VAR);
6152 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
6153 var_types varType = lvaGetRealType(lclNum);
6154 LclVarDsc* varDsc = &lvaTable[lclNum];
6156 if (varDsc->lvAddrExposed)
6158 tree->gtFlags |= GTF_GLOB_REF;
6162 if (info.compIsVarArgs)
6164 GenTreePtr newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0);
6165 if (newTree != nullptr)
6167 if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
6169 fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
6174 #endif // _TARGET_X86_
6176 /* If not during the global morphing phase bail */
6178 if (!fgGlobalMorph && !forceRemorph)
6183 bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0;
6185 noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr
6187 if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad())
6189 #if LOCAL_ASSERTION_PROP
6190 /* Assertion prop can tell us to omit adding a cast here */
6191 if (optLocalAssertionProp && optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX)
6196 /* Small-typed arguments and aliased locals are normalized on load.
6197 Other small-typed locals are normalized on store.
6198 They are also normalized on load under the debugger, since the debugger could write to the variable.
6199 If this is one of the former, insert a narrowing cast on the load,
6200 i.e. convert: var-short --> cast-short(var-int) */
6202 tree->gtType = TYP_INT;
6203 fgMorphTreeDone(tree);
6204 tree = gtNewCastNode(TYP_INT, tree, varType);
6205 fgMorphTreeDone(tree);
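// For example, a normalize-on-load TYP_SHORT local read becomes
// GT_CAST(TYP_SHORT, GT_LCL_VAR(TYP_INT)), so consumers always see a value whose
// upper bits are properly normalized.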
6212 /*****************************************************************************
6213 Grab a temp for big offset morphing.
6214 This method will grab a new temp if no temp of this "type" has been created yet;
6215 otherwise it will return the cached one.
6217 unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type)
6219 unsigned lclNum = fgBigOffsetMorphingTemps[type];
6221 if (lclNum == BAD_VAR_NUM)
6223 // We haven't created a temp for this kind of type. Create one now.
6224 lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing"));
6225 fgBigOffsetMorphingTemps[type] = lclNum;
6229 // We better get the right type.
6230 noway_assert(lvaTable[lclNum].TypeGet() == type);
6233 noway_assert(lclNum != BAD_VAR_NUM);
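// Usage sketch: fgMorphField calls fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet())),
// so every big-offset field morph needing a pointer temp of the same type reuses one local.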
6237 /*****************************************************************************
6239 * Transform the given GT_FIELD tree for code generation.
6242 GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
6244 assert(tree->gtOper == GT_FIELD);
6246 CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd;
6247 unsigned fldOffset = tree->gtField.gtFldOffset;
6248 GenTreePtr objRef = tree->gtField.gtFldObj;
6249 bool fieldMayOverlap = false;
6250 bool objIsLocal = false;
6252 if (fgGlobalMorph && (objRef != nullptr) && (objRef->gtOper == GT_ADDR))
6254 // Make sure we've checked if 'objRef' is an address of an implicit-byref parameter.
6255 // If it is, fgMorphImplicitByRefArgs may change it to a different opcode, which the
6256 // simd field rewrites are sensitive to.
6257 fgMorphImplicitByRefArgs(objRef);
6260 noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) ||
6261 ((tree->gtFlags & GTF_GLOB_REF) != 0));
6263 if (tree->gtField.gtFldMayOverlap)
6265 fieldMayOverlap = true;
6266 // Reset the flag because we may reuse the node.
6267 tree->gtField.gtFldMayOverlap = false;
6271 // If this field belongs to a SIMD struct, translate it to a SIMD intrinsic.
6274 GenTreePtr newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
6275 if (newTree != tree)
6277 newTree = fgMorphSmpOp(newTree);
6281 else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
6283 GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr();
6286 lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUGARG(DNER_LocalField));
6291 /* Is this an instance data member? */
6296 objIsLocal = objRef->IsLocal();
6298 if (tree->gtFlags & GTF_IND_TLS_REF)
6300 NO_WAY("instance field can not be a TLS ref.");
6303 /* We'll create the expression "*(objRef + mem_offs)" */
6305 noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL);
6307 // An optimization for Contextful classes:
6308 // we unwrap the proxy when we have a 'this reference'
6309 if (info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef))
6311 objRef = fgUnwrapProxy(objRef);
Now we have a tree like this:

                          +--------------------+
                          |      GT_FIELD      |   tree
                          +----------+---------+
                                     |
                      +--------------+-------------+
                      |  tree->gtField.gtFldObj    |
                      +--------------+-------------+

We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):

                          +--------------------+
                          |   GT_IND/GT_OBJ    |   tree
                          +---------+----------+
                                    |
                          +---------+----------+
                          |       GT_ADD       |   addr
                          +---------+----------+
                                   /  \
                                  /    \
                 +-------------------+  +----------------------+
                 |       objRef      |  |      fldOffset       |
                 |                   |  | (when fldOffset !=0) |
                 +-------------------+  +----------------------+

or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):

                          +--------------------+
                          |   GT_IND/GT_OBJ    |   tree
                          +----------+---------+
                                     |
                          +----------+---------+
                          |      GT_COMMA      |   comma2
                          +----------+---------+
                                    /  \
                                   /    \
           +---------+----------+        +---------+----------+
     comma |      GT_COMMA      |        | "+" (i.e. GT_ADD)  |   addr
           +---------+----------+        +---------+----------+
                    /  \                          /  \
                   /    \                        /    \
        +-----+-----+  +-----+-----+     +---------+  +-----------+
    asg |  GT_ASG   |  |  GT_IND   | ind |  tmpLcl |  | fldOffset |
        +-----+-----+  +-----+-----+     +---------+  +-----------+
              /  \            |
             /    \           |
   +-----------+  +-----------+  +-----------+
   |  tmpLcl   |  |  objRef   |  |  tmpLcl   |
   +-----------+  +-----------+  +-----------+
6382 var_types objRefType = objRef->TypeGet();
6384 GenTreePtr comma = nullptr;
6386 bool addedExplicitNullCheck = false;
6388 // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field,
6389 // and thus is equivalent to a MACK_Ind with zero offset.
6390 MorphAddrContext defMAC(MACK_Ind);
6396 // This flag is set to enable the "conservative" style of explicit null-check insertion.
6397 // This means that we insert an explicit null check whenever we create a byref by adding a
6398 // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately
6399 // dereferenced). The alternative is "aggressive", which would not insert such checks (for
6400 // small offsets); in this plan, we would transfer some null-checking responsibility to
6401 // callees of methods taking byref parameters. They would have to add explicit null checks
6402 // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in
6403 // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
6404 // large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
6405 // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
6406 // This is left here to point out how to implement it.
6407 CLANG_FORMAT_COMMENT_ANCHOR;
6409 #define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1
6411 // If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression
6412 // whose address is being taken is either a local or static variable, whose address is necessarily
6413 // non-null, or else it is a field dereference, which will do its own bounds checking if necessary.
6414 if (objRef->gtOper != GT_ADDR && ((mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind) &&
6415 (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset)
6416 #if CONSERVATIVE_NULL_CHECK_BYREF_CREATION
6417 || (mac->m_kind == MACK_Addr && (mac->m_totalOffset + fldOffset > 0))
6418 #else
6419 || (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr &&
6420 (mac->m_totalOffset + fldOffset > 0))
6427 printf("Before explicit null check morphing:\n");
6433 // Create the "comma" subtree
6435 GenTreePtr asg = nullptr;
6440 if (objRef->gtOper != GT_LCL_VAR)
6442 lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet()));
6444 // Create the "asg" node
6445 asg = gtNewTempAssign(lclNum, objRef);
6449 lclNum = objRef->gtLclVarCommon.gtLclNum;
6452 // Create the "nullchk" node.
6453 // Make it TYP_BYTE so we only dereference it for 1 byte.
6454 GenTreePtr lclVar = gtNewLclvNode(lclNum, objRefType);
6455 nullchk = new (this, GT_NULLCHECK) GenTreeIndir(GT_NULLCHECK, TYP_BYTE, lclVar, nullptr);
6457 nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections
6459 // An indirection will cause a GPF if the address is null.
6460 nullchk->gtFlags |= GTF_EXCEPT;
6462 compCurBB->bbFlags |= BBF_HAS_NULLCHECK;
6463 optMethodFlags |= OMF_HAS_NULLCHECK;
6467 // Create the "comma" node.
6468 comma = gtNewOperNode(GT_COMMA,
6469 TYP_VOID, // We don't want to return anything from this "comma" node.
6470 // Set the type to TYP_VOID, so we can select "cmp" instruction
6471 // instead of "mov" instruction later on.
6479 addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node.
6481 addedExplicitNullCheck = true;
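// Illustrative state at this point (a sketch, assuming objRef was not already a lclVar):
//   comma == COMMA(asg: tmpLcl = objRef, NULLCHK(tmpLcl)) and addr == LCL_VAR(tmpLcl);
// the fldOffset add and the enclosing "comma2" are built further below.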
6483 else if (fldOffset == 0)
6485 // Generate the "addr" node.
6487 FieldSeqNode* fieldSeq =
6488 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6489 GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
6496 #ifdef FEATURE_READYTORUN_COMPILER
6497 if (tree->gtField.gtFieldLookup.addr != nullptr)
6499 GenTreePtr baseOffset = gtNewIconEmbHndNode(tree->gtField.gtFieldLookup.addr, nullptr, GTF_ICON_FIELD_HDL);
6501 if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE)
6503 baseOffset = gtNewOperNode(GT_IND, TYP_I_IMPL, baseOffset);
6507 gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr, baseOffset);
6512 // Generate the "addr" node.
6513 /* Add the member offset to the object's address */
6514 FieldSeqNode* fieldSeq =
6515 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6516 addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr,
6517 gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq));
6520 // Now let's set the "tree" as a GT_IND tree.
6522 tree->SetOper(GT_IND);
6523 tree->gtOp.gtOp1 = addr;
6525 if (fgAddrCouldBeNull(addr))
6527 // This indirection can cause a GPF if the address could be null.
6528 tree->gtFlags |= GTF_EXCEPT;
6531 if (addedExplicitNullCheck)
6534 // Create "comma2" node and link it to "tree".
6537 comma2 = gtNewOperNode(GT_COMMA,
6538 addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node.
6540 tree->gtOp.gtOp1 = comma2;
6546 if (addedExplicitNullCheck)
6548 printf("After adding explicit null check:\n");
6554 else /* This is a static data member */
6556 if (tree->gtFlags & GTF_IND_TLS_REF)
6558 // Thread Local Storage static field reference
6560 // Field ref is a TLS 'Thread-Local-Storage' reference
// Build this tree:  IND(*) #                          (the final load of the TLS static)
//                    |
//                   ADD(I_IMPL)
//                  / \
//                 /  CNS(fldOffset)
//                /
//          IND(I_IMPL) == [Base of this DLL's TLS]
//                |
//             ADD(I_IMPL)
//             / \
//            /   CNS(IdValue*4) or MUL
//           /                       / \
//          IND(I_IMPL)             /  CNS(4)
//           |                     /
//          CNS(TLS_HDL,0x2C)    IND
//                                |
//                               CNS(pIdAddr)
//
// # Denotes the original node
6584 void** pIdAddr = nullptr;
6585 unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr);
6588 // If we can access the TLS DLL index ID value directly,
6589 // then pIdAddr will be NULL and
6590 // IdValue will be the actual TLS DLL index ID
6592 GenTreePtr dllRef = nullptr;
6593 if (pIdAddr == nullptr)
6597 dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL);
6602 dllRef = gtNewIconHandleNode((size_t)pIdAddr, GTF_ICON_STATIC_HDL);
6603 dllRef = gtNewOperNode(GT_IND, TYP_I_IMPL, dllRef);
6604 dllRef->gtFlags |= GTF_IND_INVARIANT;
6608 dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL));
6611 #define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides
6613 // Mark this ICON as a TLS_HDL, codegen will use FS:[cns]
6615 GenTreePtr tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);
6617 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6618 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6620 tree->gtFlags &= ~GTF_FLD_INITCLASS;
6621 tlsRef->gtFlags |= GTF_ICON_INITCLASS;
6624 tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
6626 if (dllRef != nullptr)
6628 /* Add the dllRef */
6629 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
6632 /* Indirect to have tlsRef point at the base of the DLL's Thread Local Storage */
6633 tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
6637 FieldSeqNode* fieldSeq =
6638 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6639 GenTreePtr fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq);
6641 /* Add the TLS static field offset to the address */
6643 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode);
6646 // Final indirect to get to the actual value of the TLS static field
6648 tree->SetOper(GT_IND);
6649 tree->gtOp.gtOp1 = tlsRef;
6651 noway_assert(tree->gtFlags & GTF_IND_TLS_REF);
6655 // Normal static field reference
6658 // If we can access the static's address directly,
6659 // then pFldAddr will be NULL and
6660 // fldAddr will be the actual address of the static field
6662 void** pFldAddr = nullptr;
6663 void* fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr);
6665 if (pFldAddr == nullptr)
6667 #ifdef _TARGET_64BIT_
6668 if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))
6670 // The address is not directly addressable, so force it into a
6671 // constant so that we handle it properly
6673 GenTreePtr addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL);
6674 addr->gtType = TYP_I_IMPL;
6675 FieldSeqNode* fieldSeq =
6676 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6677 addr->gtIntCon.gtFieldSeq = fieldSeq;
6678 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6679 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6681 tree->gtFlags &= ~GTF_FLD_INITCLASS;
6682 addr->gtFlags |= GTF_ICON_INITCLASS;
6685 tree->SetOper(GT_IND);
6686 // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
6687 // We must clear it when we transform the node.
6688 // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
6689 // that the logic above does its own checking to determine whether a nullcheck is needed.
6690 tree->gtFlags &= ~GTF_IND_ARR_LEN;
6691 tree->gtOp.gtOp1 = addr;
6693 return fgMorphSmpOp(tree);
6696 #endif // _TARGET_64BIT_
6698 // Only volatile or classinit could be set, and they map over
6699 noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_FLD_INITCLASS | GTF_COMMON_MASK)) == 0);
6700 static_assert_no_msg(GTF_FLD_VOLATILE == GTF_CLS_VAR_VOLATILE);
6701 static_assert_no_msg(GTF_FLD_INITCLASS == GTF_CLS_VAR_INITCLASS);
6702 tree->SetOper(GT_CLS_VAR);
6703 tree->gtClsVar.gtClsVarHnd = symHnd;
6704 FieldSeqNode* fieldSeq =
6705 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6706 tree->gtClsVar.gtFieldSeq = fieldSeq;
6713 GenTreePtr addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL);
6715 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6716 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6718 tree->gtFlags &= ~GTF_FLD_INITCLASS;
6719 addr->gtFlags |= GTF_ICON_INITCLASS;
6722 // There are two cases here: either the static is RVA-based,
6723 // in which case the type of the FIELD node is not a GC type
6724 // and the handle to the RVA is a TYP_I_IMPL; or the FIELD node is
6725 // a GC type and the handle to it is a TYP_BYREF in the GC heap
6726 // because handles to statics now go into the large object heap.
6728 var_types handleTyp = (var_types)(varTypeIsGC(tree->TypeGet()) ? TYP_BYREF : TYP_I_IMPL);
6729 GenTreePtr op1 = gtNewOperNode(GT_IND, handleTyp, addr);
6730 op1->gtFlags |= GTF_IND_INVARIANT;
6732 tree->SetOper(GT_IND);
6733 tree->gtOp.gtOp1 = op1;
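// Illustrative result (a sketch): tree == IND(fieldType, IND(handleTyp, CNS(pFldAddr))).
// The inner indirection fetches the field's address from the handle cell; it is marked
// GTF_IND_INVARIANT because that cell never changes.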
6737 noway_assert(tree->gtOper == GT_IND);
6738 // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
6739 // We must clear it when we transform the node.
6740 // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
6741 // that the logic above does its own checking to determine whether a nullcheck is needed.
6742 tree->gtFlags &= ~GTF_IND_ARR_LEN;
6744 GenTreePtr res = fgMorphSmpOp(tree);
6746 // If we have a struct type, this node would previously have been under a GT_ADDR,
6747 // and therefore would have been marked GTF_DONT_CSE.
6748 // TODO-1stClassStructs: revisit this.
6749 if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal)
6751 res->gtFlags |= GTF_DONT_CSE;
6754 if (fldOffset == 0 && res->OperGet() == GT_IND)
6756 GenTreePtr addr = res->gtOp.gtOp1;
6757 // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node.
6758 FieldSeqNode* fieldSeq =
6759 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6760 fgAddFieldSeqForZeroOffset(addr, fieldSeq);
6766 //------------------------------------------------------------------------------
6767 // fgMorphCallInline: attempt to inline a call
6770 // call - call expression to inline, inline candidate
6771 // inlineResult - result tracking and reporting
6774 // Attempts to inline the call.
6776 // If successful, callee's IR is inserted in place of the call, and
6777 // is marked with an InlineContext.
6779 // If unsuccessful, the transformations done in anticipation of a
6780 // possible inline are undone, and the candidate flag on the call
6781 // is cleared.
6783 void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
6785 // The call must be a candidate for inlining.
6786 assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0);
6788 // Attempt the inline
6789 fgMorphCallInlineHelper(call, inlineResult);
6791 // We should have made up our minds one way or another....
6792 assert(inlineResult->IsDecided());
6794 // If we failed to inline, we have a bit of work to do to clean up
6795 if (inlineResult->IsFailure())
6800 // Before we do any cleanup, create a failing InlineContext to
6801 // capture details of the inlining attempt.
6802 m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult);
6806 // It was an inline candidate, but we haven't expanded it.
6807 if (call->gtCall.gtReturnType != TYP_VOID)
6809 // Detach the GT_CALL tree from the original statement by
6810 // hanging a "nothing" node to it. Later the "nothing" node will be removed
6811 // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.
6813 noway_assert(fgMorphStmt->gtStmtExpr == call);
6814 fgMorphStmt->gtStmtExpr = gtNewNothingNode();
6817 // Clear the Inline Candidate flag so we can ensure later we tried
6818 // inlining all candidates.
6820 call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE;
6824 /*****************************************************************************
6825 * Helper to attempt to inline a call
6826 * Sets success/failure in inline result
6827 * If success, modifies current method's IR with inlinee's IR
6828 * If failed, undoes any speculative modifications to current method
6831 void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
6833 // Don't expect any surprises here.
6834 assert(result->IsCandidate());
6836 if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING)
6838 // For now, attributing this to call site, though it's really
6839 // more of a budget issue (lvaCount currently includes all
6840 // caller and prospective callee locals). We still might be
6841 // able to inline other callees into this caller, or inline
6842 // this callee in other callers.
6843 result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
6847 if (call->IsVirtual())
6849 result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL);
6853 // impMarkInlineCandidate() is expected not to mark tail prefixed calls
6854 // and recursive tail calls as inline candidates.
6855 noway_assert(!call->IsTailPrefixedCall());
6856 noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call));
6858 /* If the caller's stack frame is marked, then we can't do any inlining. Period.
6859 Although we have checked this in impCanInline, it is possible that later IL instructions
6860 might cause compNeedSecurityCheck to be set. Therefore we need to check it here again.
6863 if (opts.compNeedSecurityCheck)
6865 result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
6870 // Calling inlinee's compiler to inline the method.
6873 unsigned startVars = lvaCount;
6878 printf("Expanding INLINE_CANDIDATE in statement ");
6879 printTreeID(fgMorphStmt);
6880 printf(" in BB%02u:\n", compCurBB->bbNum);
6881 gtDispTree(fgMorphStmt);
6882 if (call->IsImplicitTailCall())
6884 printf("Note: candidate is implicit tail call\n");
6889 impInlineRoot()->m_inlineStrategy->NoteAttempt(result);
6892 // Invoke the compiler to inline the call.
6895 fgInvokeInlineeCompiler(call, result);
6897 if (result->IsFailure())
6899 // Undo some changes made in anticipation of inlining...
6901 // Zero out the used locals
6902 memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable));
6903 for (unsigned i = startVars; i < lvaCount; i++)
6905 new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
6908 lvaCount = startVars;
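// Note: the placement-new loop above re-runs the LclVarDsc constructor on each abandoned
// slot, returning the local-var table to its pre-inline state without reallocating it.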
6913 // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount);
6923 // printf("After inlining lvaCount=%d.\n", lvaCount);
6928 /*****************************************************************************
6930 * Performs checks to see if this tail call can be optimized as epilog+jmp.
6932 bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
6934 #if FEATURE_FASTTAILCALL
6935 // Reaching here means that the return types of the caller and callee are tail call compatible.
6936 // In case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
6938 // In an implicit tail call case callSig may not be available but it is guaranteed to be available
6939 // for explicit tail call cases. The reason implicit tail case callSig may not be available is that
6940 // a call node might be marked as an in-line candidate and could fail to be in-lined, in which case
6941 // fgInline() will replace the return value placeholder with the call node using gtCloneExpr(), which is
6942 // currently not copying/setting callSig.
6943 CLANG_FORMAT_COMMENT_ANCHOR;
6946 if (callee->IsTailPrefixedCall())
6948 assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass,
6949 (var_types)callee->gtReturnType, callee->callSig->retTypeClass));
6953 // Note on vararg methods:
6954 // If the caller is a vararg method, we don't know the number of arguments passed by the caller's caller.
6955 // But we can be sure that the in-coming arg area of the vararg caller would be sufficient to hold its
6956 // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as the
6957 // out-going area required for the callee is bounded by the caller's fixed argument space.
6959 // Note that callee being a vararg method is not a problem since we can account for the params being passed.
6961 // Count of caller args including implicit and hidden (i.e. thisPtr, RetBuf, GenericContext, VarargCookie)
6962 unsigned nCallerArgs = info.compArgsCount;
6964 // Count the callee args including implicit and hidden.
6965 // Note that GenericContext and VarargCookie are added by the importer to the
6966 // gtCallArgs list, along with the explicit user args, while importing the call.
6967 unsigned nCalleeArgs = 0;
6968 if (callee->gtCallObjp) // thisPtr
6973 if (callee->HasRetBufArg()) // RetBuf
6977 // If callee has RetBuf param, caller too must have it.
6978 // Otherwise go the slow route.
6979 if (info.compRetBuffArg == BAD_VAR_NUM)
6985 // Count user args while tracking whether any of them is a multi-byte param
6986 // that cannot be passed in a register. Note that we don't need to count
6987 // non-standard and secret params passed in registers (e.g. R10, R11) since
6988 // these won't contribute to out-going arg size.
6989 bool hasMultiByteArgs = false;
6990 for (GenTreePtr args = callee->gtCallArgs; (args != nullptr) && !hasMultiByteArgs; args = args->gtOp.gtOp2)
6994 assert(args->OperIsList());
6995 GenTreePtr argx = args->gtOp.gtOp1;
6997 if (varTypeIsStruct(argx))
6999 // Actual arg may be a child of a GT_COMMA. Skip over comma opers.
7000 while (argx->gtOper == GT_COMMA)
7002 argx = argx->gtOp.gtOp2;
7005 // Get the size of the struct and see if it is register passable.
7006 CORINFO_CLASS_HANDLE objClass = nullptr;
7008 if (argx->OperGet() == GT_OBJ)
7010 objClass = argx->AsObj()->gtClass;
7012 else if (argx->IsLocal())
7014 objClass = lvaTable[argx->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
7016 if (objClass != nullptr)
7018 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
7020 unsigned typeSize = 0;
7021 hasMultiByteArgs = !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false);
7023 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
7024 // On System V/arm64 the arg could be a two-eightbyte struct that is passed in two registers.
7025 // Account for the second eightbyte in the nCalleeArgs.
7026 // https://github.com/dotnet/coreclr/issues/2666
7027 // TODO-CQ-Amd64-Unix/arm64: Structs of size between 9 to 16 bytes are conservatively estimated
7028 // as two args, since they need two registers whereas nCallerArgs is
7029 // counting such an arg as one. This would mean we will not be optimizing
7030 // certain calls though technically possible.
7032 if (typeSize > TARGET_POINTER_SIZE)
7034 unsigned extraArgRegsToAdd = (typeSize / TARGET_POINTER_SIZE);
7035 nCalleeArgs += extraArgRegsToAdd;
7037 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_
7040 assert(!"Target platform ABI rules regarding passing struct type args in registers");
7042 #endif //_TARGET_AMD64_ || _TARGET_ARM64_
7046 hasMultiByteArgs = true;
7051 // Go the slow route if it has multi-byte params
7052 if (hasMultiByteArgs)
7057 // Reaching here means that the callee has only argument types which can be passed in
7058 // a register, and which, if passed on the stack, will occupy exactly one stack slot in the out-going arg area.
7059 // If we are passing args on the stack for the callee and it has more stack args than
7060 // the caller, then a fast tail call cannot be performed.
7062 // Note that the GC'ness of on stack args need not match since the arg setup area is marked
7063 // as non-interruptible for fast tail calls.
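// For example, on a target where MAX_REG_ARG == 4, a caller taking 5 args cannot fast
// tail call a callee taking 6 args: the callee would need more out-going stack slots
// than the caller's in-coming arg area provides.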
7064 if ((nCalleeArgs > MAX_REG_ARG) && (nCallerArgs < nCalleeArgs))
7075 /*****************************************************************************
7077 * Transform the given GT_CALL tree for tail call code generation.
7079 void Compiler::fgMorphTailCall(GenTreeCall* call)
7081 JITDUMP("fgMorphTailCall (before):\n");
7084 #if defined(_TARGET_ARM_)
7085 // For the helper-assisted tail calls, we need to push all the arguments
7086 // into a single list, and then add a few extra at the beginning
7088 // Check for PInvoke call types that we don't handle in codegen yet.
7089 assert(!call->IsUnmanaged());
7090 assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == NULL));
7092 // First move the this pointer (if any) onto the regular arg list
7093 GenTreePtr thisPtr = NULL;
7094 if (call->gtCallObjp)
7096 GenTreePtr objp = call->gtCallObjp;
7097 call->gtCallObjp = NULL;
7099 if ((call->gtFlags & GTF_CALL_NULLCHECK) || call->IsVirtualVtable())
7101 thisPtr = gtClone(objp, true);
7102 var_types vt = objp->TypeGet();
7103 if (thisPtr == NULL)
7105 // Too complex, so use a temp
7106 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7107 GenTreePtr asg = gtNewTempAssign(lclNum, objp);
7108 if (!call->IsVirtualVtable())
7110 // Add an indirection to get the nullcheck
7111 GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
7112 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
7113 asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
7115 objp = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
7116 thisPtr = gtNewLclvNode(lclNum, vt);
7118 else if (!call->IsVirtualVtable())
7120 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
7121 objp = gtNewOperNode(GT_COMMA, vt, ind, objp);
7122 thisPtr = gtClone(thisPtr, true);
7125 call->gtFlags &= ~GTF_CALL_NULLCHECK;
7128 call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs);
7131 // Add the extra VSD parameter if needed
7132 CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
7133 if (call->IsVirtualStub())
7135 flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
7138 if (call->gtCallType == CT_INDIRECT)
7140 arg = gtClone(call->gtCallAddr, true);
7141 noway_assert(arg != NULL);
7145 noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
7146 ssize_t addr = ssize_t(call->gtStubCallStubAddr);
7147 arg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7149 // Change the call type, so we can add the extra indirection here, rather than in codegen
7150 call->gtCallAddr = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7151 call->gtStubCallStubAddr = NULL;
7152 call->gtCallType = CT_INDIRECT;
7154 // Add the extra indirection to generate the real target
7155 call->gtCallAddr = gtNewOperNode(GT_IND, TYP_I_IMPL, call->gtCallAddr);
7156 call->gtFlags |= GTF_EXCEPT;
7158 // And push the stub address onto the list of arguments
7159 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7161 else if (call->IsVirtualVtable())
7163 // TODO-ARM-NYI: for x64 handle CORINFO_TAILCALL_THIS_IN_SECRET_REGISTER
7165 noway_assert(thisPtr != NULL);
7167 GenTreePtr add = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL));
7168 GenTreePtr vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7169 vtbl->gtFlags |= GTF_EXCEPT;
7171 unsigned vtabOffsOfIndirection;
7172 unsigned vtabOffsAfterIndirection;
7173 unsigned isRelative;
7174 info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection,
7177 /* Get the appropriate vtable chunk */
7179 if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK)
7181 add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
7183 GenTreePtr indOffTree;
7187 indOffTree = impCloneExpr(add, &add, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
7188 nullptr DEBUGARG("virtual table call"));
7191 vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7195 vtbl = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, indOffTree);
7199 /* Now the appropriate vtable slot */
7201 add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL));
7202 vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7204 // Switch this to a plain indirect call
7205 call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
7206 assert(!call->IsVirtual());
7207 call->gtCallType = CT_INDIRECT;
7209 call->gtCallAddr = vtbl;
7210 call->gtCallCookie = NULL;
7211 call->gtFlags |= GTF_EXCEPT;
7214 // Now inject a placeholder for the real call target that codegen
7215 // will fill in
7216 GenTreePtr arg = new (this, GT_NOP) GenTreeOp(GT_NOP, TYP_I_IMPL);
7217 #ifdef LEGACY_BACKEND
7218 codeGen->genMarkTreeInReg(arg, REG_TAILCALL_ADDR);
7220 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7222 // Lastly inject the pointer for the copy routine
7223 noway_assert(call->callSig != NULL);
7224 void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
7225 arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7226 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7228 // It is now a varargs tail call
7229 call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL;
7230 call->gtFlags &= ~GTF_CALL_POP_ARGS;
7232 #elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
7234 // x86 classic codegen doesn't require any morphing
7236 // For the helper-assisted tail calls, we need to push all the arguments
7237 // into a single list, and then add a few extra at the beginning or end.
7239 // For AMD64, the tailcall helper (JIT_TailCall) is defined as:
7241 // JIT_TailCall(void* copyRoutine, void* callTarget, <function args>)
7243 // We need to add "copyRoutine" and "callTarget" extra params at the beginning.
7244 // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg
7245 // for callTarget here which will be replaced later with callTarget in tail call lowering.
7247 // For x86, the tailcall helper is defined as:
7249 // JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
7250 //              callTarget)
7252 // Note that the special arguments are on the stack, whereas the function arguments follow
7253 // the normal convention: there might be register arguments in ECX and EDX. The stack will
7254 // look like (highest address at the top):
7255 // first normal stack argument
7257 // last normal stack argument
7258 // numberOfOldStackArgs
7259 // numberOfNewStackArgs
7260 // flags
7261 // callTarget
7263 // Each special arg is 4 bytes.
7265 // 'flags' is a bitmask where:
7266 // 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all
7267 // callee-saved registers for tailcall functions. Note that the helper assumes
7268 // that the callee-saved registers live immediately below EBP, and must have been
7269 // pushed in this order: EDI, ESI, EBX.
7270 // 2 == call target is a virtual stub dispatch.
7272 // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details
7273 // on the custom calling convention.
7275 // Check for PInvoke call types that we don't handle in codegen yet.
7276 assert(!call->IsUnmanaged());
7277 assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr));
7279 // Don't support tail calling helper methods
7280 assert(call->gtCallType != CT_HELPER);
7282 // We come this route only for tail prefixed calls that cannot be dispatched as
7283 // fast tail calls.
7284 assert(!call->IsImplicitTailCall());
7285 assert(!fgCanFastTailCall(call));
7287 // First move the 'this' pointer (if any) onto the regular arg list. We do this because
7288 // we are going to prepend special arguments onto the argument list (for non-x86 platforms),
7289 // and thus shift where the 'this' pointer will be passed to a later argument slot. In
7290 // addition, for all platforms, we are going to change the call into a helper call. Our code
7291 // generation code for handling calls to helpers does not handle 'this' pointers. So, when we
7292 // do this transformation, we must explicitly create a null 'this' pointer check, if required,
7293 // since special 'this' pointer handling will no longer kick in.
7295 // Some call types, such as virtual vtable calls, require creating a call address expression
7296 // that involves the "this" pointer. Lowering will sometimes create an embedded statement
7297 // to create a temporary that is assigned to the "this" pointer expression, and then use
7298 // that temp to create the call address expression. This temp creation embedded statement
7299 // will occur immediately before the "this" pointer argument, and then will be used for both
7300 // the "this" pointer argument as well as the call address expression. In the normal ordering,
7301 // the embedded statement establishing the "this" pointer temp will execute before both uses
7302 // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the
7303 // normal call argument list, and insert a placeholder which will hold the call address
7304 // expression. For non-x86, things are ok, because the order of execution of these is not
7305 // altered. However, for x86, the call address expression is inserted as the *last* argument
7306 // in the argument list, *after* the "this" pointer. It will be put on the stack, and be
7307 // evaluated first. To ensure we don't end up with out-of-order temp definition and use,
7308 // for those cases where call lowering creates an embedded form temp of "this", we will
7309 // create a temp here, early, that will later get morphed correctly.
7311 if (call->gtCallObjp)
7313 GenTreePtr thisPtr = nullptr;
7314 GenTreePtr objp = call->gtCallObjp;
7315 call->gtCallObjp = nullptr;
7318 if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal())
7321 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7322 GenTreePtr asg = gtNewTempAssign(lclNum, objp);
7324 // COMMA(tmp = "this", tmp)
7325 var_types vt = objp->TypeGet();
7326 GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
7327 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp);
7331 #endif // _TARGET_X86_
7333 #if defined(_TARGET_X86_)
7334 // When targeting x86, the runtime requires that we perform a null check on the `this` argument before tail
7335 // calling to a virtual dispatch stub. This requirement is a consequence of limitations in the runtime's
7336 // ability to map an AV to a NullReferenceException if the AV occurs in a dispatch stub.
7337 if (call->NeedsNullCheck() || call->IsVirtualStub())
7338 #else
7339 if (call->NeedsNullCheck())
7340 #endif // defined(_TARGET_X86_)
7342 // clone "this" if "this" has no side effects.
7343 if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
7345 thisPtr = gtClone(objp, true);
7348 var_types vt = objp->TypeGet();
7349 if (thisPtr == nullptr)
7351 // create a temp if either "this" has side effects or "this" is too complex to clone.
7354 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7355 GenTreePtr asg = gtNewTempAssign(lclNum, objp);
7357 // COMMA(tmp = "this", deref(tmp))
7358 GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
7359 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
7360 asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
7362 // COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
7363 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
7367 // thisPtr = COMMA(deref("this"), "this")
7368 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
7369 thisPtr = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true));
7372 call->gtFlags &= ~GTF_CALL_NULLCHECK;
7379 // During rationalization tmp="this" and the null check will
7380 // materialize as embedded stmts in the right execution order.
7381 assert(thisPtr != nullptr);
7382 call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs);
7385 #if defined(_TARGET_AMD64_)
7387 // Add the extra VSD parameter to arg list in case of VSD calls.
7388 // Tail call arg copying thunk will move this extra VSD parameter
7389 // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk()
7390 // in Stublinkerx86.cpp for more details.
7391 CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
7392 if (call->IsVirtualStub())
7394 GenTreePtr stubAddrArg;
7396 flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
7398 if (call->gtCallType == CT_INDIRECT)
7400 stubAddrArg = gtClone(call->gtCallAddr, true);
7401 noway_assert(stubAddrArg != nullptr);
7405 noway_assert((call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0);
7407 ssize_t addr = ssize_t(call->gtStubCallStubAddr);
7408 stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7411 // Push the stub address onto the list of arguments
7412 call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
7415 // Now inject a placeholder for the real call target that Lower phase will generate.
7416 GenTreePtr arg = gtNewIconNode(0, TYP_I_IMPL);
7417 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7419 // Inject the pointer for the copy routine to be used for struct copying
7420 noway_assert(call->callSig != nullptr);
7421 void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
7422 arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7423 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7425 #else // !_TARGET_AMD64_
7427 // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will
7428 // append to the list.
7429 GenTreeArgList** ppArg = &call->gtCallArgs;
7430 for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
7432 ppArg = (GenTreeArgList**)&args->gtOp2;
7434 assert(ppArg != nullptr);
7435 assert(*ppArg == nullptr);
7437 unsigned nOldStkArgsWords =
7438 (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
7439 GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
7440 *ppArg = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs
7441 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7443 // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
7444 // The constant will be replaced.
7445 GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL);
7446 *ppArg = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs
7447 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7449 // Inject a placeholder for the flags.
7450 // The constant will be replaced.
7451 GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL);
7452 *ppArg = gtNewListNode(arg1, nullptr);
7453 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7455 // Inject a placeholder for the real call target that the Lowering phase will generate.
7456 // The constant will be replaced.
7457 GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL);
7458 *ppArg = gtNewListNode(arg0, nullptr);
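// Resulting x86 argument order (matching the JIT_TailCall signature described above):
// <function args>, numberOfOldStackArgsWords, then the three placeholders for
// numberOfNewStackArgsWords, flags, and callTarget, which the Lowering phase replaces.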
7460 #endif // !_TARGET_AMD64_
7462 // It is now a varargs tail call dispatched via helper.
7463 call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
7464 call->gtFlags &= ~GTF_CALL_POP_ARGS;
7468 JITDUMP("fgMorphTailCall (after):\n");
7472 //------------------------------------------------------------------------------
7473 // fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop.
7477 // block - basic block ending with a recursive fast tail call
7478 // recursiveTailCall - recursive tail call to transform
7481 // The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop.
7483 void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall)
7485 assert(recursiveTailCall->IsTailCallConvertibleToLoop());
7486 GenTreePtr last = block->lastStmt();
7487 assert(recursiveTailCall == last->gtStmt.gtStmtExpr);
7489 // Transform recursive tail call into a loop.
7491 GenTreePtr earlyArgInsertionPoint = last;
7492 IL_OFFSETX callILOffset = last->gtStmt.gtStmtILoffsx;
7494 // Hoist arg setup statement for the 'this' argument.
7495 GenTreePtr thisArg = recursiveTailCall->gtCallObjp;
7496 if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode())
7498 GenTreePtr thisArgStmt = gtNewStmt(thisArg, callILOffset);
7499 fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt);
7502 // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first;
7503 // then the temps need to be assigned to the method parameters. This is done so that the caller
7504 // parameters are not re-assigned before call arguments depending on them are evaluated.
7505 // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of
7506 // where the next temp or parameter assignment should be inserted.
7508 // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first
7509 // while the second call argument (const 1) doesn't.
7510 // Basic block before tail recursion elimination:
7511 // ***** BB04, stmt 1 (top level)
7512 // [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013)
7513 // [000033] --C - G------ - \--* call void RecursiveMethod
7514 // [000030] ------------ | / --* const int - 1
7515 // [000031] ------------arg0 in rcx + --* +int
7516 // [000029] ------------ | \--* lclVar int V00 arg1
7517 // [000032] ------------arg1 in rdx \--* const int 1
7520 // Basic block after tail recursion elimination :
7521 // ***** BB04, stmt 1 (top level)
7522 // [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7523 // [000030] ------------ | / --* const int - 1
7524 // [000031] ------------ | / --* +int
7525 // [000029] ------------ | | \--* lclVar int V00 arg1
7526 // [000050] - A---------- \--* = int
7527 // [000049] D------N---- \--* lclVar int V02 tmp0
7529 // ***** BB04, stmt 2 (top level)
7530 // [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7531 // [000052] ------------ | / --* lclVar int V02 tmp0
7532 // [000054] - A---------- \--* = int
7533 // [000053] D------N---- \--* lclVar int V00 arg0
7535 // ***** BB04, stmt 3 (top level)
7536 // [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7537 // [000032] ------------ | / --* const int 1
7538 // [000057] - A---------- \--* = int
7539 // [000056] D------N---- \--* lclVar int V01 arg1
7541 GenTreePtr tmpAssignmentInsertionPoint = last;
7542 GenTreePtr paramAssignmentInsertionPoint = last;
7544 // Process early args. They may contain both setup statements for late args and actual args.
7545 // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum
7546 // below has the correct second argument.
7547 int earlyArgIndex = (thisArg == nullptr) ? 0 : 1;
7548 for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs; earlyArgs != nullptr;
7549 (earlyArgIndex++, earlyArgs = earlyArgs->Rest()))
7551 GenTreePtr earlyArg = earlyArgs->Current();
7552 if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode())
7554 if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0)
7556 // This is a setup node so we need to hoist it.
7557 GenTreePtr earlyArgStmt = gtNewStmt(earlyArg, callILOffset);
7558 fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt);
7562 // This is an actual argument that needs to be assigned to the corresponding caller parameter.
7563 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex);
7564 GenTreePtr paramAssignStmt =
7565 fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset,
7566 tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
7567 if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
7569 // All temp assignments will happen before the first param assignment.
7570 tmpAssignmentInsertionPoint = paramAssignStmt;
7576 // Process late args.
7577 int lateArgIndex = 0;
7578 for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs; lateArgs != nullptr;
7579 (lateArgIndex++, lateArgs = lateArgs->Rest()))
7581 // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter.
7582 GenTreePtr lateArg = lateArgs->Current();
7583 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex);
7584 GenTreePtr paramAssignStmt =
7585 fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset,
7586 tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
7588 if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
7590 // All temp assignments will happen before the first param assignment.
7591 tmpAssignmentInsertionPoint = paramAssignStmt;
7595 // If the method has starg.s 0 or ldarga.s 0 a special local (lvaArg0Var) is created so that
7596 // compThisArg stays immutable. Normally it's assigned in the fgFirstBBScratch block. Since that
7597 // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here.
7598 if (!info.compIsStatic && (lvaArg0Var != info.compThisArg))
7600 var_types thisType = lvaTable[info.compThisArg].TypeGet();
7601 GenTreePtr arg0 = gtNewLclvNode(lvaArg0Var, thisType);
7602 GenTreePtr arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType));
7603 GenTreePtr arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset);
7604 fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt);
7608 fgRemoveStmt(block, last);
7610 // Set the loop edge.
7611 block->bbJumpKind = BBJ_ALWAYS;
7612 block->bbJumpDest = fgFirstBBisScratch() ? fgFirstBB->bbNext : fgFirstBB;
7613 fgAddRefPred(block->bbJumpDest, block);
7614 block->bbFlags &= ~BBF_HAS_JMP;
7617 //------------------------------------------------------------------------------
7618 // fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter.
7622 // arg - argument to assign
7623 // argTabEntry - argument table entry corresponding to arg
7624 // block - basic block the call is in
7625 // callILOffset - IL offset of the call
7626 // tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary)
7627 // paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted
7630 // parameter assignment statement if one was inserted; nullptr otherwise.
7632 GenTreePtr Compiler::fgAssignRecursiveCallArgToCallerParam(GenTreePtr arg,
7633 fgArgTabEntryPtr argTabEntry,
7635 IL_OFFSETX callILOffset,
7636 GenTreePtr tmpAssignmentInsertionPoint,
7637 GenTreePtr paramAssignmentInsertionPoint)
7639 // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because
7640 // some argument trees may reference parameters directly.
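// For example (an illustrative sketch with invented names): for a recursive
// call Foo(y, x) inside Foo(x, y), emitting "x = y; y = x;" directly would
// read the already-clobbered x. Emitting "tmp1 = y; tmp2 = x;" first and
// then "x = tmp1; y = tmp2;" preserves the original argument values.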
7642 GenTreePtr argInTemp = nullptr;
7643 unsigned originalArgNum = argTabEntry->argNum;
7644 bool needToAssignParameter = true;
7646 // TODO-CQ: enable calls with struct arguments passed in registers.
7647 noway_assert(!varTypeIsStruct(arg->TypeGet()));
7649 if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl())
7651 // The argument is already assigned to a temp or is a const.
7654 else if (arg->OperGet() == GT_LCL_VAR)
7656 unsigned lclNum = arg->AsLclVar()->gtLclNum;
7657 LclVarDsc* varDsc = &lvaTable[lclNum];
7658 if (!varDsc->lvIsParam)
7660 // The argument is a non-parameter local so it doesn't need to be assigned to a temp.
7663 else if (lclNum == originalArgNum)
7665 // The argument is the same parameter local that we were about to assign so
7666 // we can skip the assignment.
7667 needToAssignParameter = false;
7671 // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve
7672 // any caller parameters. Some common cases are handled above but we may be able to eliminate
7673 // more temp assignments.
7675 GenTreePtr paramAssignStmt = nullptr;
7676 if (needToAssignParameter)
7678 if (argInTemp == nullptr)
7680 // The argument is not assigned to a temp. We need to create a new temp and insert an assignment.
7681 // TODO: we can avoid a temp assignment if we can prove that the argument tree
7682 // doesn't involve any caller parameters.
7683 unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp"));
7684 GenTreePtr tempSrc = arg;
7685 GenTreePtr tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType);
7686 GenTreePtr tmpAssignNode = gtNewAssignNode(tempDest, tempSrc);
7687 GenTreePtr tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset);
7688 fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt);
7689 argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType);
7692 // Now assign the temp to the parameter.
7693 LclVarDsc* paramDsc = lvaTable + originalArgNum;
7694 assert(paramDsc->lvIsParam);
7695 GenTreePtr paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType);
7696 GenTreePtr paramAssignNode = gtNewAssignNode(paramDest, argInTemp);
7697 paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset);
7699 fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt);
7701 return paramAssignStmt;
7704 /*****************************************************************************
7706 * Transform the given GT_CALL tree for code generation.
7709 GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
7711 if (call->CanTailCall())
7713 // It should either be an explicit (i.e. tail prefixed) or an implicit tail call
7714 assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall());
7716 // It cannot be an inline candidate
7717 assert(!call->IsInlineCandidate());
7719 const char* szFailReason = nullptr;
7720 bool hasStructParam = false;
7721 if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
7723 szFailReason = "Might turn into an intrinsic";
7726 if (opts.compNeedSecurityCheck)
7728 szFailReason = "Needs security check";
7730 else if (compLocallocUsed)
7732 szFailReason = "Localloc used";
7734 #ifdef _TARGET_AMD64_
7735 // Needed for Jit64 compat.
7736 // In future, enabling tail calls from methods that need GS cookie check
7737 // would require codegen side work to emit GS cookie check before a tail call.
7739 else if (getNeedsGSSecurityCookie())
7741 szFailReason = "GS Security cookie check";
7745 // DDB 99324: Just disable tailcall under compGcChecks stress mode.
7746 else if (opts.compGcChecks)
7748 szFailReason = "GcChecks";
7751 #if FEATURE_TAILCALL_OPT
7754 // We are still not sure whether it can be a tail call because, when converting
7755 // a call to an implicit tail call, we must check that there are no locals with
7756 // their address taken. If this is the case, we have to assume that the address
7757 // has been leaked and the current stack frame must live until after the final call.
7760 // Verify that none of the vars has the lvHasLdAddrOp or lvAddrExposed bit set. Note
7761 // that lvHasLdAddrOp is much more conservative. We cannot just base it on
7762 // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs
7763 // during morph stage. The reason for also checking lvAddrExposed is that in case
7764 // of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp.
7765 // The combination of lvHasLdAddrOp and lvAddrExposed, though conservative, allows us
7766 // never to be incorrect.
7768 // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose
7769 // address is taken. Such a flag could be set whenever lvHasLdAddrOp or lvAddrExposed
7770 // is set. This avoids the need for iterating through all lcl vars of the current
7771 // method. Right now throughout the code base we are not consistently using 'set'
7772 // method to set lvHasLdAddrOp and lvAddrExposed flags.
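// For illustration (a hedged sketch with invented names): in
//
//     int n = ...;
//     Use(&n);         // n becomes lvHasLdAddrOp/lvAddrExposed
//     return Callee(); // implicit tail call candidate
//
// the address of n may have been leaked, so the current frame must stay
// live across Callee() and the implicit tail call must be rejected.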
7775 bool hasAddrExposedVars = false;
7776 bool hasStructPromotedParam = false;
7777 bool hasPinnedVars = false;
7779 for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
7781 // If the method is marked as an explicit tail call we will skip the
7782 // following three hazard checks.
7783 // We still must check for any struct parameters and set 'hasStructParam'
7784 // so that we won't transform the recursive tail call into a loop.
7786 if (call->IsImplicitTailCall())
7788 if (varDsc->lvHasLdAddrOp)
7790 hasAddrExposedVars = true;
7793 if (varDsc->lvAddrExposed)
7795 if (lvaIsImplicitByRefLocal(varNum))
7797 // The address of the implicit-byref is a non-address use of the pointer parameter.
7799 else if (varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl))
7801 // The address of the implicit-byref's field is likewise a non-address use of the pointer parameter.
7804 else if (varDsc->lvPromoted && (lvaTable[varDsc->lvFieldLclStart].lvParentLcl != varNum))
7806 // This temp was used for struct promotion bookkeeping. It will not be used, and will have
7807 // its ref count and address-taken flag reset in fgMarkDemotedImplicitByRefArgs.
7808 assert(lvaIsImplicitByRefLocal(lvaTable[varDsc->lvFieldLclStart].lvParentLcl));
7809 assert(fgGlobalMorph);
7813 hasAddrExposedVars = true;
7817 if (varDsc->lvPromoted && varDsc->lvIsParam && !lvaIsImplicitByRefLocal(varNum))
7819 hasStructPromotedParam = true;
7822 if (varDsc->lvPinned)
7824 // A tail call removes the method from the stack, which means the pinning
7825 // goes away for the callee. We can't allow that.
7826 hasPinnedVars = true;
7830 if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam)
7832 hasStructParam = true;
7833 // This prevents transforming a recursive tail call into a loop
7834 // but doesn't prevent tail call optimization so we need to
7835 // look at the rest of the parameters.
7840 if (hasAddrExposedVars)
7842 szFailReason = "Local address taken";
7844 if (hasStructPromotedParam)
7846 szFailReason = "Has Struct Promoted Param";
7850 szFailReason = "Has Pinned Vars";
7853 #endif // FEATURE_TAILCALL_OPT
7855 if (varTypeIsStruct(call))
7857 fgFixupStructReturn(call);
7860 var_types callType = call->TypeGet();
7862 // We have to ensure that we pass the incoming retValBuf as the
7863 // outgoing one. Using a temp will not do, as this function will
7864 // not regain control to do the copy.
7866 if (info.compRetBuffArg != BAD_VAR_NUM)
7868 noway_assert(callType == TYP_VOID);
7869 GenTreePtr retValBuf = call->gtCallArgs->gtOp.gtOp1;
7870 if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg)
7872 szFailReason = "Need to copy return buffer";
7876 // If this is an opportunistic tail call and it cannot be dispatched as a
7877 // fast tail call, go the non-tail call route. This is done for perf reasons.
7880 // Avoid the cost of determining whether it can be dispatched as a fast tail
7881 // call if we already know that the tail call cannot be honored for other reasons.
7883 bool canFastTailCall = false;
7884 if (szFailReason == nullptr)
7886 canFastTailCall = fgCanFastTailCall(call);
7887 if (!canFastTailCall)
7889 // Implicit or opportunistic tail calls are always dispatched via the fast tail call
7890 // mechanism and never via the tail call helper, for perf.
7891 if (call->IsImplicitTailCall())
7893 szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp";
7895 #ifndef LEGACY_BACKEND
7896 else if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this))
7898 // If we are here, it means that the call is explicitly ".tail" prefixed and cannot be
7899 // dispatched as a fast tail call.
7901 // Methods with non-standard args will have indirection cell or cookie param passed
7902 // in callee trash register (e.g. R11). Tail call helper doesn't preserve it before
7903 // tail calling the target method, and hence the ".tail" prefix on such calls needs to be ignored.
7906 // Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require
7907 // extra stub param (e.g. in R11 on Amd64), they can still be called via tail call helper.
7908 // This is done by adding stubAddr as an additional arg before the original list of
7909 // args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
7910 // in Stublinkerx86.cpp.
7911 szFailReason = "Method with non-standard args passed in callee trash register cannot be tail "
7912 "called via helper";
7914 #ifdef _TARGET_ARM64_
7917 // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER.
7918 // So, bail out if we can't make fast tail call.
7919 szFailReason = "Non-qualified fast tail call";
7922 #endif // LEGACY_BACKEND
7926 // Clear these flags before calling fgMorphCall() to avoid recursion.
7927 bool isTailPrefixed = call->IsTailPrefixedCall();
7928 call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
7930 #if FEATURE_TAILCALL_OPT
7931 call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
7935 if (!canFastTailCall && szFailReason == nullptr)
7937 szFailReason = "Non fast tail calls disabled for PAL based systems.";
7939 #endif // FEATURE_PAL
7941 if (szFailReason != nullptr)
7946 printf("\nRejecting tail call late for call ");
7948 printf(": %s\n", szFailReason);
7952 // for non user funcs, we have no handles to report
7953 info.compCompHnd->reportTailCallDecision(nullptr,
7954 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
7955 isTailPrefixed, TAILCALL_FAIL, szFailReason);
7960 #if !FEATURE_TAILCALL_OPT_SHARED_RETURN
7961 // We enable shared-ret tail call optimization for recursive calls even if
7962 // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined.
7963 if (gtIsRecursiveCall(call))
7966 // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN,
7967 // but if the call falls through to a ret, and we are doing a tailcall, change it here.
7968 if (compCurBB->bbJumpKind != BBJ_RETURN)
7970 compCurBB->bbJumpKind = BBJ_RETURN;
7974 // Set this flag before calling fgMorphCall() to prevent inlining this call.
7975 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
7977 bool fastTailCallToLoop = false;
7978 #if FEATURE_TAILCALL_OPT
7979 // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register
7980 // or return type is a struct that can be passed in a register.
7982 // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through
7983 // hidden generic context param or through keep alive thisptr), then while transforming a recursive
7984 // call to such a method requires that the generic context stored on stack slot be updated. Right now,
7985 // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming
7986 // a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the
7987 // generic type parameters of both caller and callee generic method are the same.
7988 if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() &&
7989 !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && !varTypeIsStruct(call->TypeGet()))
7991 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
7992 fastTailCallToLoop = true;
7996 // Do some target-specific transformations (before we process the args, etc.)
7997 // This is needed only for tail prefixed calls that cannot be dispatched as fast tail calls.
7999 if (!canFastTailCall)
8001 fgMorphTailCall(call);
8004 // Implementation note : If we optimize tailcall to do a direct jump
8005 // to the target function (after stomping on the return address, etc),
8006 // without using CORINFO_HELP_TAILCALL, we have to make certain that
8007 // we don't starve the hijacking logic (by stomping on the hijacked
8008 // return address etc).
8010 // At this point, we are committed to do the tailcall.
8011 compTailCallUsed = true;
8013 CorInfoTailCall tailCallResult;
8015 if (fastTailCallToLoop)
8017 tailCallResult = TAILCALL_RECURSIVE;
8019 else if (canFastTailCall)
8021 tailCallResult = TAILCALL_OPTIMIZED;
8025 tailCallResult = TAILCALL_HELPER;
8028 // for non user funcs, we have no handles to report
8029 info.compCompHnd->reportTailCallDecision(nullptr,
8030 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
8031 isTailPrefixed, tailCallResult, nullptr);
8033 // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID
8034 // to avoid doing any extra work for the return value.
8035 call->gtType = TYP_VOID;
8040 printf("\nGTF_CALL_M_TAILCALL bit set for call ");
8043 if (fastTailCallToLoop)
8045 printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
8052 GenTreePtr stmtExpr = fgMorphStmt->gtStmtExpr;
8055 // Tail call needs to be in one of the following IR forms
8056 // Either a call stmt or
8057 // GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..)))
8058 // var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..)))
8059 // GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP)
8061 // GT_CASTS may be nested.
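// For example, a statement like "return (long)(short)Call()" would appear
// here as GT_RETURN(GT_CAST(GT_CAST(GT_CALL(..)))).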
8062 genTreeOps stmtOper = stmtExpr->gtOper;
8063 if (stmtOper == GT_CALL)
8065 noway_assert(stmtExpr == call);
8069 noway_assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA);
8070 GenTreePtr treeWithCall;
8071 if (stmtOper == GT_RETURN)
8073 treeWithCall = stmtExpr->gtGetOp1();
8075 else if (stmtOper == GT_COMMA)
8077 // Second operation must be nop.
8078 noway_assert(stmtExpr->gtGetOp2()->IsNothingNode());
8079 treeWithCall = stmtExpr->gtGetOp1();
8083 treeWithCall = stmtExpr->gtGetOp2();
8087 while (treeWithCall->gtOper == GT_CAST)
8089 noway_assert(!treeWithCall->gtOverflow());
8090 treeWithCall = treeWithCall->gtGetOp1();
8093 noway_assert(treeWithCall == call);
8097 // For void calls, we would have created a GT_CALL in the stmt list.
8098 // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)).
8099 // For calls returning structs, we would have a void call, followed by a void return.
8100 // For debuggable code, it would be an assignment of the call to a temp.
8101 // We want to get rid of any of these extra trees, and just leave the call.
8103 GenTreeStmt* nextMorphStmt = fgMorphStmt->gtNextStmt;
8105 #if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_)
8106 // Legacy Jit64 Compat:
8107 // There could be any number of GT_NOPs between tail call and GT_RETURN.
8108 // That is, the tail call pattern could be one of the following:
8109 // 1) tail.call, nop*, ret
8110 // 2) tail.call, nop*, pop, nop*, ret
8111 // 3) var=tail.call, nop*, ret(var)
8112 // 4) var=tail.call, nop*, pop, ret
8113 // 5) comma(tail.call, nop), nop*, ret
8115 // See impIsTailCallILPattern() for details on tail call IL patterns
8116 // that are supported.
8117 if (stmtExpr->gtOper != GT_RETURN)
8119 // First delete all GT_NOPs after the call
8120 GenTreeStmt* morphStmtToRemove = nullptr;
8121 while (nextMorphStmt != nullptr)
8123 GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr;
8124 if (!nextStmtExpr->IsNothingNode())
8129 morphStmtToRemove = nextMorphStmt;
8130 nextMorphStmt = morphStmtToRemove->gtNextStmt;
8131 fgRemoveStmt(compCurBB, morphStmtToRemove);
8134 // Check to see if there is a pop.
8135 // Since tail call is honored, we can get rid of the stmt corresponding to pop.
8136 if (nextMorphStmt != nullptr && nextMorphStmt->gtStmtExpr->gtOper != GT_RETURN)
8138 // Note that pop opcode may or may not result in a new stmt (for details see
8139 // impImportBlockCode()). Hence, it is not possible to assert about the IR
8140 // form generated by pop but pop tree must be side-effect free so that we can
8141 // delete it safely.
8142 GenTreeStmt* popStmt = nextMorphStmt;
8143 nextMorphStmt = nextMorphStmt->gtNextStmt;
8145 // Side effect flags on a GT_COMMA may be overly pessimistic, so examine
8146 // the constituent nodes.
8147 GenTreePtr popExpr = popStmt->gtStmtExpr;
8148 bool isSideEffectFree = (popExpr->gtFlags & GTF_ALL_EFFECT) == 0;
8149 if (!isSideEffectFree && (popExpr->OperGet() == GT_COMMA))
8151 isSideEffectFree = ((popExpr->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) == 0) &&
8152 ((popExpr->gtGetOp2()->gtFlags & GTF_ALL_EFFECT) == 0);
8154 noway_assert(isSideEffectFree);
8155 fgRemoveStmt(compCurBB, popStmt);
8158 // Next delete any GT_NOP nodes after pop
8159 while (nextMorphStmt != nullptr)
8161 GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr;
8162 if (!nextStmtExpr->IsNothingNode())
8167 morphStmtToRemove = nextMorphStmt;
8168 nextMorphStmt = morphStmtToRemove->gtNextStmt;
8169 fgRemoveStmt(compCurBB, morphStmtToRemove);
8172 #endif // !FEATURE_CORECLR && _TARGET_AMD64_
8174 // Delete GT_RETURN if any
8175 if (nextMorphStmt != nullptr)
8177 GenTreePtr retExpr = nextMorphStmt->gtStmtExpr;
8178 noway_assert(retExpr->gtOper == GT_RETURN);
8180 // If var=call, then the next stmt must be a GT_RETURN(TYP_VOID) or GT_RETURN(var).
8181 // This can occur if impSpillStackEnsure() has introduced an assignment to a temp.
8182 if (stmtExpr->gtOper == GT_ASG && info.compRetType != TYP_VOID)
8184 noway_assert(stmtExpr->gtGetOp1()->OperIsLocal());
8186 GenTreePtr treeWithLcl = retExpr->gtGetOp1();
8187 while (treeWithLcl->gtOper == GT_CAST)
8189 noway_assert(!treeWithLcl->gtOverflow());
8190 treeWithLcl = treeWithLcl->gtGetOp1();
8193 noway_assert(stmtExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum ==
8194 treeWithLcl->AsLclVarCommon()->gtLclNum);
8197 fgRemoveStmt(compCurBB, nextMorphStmt);
8200 fgMorphStmt->gtStmtExpr = call;
8202 // Tail call via helper: The VM can't use return address hijacking if we're
8203 // not going to return and the helper doesn't have enough info to safely poll,
8204 // so we poll before the tail call, if the block isn't already safe. Since
8205 // tail call via helper is a slow mechanism, it doesn't matter whether we emit
8206 // a GC poll. This is done to be in parity with Jit64. Also this avoids GC info
8207 // size increase if almost all methods are expected to be tail calls (e.g. F#).
8209 // Note that we can avoid emitting GC-poll if we know that the current BB is
8210 // dominated by a Gc-SafePoint block. But we don't have dominator info at this
8211 // point. One option is to just add a placeholder node for GC-poll (e.g. GT_GCPOLL)
8212 // here and remove it in lowering if the block is dominated by a GC-SafePoint. For
8213 // now it is not clear whether optimizing slow tail calls is worth the effort. As a
8214 // low-cost check, we check whether the first and current basic blocks are GC safe points.
8217 // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder()
8218 // is going to mark the method as fully interruptible if the block containing this tail
8219 // call is reachable without executing any call.
8220 if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
8221 !fgCreateGCPoll(GCPOLL_INLINE, compCurBB))
8223 // We didn't insert a poll block, so we need to morph the call now
8224 // (Normally it will get morphed when we get to the split poll block)
8225 GenTreePtr temp = fgMorphCall(call);
8226 noway_assert(temp == call);
8229 // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to
8230 // the target. So we don't need an epilog - just like CORINFO_HELP_THROW.
8232 // Fast tail call: in case of fast tail calls, we need a jmp epilog and
8233 // hence mark it as BBJ_RETURN with BBF_JMP flag set.
8234 noway_assert(compCurBB->bbJumpKind == BBJ_RETURN);
8236 if (canFastTailCall)
8238 compCurBB->bbFlags |= BBF_HAS_JMP;
8242 compCurBB->bbJumpKind = BBJ_THROW;
8245 // For non-void calls, we return a placeholder which will be
8246 // used by the parent GT_RETURN node of this call.
8248 GenTree* result = call;
8249 if (callType != TYP_VOID && info.compRetType != TYP_VOID)
8252 // Return a dummy node, as the return is already removed.
8253 if (callType == TYP_STRUCT)
8255 // This is a HFA, use float 0.
8256 callType = TYP_FLOAT;
8258 #elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
8259 // Return a dummy node, as the return is already removed.
8260 if (varTypeIsStruct(callType))
8262 // This is a register-returned struct. Return a 0.
8263 // The actual return registers are hacked in lower and the register allocator.
8268 // Return a dummy node, as the return is already removed.
8269 if (varTypeIsSIMD(callType))
8271 callType = TYP_DOUBLE;
8274 result = gtNewZeroConNode(genActualType(callType));
8275 result = fgMorphTree(result);
8283 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 &&
8284 (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR)
8285 #ifdef FEATURE_READYTORUN_COMPILER
8286 || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
8289 (call == fgMorphStmt->gtStmtExpr))
8291 // This is call to CORINFO_HELP_VIRTUAL_FUNC_PTR with ignored result.
8292 // Transform it into a null check.
8294 GenTreePtr thisPtr = call->gtCallArgs->gtOp.gtOp1;
8296 GenTreePtr nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr);
8297 nullCheck->gtFlags |= GTF_EXCEPT;
8299 return fgMorphTree(nullCheck);
8302 noway_assert(call->gtOper == GT_CALL);
8305 // Only count calls once (only in the global morph phase)
8309 if (call->gtCallType == CT_INDIRECT)
8312 optIndirectCallCount++;
8314 else if (call->gtCallType == CT_USER_FUNC)
8317 if (call->IsVirtual())
8319 optIndirectCallCount++;
8324 // Couldn't inline - remember that this BB contains method calls
8326 // If this is a 'regular' call, mark the basic block as
8327 // having a call (for computing full interruptibility).
8328 CLANG_FORMAT_COMMENT_ANCHOR;
8330 if (IsGcSafePoint(call))
8332 compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
8335 // Morph Type.op_Equality and Type.op_Inequality
8336 // We need to do this before the arguments are morphed
8337 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
8339 CorInfoIntrinsics methodID = info.compCompHnd->getIntrinsicID(call->gtCallMethHnd);
8341 genTreeOps simpleOp = GT_CALL;
8342 if (methodID == CORINFO_INTRINSIC_TypeEQ)
8346 else if (methodID == CORINFO_INTRINSIC_TypeNEQ)
8351 if (simpleOp == GT_EQ || simpleOp == GT_NE)
8353 noway_assert(call->TypeGet() == TYP_INT);
8355 // Check for GetClassFromHandle(handle) and obj.GetType() both of which will only return RuntimeType
8356 // objects. Then if either operand is one of these two calls we can simplify op_Equality/op_Inequality to
8357 // GT_EQ/GT_NE: One important invariant that should never change is that type equivalency is always
8358 // equivalent to object identity equality for runtime type objects in reflection. This is also reflected
8359 // in RuntimeTypeHandle::TypeEquals. If this invariant were ever broken, we would need to remove the
8360 // optimization below.
8362 GenTreePtr op1 = call->gtCallArgs->gtOp.gtOp1;
8363 GenTreePtr op2 = call->gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
8365 if (gtCanOptimizeTypeEquality(op1) || gtCanOptimizeTypeEquality(op2))
8367 GenTreePtr compare = gtNewOperNode(simpleOp, TYP_INT, op1, op2);
8369 // fgMorphSmpOp will further optimize the following patterns:
8370 // 1. typeof(...) == typeof(...)
8371 // 2. typeof(...) == obj.GetType()
8372 return fgMorphTree(compare);
8377 // Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack.
8378 GenTreePtr origDest = nullptr; // Will only become non-null if we do the transformation (and thus require copy-back).
8380 unsigned retValTmpNum = BAD_VAR_NUM;
8381 CORINFO_CLASS_HANDLE structHnd = nullptr;
8382 if (call->HasRetBufArg() &&
8383 call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null).
8385 // We're enforcing the invariant that return buffer pointers (at least for
8386 // struct return types containing GC pointers) are never pointers into the heap.
8387 // The large majority of cases are address of local variables, which are OK.
8388 // Otherwise, allocate a local of the given struct type, pass its address,
8389 // then assign from that into the proper destination. (We don't need to do this
8390 // if we're passing the caller's ret buff arg to the callee, since the caller's caller
8391 // will maintain the same invariant.)
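// For illustration (a hedged sketch): for something like "obj.field = Callee()",
// where the return buffer would otherwise point into the heap, we generate
//
//     tmp = Callee();   // &tmp (a stack slot) is passed as the ret buff arg
//     obj.field = tmp;  // copy-back, using the proper write barrier
//
// so the callee only ever writes its return value through a stack pointer.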
8393 GenTreePtr dest = call->gtCallArgs->gtOp.gtOp1;
8394 assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above.
8395 if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR))
8397 // We'll exempt helper calls from this, assuming that the helper implementation
8398 // follows the old convention, and does whatever barrier is required.
8399 if (call->gtCallType != CT_HELPER)
8401 structHnd = call->gtRetClsHnd;
8402 if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) &&
8403 !((dest->OperGet() == GT_LCL_VAR || dest->OperGet() == GT_REG_VAR) &&
8404 dest->gtLclVar.gtLclNum == info.compRetBuffArg))
8408 retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
8409 lvaSetStruct(retValTmpNum, structHnd, true);
8410 dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
8415 call->gtCallArgs->gtOp.gtOp1 = dest;
8418 /* Process the "normal" argument list */
8419 call = fgMorphArgs(call);
8420 noway_assert(call->gtOper == GT_CALL);
8422 // Morph stelem.ref helper call to store a null value, into a store into an array without the helper.
8423 // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
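// For example, "arr[i] = null" is imported as the helper call
//     CORINFO_HELP_ARRADDR_ST(arr, i, null)
// and is morphed here into a direct store
//     GT_ASG(GT_INDEX(arr, i), null)
// since storing a null reference cannot fail the array covariance check.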
8424 if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
8426 GenTree* value = gtArgEntryByArgNum(call, 2)->node;
8427 if (value->IsIntegralConst(0))
8429 assert(value->OperGet() == GT_CNS_INT);
8431 GenTree* arr = gtArgEntryByArgNum(call, 0)->node;
8432 GenTree* index = gtArgEntryByArgNum(call, 1)->node;
8434 // Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy
8435 // the spill trees as well if necessary.
8436 GenTreeOp* argSetup = nullptr;
8437 for (GenTreeArgList* earlyArgs = call->gtCallArgs; earlyArgs != nullptr; earlyArgs = earlyArgs->Rest())
8439 GenTree* const arg = earlyArgs->Current();
8440 if (arg->OperGet() != GT_ASG)
8446 assert(arg != index);
8448 arg->gtFlags &= ~GTF_LATE_ARG;
8450 GenTree* op1 = argSetup;
8453 op1 = gtNewNothingNode();
8455 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8459 argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg);
8462 argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8467 auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult {
8468 (*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
8469 return WALK_CONTINUE;
8472 fgWalkTreePost(&arr, resetMorphedFlag);
8473 fgWalkTreePost(&index, resetMorphedFlag);
8474 fgWalkTreePost(&value, resetMorphedFlag);
8477 GenTree* const nullCheckedArr = impCheckForNullPointer(arr);
8478 GenTree* const arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
8479 GenTree* const arrStore = gtNewAssignNode(arrIndexNode, value);
8480 arrStore->gtFlags |= GTF_ASG;
8482 GenTree* result = fgMorphTree(arrStore);
8483 if (argSetup != nullptr)
8485 result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result);
8487 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8495 // Optimize get_ManagedThreadId(get_CurrentThread)
8496 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
8497 info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId)
8499 noway_assert(origDest == nullptr);
8500 noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != nullptr);
8502 GenTreePtr innerCall = call->gtCallLateArgs->gtOp.gtOp1;
8504 if (innerCall->gtOper == GT_CALL && (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
8505 info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) ==
8506 CORINFO_INTRINSIC_GetCurrentManagedThread)
8508 // substitute expression with call to helper
8509 GenTreePtr newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT, 0);
8510 JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n");
8511 return fgMorphTree(newCall);
8515 if (origDest != nullptr)
8517 GenTreePtr retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
8518 // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused
8519 // var, which would allow the whole assignment to be optimized away to a NOP. So in that case, make the
8520 // origDest into a comma that uses the var. Note that the var doesn't have to be a temp for this to work.
8522 if (origDest->OperGet() == GT_ASG)
8524 if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
8526 GenTreePtr var = origDest->gtOp.gtOp1;
8527 origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest,
8528 gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet()));
8531 GenTreePtr copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false);
8532 copyBlk = fgMorphTree(copyBlk);
8533 GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk);
8535 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8540 if (call->IsNoReturn())
8543 // If we know that the call does not return then we can set fgRemoveRestOfBlock
8544 // to remove all subsequent statements and change the call's basic block to BBJ_THROW.
8545 // As a result the compiler won't need to preserve live registers across the call.
8547 // This isn't needed for tail calls, as there shouldn't be any code after the call anyway.
8548 // Besides, the tail call code is part of the epilog and converting the block to
8549 // BBJ_THROW would result in the tail call being dropped as the epilog is generated
8550 // only for BBJ_RETURN blocks.
8552 // Currently this doesn't work for non-void callees. Some of the code that handles
8553 // fgRemoveRestOfBlock expects the tree to have GTF_EXCEPT flag set but call nodes
8554 // do not have this flag by default. We could add the flag here but the proper solution
8555 // would be to replace the return expression with a local var node during inlining
8556 // so the rest of the call tree stays in a separate statement. That statement can then
8557 // be removed by fgRemoveRestOfBlock without needing to add GTF_EXCEPT anywhere.
8560 if (!call->IsTailCall() && call->TypeGet() == TYP_VOID)
8562 fgRemoveRestOfBlock = true;
8569 /*****************************************************************************
8571 * Transform the given GTK_CONST tree for code generation.
8574 GenTreePtr Compiler::fgMorphConst(GenTreePtr tree)
8576 assert(tree->OperKind() & GTK_CONST);
8578 /* Clear any exception flags or other unnecessary flags
8579 * that may have been set before folding this node to a constant */
8581 tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
8583 if (tree->OperGet() != GT_CNS_STR)
8588 // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
8589 // guarantee slow performance for that block. Instead cache the return value
8590 // of CORINFO_HELP_STRCNS and go to the cache first, giving reasonable perf.
8592 if (compCurBB->bbJumpKind == BBJ_THROW)
8594 CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd);
8595 if (helper != CORINFO_HELP_UNDEF)
8597 // For unimportant blocks, we want to construct the string lazily
8599 GenTreeArgList* args;
8600 if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE)
8602 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT));
8606 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT),
8607 gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd));
8610 tree = gtNewHelperCallNode(helper, TYP_REF, 0, args);
8611 return fgMorphTree(tree);
8615 assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd));
8618 InfoAccessType iat =
8619 info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd, tree->gtStrCon.gtSconCPX, &pValue);
8621 tree = gtNewStringLiteralNode(iat, pValue);
8623 return fgMorphTree(tree);
8626 /*****************************************************************************
8628 * Transform the given GTK_LEAF tree for code generation.
8631 GenTreePtr Compiler::fgMorphLeaf(GenTreePtr tree)
8633 assert(tree->OperKind() & GTK_LEAF);
8635 if (tree->gtOper == GT_LCL_VAR)
8637 const bool forceRemorph = false;
8638 return fgMorphLocalVar(tree, forceRemorph);
8641 else if (tree->gtOper == GT_LCL_FLD)
8643 if (info.compIsVarArgs)
8645 GenTreePtr newTree =
8646 fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
8647 if (newTree != nullptr)
8649 if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
8651 fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
8657 #endif // _TARGET_X86_
8658 else if (tree->gtOper == GT_FTN_ADDR)
8660 CORINFO_CONST_LOOKUP addrInfo;
8662 #ifdef FEATURE_READYTORUN_COMPILER
8663 if (tree->gtFptrVal.gtEntryPoint.addr != nullptr)
8665 addrInfo = tree->gtFptrVal.gtEntryPoint;
8670 info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo);
8673 // Refer to gtNewIconHandleNode() as the template for constructing a constant handle
8675 tree->SetOper(GT_CNS_INT);
8676 tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle));
8677 tree->gtFlags |= GTF_ICON_FTN_ADDR;
8679 switch (addrInfo.accessType)
8682 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
8683 tree->gtFlags |= GTF_IND_INVARIANT;
8688 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
8692 tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding
8696 noway_assert(!"Unknown addrInfo.accessType");
8699 return fgMorphTree(tree);
8705 void Compiler::fgAssignSetVarDef(GenTreePtr tree)
8707 GenTreeLclVarCommon* lclVarCmnTree;
8708 bool isEntire = false;
8709 if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire))
8713 lclVarCmnTree->gtFlags |= GTF_VAR_DEF;
8717 // We consider partial definitions to be modeled as uses followed by definitions.
8718 // This captures the idea that preceding defs are not necessarily made redundant
8719 // by this definition.
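// For example (an illustrative sketch): a 2-byte GT_LCL_FLD store into an
// 8-byte local defines only part of the local, so it is flagged
// GTF_VAR_DEF | GTF_VAR_USEASG; earlier defs of the remaining 6 bytes are
// not made redundant by it.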
8720 lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
8725 //------------------------------------------------------------------------
8726 // fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment
8729 // tree - The block assignment to be possibly morphed
8732 // The modified tree if successful, nullptr otherwise.
8735 // 'tree' must be a block assignment.
8738 // If successful, this method always returns the incoming tree, modifying only its arguments.
8741 GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
8743 // This must be a block assignment.
8744 noway_assert(tree->OperIsBlkOp());
8745 var_types asgType = tree->TypeGet();
8747 GenTreePtr asg = tree;
8748 GenTreePtr dest = asg->gtGetOp1();
8749 GenTreePtr src = asg->gtGetOp2();
8750 unsigned destVarNum = BAD_VAR_NUM;
8751 LclVarDsc* destVarDsc = nullptr;
8752 GenTreePtr lclVarTree = nullptr;
8753 bool isCopyBlock = asg->OperIsCopyBlkOp();
8754 bool isInitBlock = !isCopyBlock;
8757 CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
8759 // importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD)
8760 // The SIMD type in question could be Vector2f which is 8-bytes in size.
8761 // The below check is to make sure that we don't turn that copyblk
8762 // into an assignment, since rationalizer logic will transform the
8763 // copyblk appropriately. Otherwise, the transformation made in this
8764 // routine will prevent rationalizer logic and we might end up with
8765 // GT_ADDR(GT_SIMD) node post rationalization, leading to a noway assert in codegen.
8767 // TODO-1stClassStructs: This is here to preserve old behavior.
8768 // It should be eliminated.
8769 if (src->OperGet() == GT_SIMD)
8775 if (dest->gtEffectiveVal()->OperIsBlk())
8777 GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk();
8778 size = lhsBlk->Size();
8779 if (impIsAddressInLocal(lhsBlk->Addr(), &lclVarTree))
8781 destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
8782 destVarDsc = &(lvaTable[destVarNum]);
8784 if (lhsBlk->OperGet() == GT_OBJ)
8786 clsHnd = lhsBlk->AsObj()->gtClass;
8791 // Is this an enregisterable struct that is already a simple assignment?
8792 // This can happen if we are re-morphing.
8793 if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock)
8797 noway_assert(dest->OperIsLocal());
8799 destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
8800 destVarDsc = &(lvaTable[destVarNum]);
8803 clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
8804 size = info.compCompHnd->getClassSize(clsHnd);
8808 size = destVarDsc->lvExactSize;
8813 // See if we can do a simple transformation:
8815 //          GT_ASG <TYP_size>
8816 //          /   \
8817 //      GT_IND GT_IND or CNS_INT
8818 //      /        /
8819 //  [dest]    [src]
8822 if (size == REGSIZE_BYTES)
8824 if (clsHnd == NO_CLASS_HANDLE)
8826 // A register-sized cpblk can be treated as an integer assignment.
8827 asgType = TYP_I_IMPL;
8832 info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
8833 asgType = getJitGCType(gcPtr);
8844 asgType = TYP_SHORT;
8847 #ifdef _TARGET_64BIT_
8851 #endif // _TARGET_64BIT_
8855 // TODO-1stClassStructs: Change this to asgType != TYP_STRUCT.
8856 if (!varTypeIsStruct(asgType))
8858 // For initBlk, a non-constant source is not going to allow us to fiddle
8859 // with the bits to create a single assignment.
8860 noway_assert(size <= REGSIZE_BYTES);
8862 if (isInitBlock && !src->IsConstInitVal())
8867 if (destVarDsc != nullptr)
8869 #if LOCAL_ASSERTION_PROP
8870 // Kill everything about dest
8871 if (optLocalAssertionProp)
8873 if (optAssertionCount > 0)
8875 fgKillDependentAssertions(destVarNum DEBUGARG(tree));
8878 #endif // LOCAL_ASSERTION_PROP
8880 // A previous incarnation of this code also required the local not to be
8881 // address-exposed(=taken). That seems orthogonal to the decision of whether
8882 // to do field-wise assignments: being address-exposed will cause it to be
8883 // "dependently" promoted, so it will be in the right memory location. One possible
8884 // further reason for avoiding field-wise stores is that the struct might have alignment-induced
8885 // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid
8886 // concern, then we could compromise, and say that being address-exposed, plus having fields that do not
8887 // completely cover the memory of the struct, prevents field-wise assignments. The same situation exists for the "src" decision.
8888 if (varTypeIsStruct(lclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
8890 // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.)
8893 else if (!varTypeIsFloating(lclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc)))
8895 // Use the dest local var directly, as well as its type.
8897 asgType = destVarDsc->lvType;
8899 // If the block operation had been a write to a local var of a small int type,
8900 // of the exact size of the small int type, and the var is NormalizeOnStore,
8901 // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
8902 // have done that normalization. If we're now making it into an assignment,
8903 // the NormalizeOnStore will work, and it can be a full def.
8904 if (destVarDsc->lvNormalizeOnStore())
8906 dest->gtFlags &= (~GTF_VAR_USEASG);
8911 // Could be a non-promoted struct, or a floating point type local, or
8912 // an int subject to a partial write. Don't enregister.
8913 lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField));
8915 // Mark the local var tree as a definition point of the local.
8916 lclVarTree->gtFlags |= GTF_VAR_DEF;
8917 if (size < destVarDsc->lvExactSize)
8918 { // If it's not a full-width assignment....
8919 lclVarTree->gtFlags |= GTF_VAR_USEASG;
8922 if (dest == lclVarTree)
8924 dest = gtNewOperNode(GT_IND, asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
8929 // Check to ensure we don't have a reducible *(& ... )
8930 if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR)
8932 GenTreePtr addrOp = dest->AsIndir()->Addr()->gtGetOp1();
8933 // Ignore reinterpret casts between int/gc
8934 if ((addrOp->TypeGet() == asgType) || (varTypeIsIntegralOrI(addrOp) && (genTypeSize(asgType) == size)))
8937 asgType = addrOp->TypeGet();
8941 if (dest->gtEffectiveVal()->OperIsIndir())
8943 // If we have no information about the destination, we have to assume it could
8944 // live anywhere (not just in the GC heap).
8945 // Mark the GT_IND node so that we use the correct write barrier helper in case
8946 // the field is a GC ref.
8948 if (!fgIsIndirOfAddrOfLocal(dest))
8950 dest->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
8951 tree->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
8955 LclVarDsc* srcVarDsc = nullptr;
8958 if (src->OperGet() == GT_LCL_VAR)
8961 srcVarDsc = &(lvaTable[src->AsLclVarCommon()->gtLclNum]);
8963 else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &lclVarTree))
8965 srcVarDsc = &(lvaTable[lclVarTree->AsLclVarCommon()->gtLclNum]);
8967 if (srcVarDsc != nullptr)
8969 if (varTypeIsStruct(lclVarTree) && (srcVarDsc->lvPromoted || srcVarDsc->lvIsSIMDType()))
8971 // Let fgMorphCopyBlock handle it.
8974 else if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
8975 size == genTypeSize(genActualType(lclVarTree->TypeGet())))
8977 // Use the src local var directly.
8982 #ifndef LEGACY_BACKEND
8984 // The source argument of the copyblk can potentially
8985 // be accessed only through indir(addr(lclVar))
8986 // or indir(lclVarAddr) in rational form and liveness
8987 // won't account for these uses. That said,
8988 // we have to mark this local as address exposed so
8989 // we don't delete it as a dead store later on.
8990 unsigned lclVarNum = lclVarTree->gtLclVarCommon.gtLclNum;
8991 lvaTable[lclVarNum].lvAddrExposed = true;
8992 lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
8994 #else // LEGACY_BACKEND
8995 lvaSetVarDoNotEnregister(lclVarTree->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
8996 #endif // LEGACY_BACKEND
8998 if (src == lclVarTree)
9000 srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
9001 src = gtNewOperNode(GT_IND, asgType, srcAddr);
9005 assert(src->OperIsIndir());
9009 // If we have no information about the src, we have to assume it could
9010 // live anywhere (not just in the GC heap).
9011 // Mark the GT_IND node so that we use the correct write barrier helper in case
9012 // the field is a GC ref.
9014 if (!fgIsIndirOfAddrOfLocal(src))
9016 src->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
9023 if (varTypeIsSIMD(asgType))
9025 assert(!isCopyBlock); // Else we would have returned the tree above.
9026 noway_assert(src->IsIntegralConst(0));
9027 noway_assert(destVarDsc != nullptr);
9029 src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size);
9030 tree->gtOp.gtOp2 = src;
9036 if (src->OperIsInitVal())
9038 src = src->gtGetOp1();
9040 assert(src->IsCnsIntOrI());
9041 // This will mutate the integer constant, in place, to be the correct
9042 // value for the type we are using in the assignment.
9043 src->AsIntCon()->FixupInitBlkValue(asgType);
9047 // Ensure that the dest is setup appropriately.
9048 if (dest->gtEffectiveVal()->OperIsIndir())
9050 dest = fgMorphBlockOperand(dest, asgType, size, true /*isDest*/);
9053 // Ensure that the rhs is setup appropriately.
9056 src = fgMorphBlockOperand(src, asgType, size, false /*isDest*/);
9059 // Set the lhs and rhs on the assignment.
9060 if (dest != tree->gtOp.gtOp1)
9062 asg->gtOp.gtOp1 = dest;
9064 if (src != asg->gtOp.gtOp2)
9066 asg->gtOp.gtOp2 = src;
9069 asg->ChangeType(asgType);
9070 dest->gtFlags |= GTF_DONT_CSE;
9071 asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT);
9072 // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate.
9073 asg->gtFlags &= ~GTF_REVERSE_OPS;
9078 printf("fgMorphOneAsgBlock (after):\n");
9088 //------------------------------------------------------------------------
9089 // fgMorphInitBlock: Perform the Morphing of a GT_INITBLK node
9092 // tree - a tree node with a gtOper of GT_INITBLK
9093 // the child nodes for tree have already been Morphed
9096 // We can return the original GT_INITBLK unmodified (least desirable, but always correct)
9097 // We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable)
9098 // If we have performed struct promotion of the Dest() then we will try to
9099 // perform a field by field assignment for each of the promoted struct fields
9102 // If we leave it as a GT_INITBLK we will call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp
9103 // If the Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
9104 // cannot use a field by field assignment and must leave the original GT_INITBLK unmodified.
9106 GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
9108 // We must have the GT_ASG form of InitBlkOp.
9109 noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp());
9111 bool morphed = false;
9114 GenTree* asg = tree;
9115 GenTree* src = tree->gtGetOp2();
9116 GenTree* origDest = tree->gtGetOp1();
9118 GenTree* dest = fgMorphBlkNode(origDest, true);
9119 if (dest != origDest)
9121 tree->gtOp.gtOp1 = dest;
9123 tree->gtType = dest->TypeGet();
9124 // (Constant propagation may cause a TYP_STRUCT lclVar to be changed to GT_CNS_INT, and its
9125 // type will be the type of the original lclVar, in which case we will change it to TYP_INT).
9126 if ((src->OperGet() == GT_CNS_INT) && varTypeIsStruct(src))
9128 src->gtType = TYP_INT;
9130 JITDUMP("\nfgMorphInitBlock:");
9132 GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
9135 JITDUMP(" using oneAsgTree.\n");
9140 GenTree* destAddr = nullptr;
9141 GenTree* initVal = src->OperIsInitVal() ? src->gtGetOp1() : src;
9142 GenTree* blockSize = nullptr;
9143 unsigned blockWidth = 0;
9144 FieldSeqNode* destFldSeq = nullptr;
9145 LclVarDsc* destLclVar = nullptr;
9146 bool destDoFldAsg = false;
9147 unsigned destLclNum = BAD_VAR_NUM;
9148 bool blockWidthIsConst = false;
9149 GenTreeLclVarCommon* lclVarTree = nullptr;
9150 if (dest->IsLocal())
9152 lclVarTree = dest->AsLclVarCommon();
9156 if (dest->OperIsBlk())
9158 destAddr = dest->AsBlk()->Addr();
9159 blockWidth = dest->AsBlk()->gtBlkSize;
9163 assert((dest->gtOper == GT_IND) && (dest->TypeGet() != TYP_STRUCT));
9164 destAddr = dest->gtGetOp1();
9165 blockWidth = genTypeSize(dest->TypeGet());
9168 if (lclVarTree != nullptr)
9170 destLclNum = lclVarTree->gtLclNum;
9171 destLclVar = &lvaTable[destLclNum];
9172 blockWidth = varTypeIsStruct(destLclVar) ? destLclVar->lvExactSize : genTypeSize(destLclVar);
9173 blockWidthIsConst = true;
9177 if (dest->gtOper == GT_DYN_BLK)
9179 // The size must be an integer type
9180 blockSize = dest->AsBlk()->gtDynBlk.gtDynamicSize;
9181 assert(varTypeIsIntegral(blockSize->gtType));
9185 assert(blockWidth != 0);
9186 blockWidthIsConst = true;
9189 if ((destAddr != nullptr) && destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
9191 destLclNum = lclVarTree->gtLclNum;
9192 destLclVar = &lvaTable[destLclNum];
9195 if (destLclNum != BAD_VAR_NUM)
9197 #if LOCAL_ASSERTION_PROP
9198 // Kill everything about destLclNum (and its field locals)
9199 if (optLocalAssertionProp)
9201 if (optAssertionCount > 0)
9203 fgKillDependentAssertions(destLclNum DEBUGARG(tree));
9206 #endif // LOCAL_ASSERTION_PROP
9208 if (destLclVar->lvPromoted && blockWidthIsConst)
9210 assert(initVal->OperGet() == GT_CNS_INT);
9211 noway_assert(varTypeIsStruct(destLclVar));
9212 noway_assert(!opts.MinOpts());
9213 if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles) // logical AND of the two bool bitfields, not bitwise
9215 JITDUMP(" dest is address exposed");
9219 if (blockWidth == destLclVar->lvExactSize)
9221 JITDUMP(" (destDoFldAsg=true)");
9222 // We may decide later that a copyblk is required when this struct has holes
9223 destDoFldAsg = true;
9227 JITDUMP(" with mismatched size");
9233 // Can we use field by field assignment for the dest?
9234 if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
9236 JITDUMP(" dest contains holes");
9237 destDoFldAsg = false;
9240 JITDUMP(destDoFldAsg ? " using field by field initialization.\n" : " this requires an InitBlock.\n");
9242 // If we're doing an InitBlock and we've transformed the dest to a non-Blk
9243 // we need to change it back.
9244 if (!destDoFldAsg && !dest->OperIsBlk())
9246 noway_assert(blockWidth != 0);
9247 tree->gtOp.gtOp1 = origDest;
9248 tree->gtType = origDest->gtType;
9251 if (!destDoFldAsg && (destLclVar != nullptr))
9253 // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister.
9254 if (!destLclVar->lvRegStruct)
9256 // Mark it as DoNotEnregister.
9257 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
9261 // Mark the dest struct as DoNotEnreg
9262 // when it is a LclVar struct and we are using a CopyBlock,
9263 // or when the struct is not promoted.
9267 #if CPU_USES_BLOCK_MOVE
9268 compBlkOpUsed = true;
9270 dest = fgMorphBlockOperand(dest, dest->TypeGet(), blockWidth, true);
9271 tree->gtOp.gtOp1 = dest;
9272 tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
9276 // The initVal must be a constant of TYP_INT
9277 noway_assert(initVal->OperGet() == GT_CNS_INT);
9278 noway_assert(genActualType(initVal->gtType) == TYP_INT);
9280 // The dest must be of a struct type.
9281 noway_assert(varTypeIsStruct(destLclVar));
9284 // Now, convert InitBlock to individual assignments
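// For example (an illustrative sketch): zero-initializing a promoted local
// of type "struct { int a; int b; }" becomes the scalar assignments
//     a = 0; b = 0;
// chained together with GT_COMMA nodes by the loop below.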
9288 INDEBUG(morphed = true);
9292 unsigned fieldLclNum;
9293 unsigned fieldCnt = destLclVar->lvFieldCnt;
9295 for (unsigned i = 0; i < fieldCnt; ++i)
9297 fieldLclNum = destLclVar->lvFieldLclStart + i;
9298 dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
9300 noway_assert(lclVarTree->gtOper == GT_LCL_VAR);
9301 // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
9302 dest->gtFlags |= (lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG));
9304 srcCopy = gtCloneExpr(initVal);
9305 noway_assert(srcCopy != nullptr);
9307 // The type of srcCopy needs to match the type of the destination.
9308 if (dest->gtType == TYP_LONG)
9310 srcCopy->ChangeOperConst(GT_CNS_NATIVELONG);
9311 // copy and extend the value
9312 srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue());
9313 /* Change the types of srcCopy to TYP_LONG */
9314 srcCopy->gtType = TYP_LONG;
9316 else if (varTypeIsFloating(dest->gtType))
9318 srcCopy->ChangeOperConst(GT_CNS_DBL);
9319 // setup the bit pattern
9320 memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal,
9321 sizeof(srcCopy->gtDblCon.gtDconVal));
9322 /* Change the types of srcCopy to TYP_DOUBLE */
9323 srcCopy->gtType = TYP_DOUBLE;
9327 noway_assert(srcCopy->gtOper == GT_CNS_INT);
9328 noway_assert(srcCopy->TypeGet() == TYP_INT);
9329 // setup the bit pattern
9330 memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal,
9331 sizeof(srcCopy->gtIntCon.gtIconVal));
9334 srcCopy->gtType = dest->TypeGet();
9336 asg = gtNewAssignNode(dest, srcCopy);
9338 #if LOCAL_ASSERTION_PROP
9339 if (optLocalAssertionProp)
9341 optAssertionGen(asg);
9343 #endif // LOCAL_ASSERTION_PROP
9347 tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
9360 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9364 printf("fgMorphInitBlock (after):\n");
9373 //------------------------------------------------------------------------
9374 // fgMorphBlkToInd: Change a blk node into a GT_IND of the specified type
9377 // tree - the node to be modified.
9378 // type - the type of indirection to change it to.
9381 // Returns the node, modified in place.
9384 // This doesn't really warrant a separate method, but is here to abstract
9385 // the fact that these nodes can be modified in-place.
9387 GenTreePtr Compiler::fgMorphBlkToInd(GenTreeBlk* tree, var_types type)
9389 tree->SetOper(GT_IND);
9390 tree->gtType = type;
9394 //------------------------------------------------------------------------
9395 // fgMorphGetStructAddr: Gets the address of a struct object
9398 // pTree - the parent's pointer to the struct object node
9399 // clsHnd - the class handle for the struct type
9400 // isRValue - true if this is a source (not dest)
9403 // Returns the address of the struct value, possibly modifying the existing tree to
9404 // sink the address below any comma nodes (this is to canonicalize for value numbering).
9405 // If this is a source, it will morph it to an GT_IND before taking its address,
9406 // since it may not be remorphed (and we don't want blk nodes as rvalues).
9408 GenTreePtr Compiler::fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue)
9411 GenTree* tree = *pTree;
9412 // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we
9413 // need to hang onto that for the purposes of value numbering.
9414 if (tree->OperIsIndir())
9416 if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
9418 addr = tree->gtOp.gtOp1;
9422 if (isRValue && tree->OperIsBlk())
9424 tree->ChangeOper(GT_IND);
9426 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
9429 else if (tree->gtOper == GT_COMMA)
9431 // If this is a comma, we're going to "sink" the GT_ADDR below it.
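// For example: the address of COMMA(sideEffect, structVal) becomes
// COMMA(sideEffect, GT_ADDR(structVal)), with the comma retyped to TYP_BYREF.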
9432 (void)fgMorphGetStructAddr(&(tree->gtOp.gtOp2), clsHnd, isRValue);
9433 tree->gtType = TYP_BYREF;
9438 switch (tree->gtOper)
9445 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
9449 // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're
9450 // not going to use "temp"
9451 GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd);
9452 addr = fgMorphGetStructAddr(pTree, clsHnd, isRValue);
9461 //------------------------------------------------------------------------
9462 // fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment
9465 // tree - The struct type node
9466 // isDest - True if this is the destination of the assignment
9469 // Returns the possibly-morphed node. The caller is responsible for updating
9470 // the parent of this node.
9472 GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest)
9474 if (tree->gtOper == GT_COMMA)
9476 GenTree* effectiveVal = tree->gtEffectiveVal();
9477 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9479 addr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9481 // In order to CSE and value number array index expressions and bounds checks,
9482 // the commas in which they are contained need to match.
9483 // The pattern is that the COMMA should be the address expression.
9484 // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind.
9485 // TODO-1stClassStructs: Consider whether this can be improved.
9486 // Also consider whether some of this can be included in gtNewBlockVal (though note
9487 // that doing so may cause us to query the type system before we otherwise would).
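// For example: a block operand COMMA(boundsCheck, lclBlk) is rewritten so
// that the comma itself produces the address, COMMA(boundsCheck, ADDR(lclBlk)),
// and the result is then re-wrapped in an OBJ, BLK, or IND of the struct type.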
9488 GenTree* lastComma = nullptr;
9489 for (GenTree* next = tree; next != nullptr && next->gtOper == GT_COMMA; next = next->gtGetOp2())
9491 next->gtType = TYP_BYREF;
9494 if (lastComma != nullptr)
9496 noway_assert(lastComma->gtGetOp2() == effectiveVal);
9497 lastComma->gtOp.gtOp2 = addr;
9500 var_types structType = effectiveVal->TypeGet();
9501 if (structType == TYP_STRUCT)
9503 CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(effectiveVal);
9504 if (structHnd == NO_CLASS_HANDLE)
9506 tree = gtNewOperNode(GT_IND, effectiveVal->TypeGet(), addr);
9510 tree = gtNewObjNode(structHnd, addr);
9511 if (tree->OperGet() == GT_OBJ)
9513 gtSetObjGcInfo(tree->AsObj());
9519 tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, genTypeSize(structType));
9522 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9526 if (!tree->OperIsBlk())
9530 GenTreeBlk* blkNode = tree->AsBlk();
9531 if (blkNode->OperGet() == GT_DYN_BLK)
9533 if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI())
9535 unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue();
9536 // A GT_BLK with size of zero is not supported,
9537 // so if we encounter such a thing we just leave it as a GT_DYN_BLK
9540 blkNode->AsDynBlk()->gtDynamicSize = nullptr;
9541 blkNode->ChangeOper(GT_BLK);
9542 blkNode->gtBlkSize = size;
9554 if ((blkNode->TypeGet() != TYP_STRUCT) && (blkNode->Addr()->OperGet() == GT_ADDR) &&
9555 (blkNode->Addr()->gtGetOp1()->OperGet() == GT_LCL_VAR))
9557 GenTreeLclVarCommon* lclVarNode = blkNode->Addr()->gtGetOp1()->AsLclVarCommon();
9558 if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode)))
9560 lvaSetVarDoNotEnregister(lclVarNode->gtLclNum DEBUG_ARG(DNER_VMNeedsStackAddr));
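// Illustrative sketch (hypothetical): when fgMorphBlkNode sees a GT_DYN_BLK whose
// size operand is the constant 32, it rewrites the node in place:
//
//   DYN_BLK(addr, CNS_INT 32)  ==>  BLK<32>(addr)
//
// so that the later block-assignment morphing can treat it like any other
// fixed-size block (sizes of zero are left as GT_DYN_BLK, as noted above).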
9567 //------------------------------------------------------------------------
9568 // fgMorphBlockOperand: Canonicalize an operand of a block assignment
9571 // tree - The block operand
9572 // asgType - The type of the assignment
9573 // blockWidth - The size of the block
9574 // isDest - true iff this is the destination of the assignment
9577 // Returns the morphed block operand
9580 // This does the following:
9581 // - Ensures that a struct operand is a block node or (for non-LEGACY_BACKEND) lclVar.
9582 // - Ensures that any COMMAs are above ADDR nodes.
9583 // Although 'tree' WAS an operand of a block assignment, the assignment
9584 // may have been retyped to be a scalar assignment.
9586 GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isDest)
9588 GenTree* effectiveVal = tree->gtEffectiveVal();
9590 if (!varTypeIsStruct(asgType))
9592 if (effectiveVal->OperIsIndir())
9594 GenTree* addr = effectiveVal->AsIndir()->Addr();
9595 if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType))
9597 effectiveVal = addr->gtGetOp1();
9599 else if (effectiveVal->OperIsBlk())
9601 effectiveVal = fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
9605 effectiveVal->gtType = asgType;
9608 else if (effectiveVal->TypeGet() != asgType)
9610 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9611 effectiveVal = gtNewOperNode(GT_IND, asgType, addr);
9616 GenTreeIndir* indirTree = nullptr;
9617 GenTreeLclVarCommon* lclNode = nullptr;
9618 bool needsIndirection = true;
9620 if (effectiveVal->OperIsIndir())
9622 indirTree = effectiveVal->AsIndir();
9623 GenTree* addr = effectiveVal->AsIndir()->Addr();
9624 if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
9626 lclNode = addr->gtGetOp1()->AsLclVarCommon();
9629 else if (effectiveVal->OperGet() == GT_LCL_VAR)
9631 lclNode = effectiveVal->AsLclVarCommon();
9634 if (varTypeIsSIMD(asgType))
9636 if ((indirTree != nullptr) && (lclNode == nullptr) && (indirTree->Addr()->OperGet() == GT_ADDR) &&
9637 (indirTree->Addr()->gtGetOp1()->gtOper == GT_SIMD))
9640 needsIndirection = false;
9641 effectiveVal = indirTree->Addr()->gtGetOp1();
9643 if (effectiveVal->OperIsSIMD())
9645 needsIndirection = false;
9648 #endif // FEATURE_SIMD
9649 if (lclNode != nullptr)
9651 LclVarDsc* varDsc = &(lvaTable[lclNode->gtLclNum]);
9652 if (varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth))
9654 #ifndef LEGACY_BACKEND
9655 effectiveVal = lclNode;
9656 needsIndirection = false;
9657 #endif // !LEGACY_BACKEND
9661 // This may be a lclVar that was determined to be address-exposed.
9662 effectiveVal->gtFlags |= (lclNode->gtFlags & GTF_ALL_EFFECT);
9665 if (needsIndirection)
9667 if (indirTree != nullptr)
9669 // We should never find a struct indirection on the lhs of an assignment.
9670 assert(!isDest || indirTree->OperIsBlk());
9671 if (!isDest && indirTree->OperIsBlk())
9673 (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
9679 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9682 CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal);
9683 if (clsHnd == NO_CLASS_HANDLE)
9685 newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth);
9689 newTree = gtNewObjNode(clsHnd, addr);
9690 if (isDest && (newTree->OperGet() == GT_OBJ))
9692 gtSetObjGcInfo(newTree->AsObj());
9694 if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0))
9696 // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
9697 // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
9698 // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
9699 // separately now to avoid excess diffs.
9700 newTree->gtFlags &= ~(GTF_GLOB_EFFECT);
9706 newTree = new (this, GT_IND) GenTreeIndir(GT_IND, asgType, addr, nullptr);
9708 effectiveVal = newTree;
9712 tree = effectiveVal;
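// Illustrative sketch (hypothetical): for a TYP_STRUCT operand that still needs
// an indirection, fgMorphBlockOperand wraps its address as
//
//   OBJ<clsHnd>(ADDR(operand))        when a class handle is available, or
//   BLK<blockWidth>(ADDR(operand))    when it is not,
//
// while a lclVar whose exact size matches blockWidth is left bare for the
// non-legacy backend, per the code above.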
9716 //------------------------------------------------------------------------
9717 // fgMorphUnsafeBlk: Convert a CopyObj with a dest on the stack to a GC Unsafe CopyBlk
9720 // dest - the GT_OBJ or GT_STORE_OBJ
9723 // The destination must be known (by the caller) to be on the stack.
9726 // If we have a CopyObj with a dest on the stack, and its size is small enough
9727 // to be completely unrolled (i.e. between [16..64] bytes), we will convert it into a
9728 // GC Unsafe CopyBlk that is non-interruptible.
9729 // This is not supported for the JIT32_GCENCODER, in which case this method is a no-op.
9731 void Compiler::fgMorphUnsafeBlk(GenTreeObj* dest)
9733 #if defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
9734 assert(dest->gtGcPtrCount != 0);
9735 unsigned blockWidth = dest->AsBlk()->gtBlkSize;
9737 bool destOnStack = false;
9738 GenTree* destAddr = dest->Addr();
9739 assert(destAddr->IsLocalAddrExpr() != nullptr);
9741 if ((blockWidth >= (2 * TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT))
9743 genTreeOps newOper = (dest->gtOper == GT_OBJ) ? GT_BLK : GT_STORE_BLK;
9744 dest->SetOper(newOper);
9745 dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
9747 #endif // defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
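// Illustrative example (hypothetical C# source): a copy such as
//
//   struct Refs { object a; object b; object c; object d; }   // 32 bytes on x64
//   Refs tmp = refs;   // 'tmp' is a stack local
//
// falls in the [16..64] byte range, so the CopyObj is converted to a GC Unsafe
// CopyBlk: the unrolled copy is marked non-interruptible so the GC never observes
// a partially updated set of references.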
9750 //------------------------------------------------------------------------
9751 // fgMorphCopyBlock: Perform the Morphing of block copy
9754 // tree - a block copy (i.e. an assignment with a block op on the lhs).
9757 // We can return the original block copy unmodified (least desirable, but always correct)
9758 // We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable).
9759 // If we have performed struct promotion of the Source() or the Dest() then we will try to
9760 // perform a field by field assignment for each of the promoted struct fields.
9763 // The child nodes for tree have already been Morphed.
9766 // If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest().
9767 // When performing a field by field assignment we can have one of Source() or Dest() treated as a blob of bytes
9768 // and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
9769 // If the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
9770 // cannot use a field by field assignment and must leave the original block copy unmodified.
9772 GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
9774 noway_assert(tree->OperIsCopyBlkOp());
9776 JITDUMP("\nfgMorphCopyBlock:");
9778 bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0;
9780 GenTree* asg = tree;
9781 GenTree* rhs = asg->gtGetOp2();
9782 GenTree* dest = asg->gtGetOp1();
9784 #if FEATURE_MULTIREG_RET
9785 // If this is a multi-reg return, we will not do any morphing of this node.
9786 if (rhs->IsMultiRegCall())
9788 assert(dest->OperGet() == GT_LCL_VAR);
9789 JITDUMP(" not morphing a multireg call return\n");
9792 #endif // FEATURE_MULTIREG_RET
9794 // If we have an array index on the lhs, we need to create an obj node.
9796 dest = fgMorphBlkNode(dest, true);
9797 if (dest != asg->gtGetOp1())
9799 asg->gtOp.gtOp1 = dest;
9800 if (dest->IsLocal())
9802 dest->gtFlags |= GTF_VAR_DEF;
9805 asg->gtType = dest->TypeGet();
9806 rhs = fgMorphBlkNode(rhs, false);
9808 asg->gtOp.gtOp2 = rhs;
9810 GenTreePtr oldTree = tree;
9811 GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
9815 JITDUMP(" using oneAsgTree.\n");
9820 unsigned blockWidth;
9821 bool blockWidthIsConst = false;
9822 GenTreeLclVarCommon* lclVarTree = nullptr;
9823 GenTreeLclVarCommon* srcLclVarTree = nullptr;
9824 unsigned destLclNum = BAD_VAR_NUM;
9825 LclVarDsc* destLclVar = nullptr;
9826 FieldSeqNode* destFldSeq = nullptr;
9827 bool destDoFldAsg = false;
9828 GenTreePtr destAddr = nullptr;
9829 GenTreePtr srcAddr = nullptr;
9830 bool destOnStack = false;
9831 bool hasGCPtrs = false;
9833 JITDUMP("block assignment to morph:\n");
9836 if (dest->IsLocal())
9838 blockWidthIsConst = true;
9840 if (dest->gtOper == GT_LCL_VAR)
9842 lclVarTree = dest->AsLclVarCommon();
9843 destLclNum = lclVarTree->gtLclNum;
9844 destLclVar = &lvaTable[destLclNum];
9845 if (destLclVar->lvType == TYP_STRUCT)
9847 // It would be nice if lvExactSize always corresponded to the size of the struct,
9848 // but it doesn't always for the temps that the importer creates when it spills side effects.
9850 // TODO-Cleanup: Determine when this happens, and whether it can be changed.
9851 blockWidth = info.compCompHnd->getClassSize(destLclVar->lvVerTypeInfo.GetClassHandle());
9855 blockWidth = genTypeSize(destLclVar->lvType);
9857 hasGCPtrs = destLclVar->lvStructGcCount != 0;
9861 assert(dest->TypeGet() != TYP_STRUCT);
9862 assert(dest->gtOper == GT_LCL_FLD);
9863 blockWidth = genTypeSize(dest->TypeGet());
9864 destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
9865 destFldSeq = dest->AsLclFld()->gtFieldSeq;
9870 GenTree* effectiveDest = dest->gtEffectiveVal();
9871 if (effectiveDest->OperGet() == GT_IND)
9873 assert(dest->TypeGet() != TYP_STRUCT);
9874 blockWidth = genTypeSize(effectiveDest->TypeGet());
9875 blockWidthIsConst = true;
9876 if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
9878 destAddr = dest->gtGetOp1();
9883 assert(effectiveDest->OperIsBlk());
9884 GenTreeBlk* blk = effectiveDest->AsBlk();
9886 blockWidth = blk->gtBlkSize;
9887 blockWidthIsConst = (blk->gtOper != GT_DYN_BLK);
9888 if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
9890 destAddr = blk->Addr();
9893 if (destAddr != nullptr)
9895 noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL);
9896 if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
9899 destLclNum = lclVarTree->gtLclNum;
9900 destLclVar = &lvaTable[destLclNum];
9905 if (destLclVar != nullptr)
9907 #if LOCAL_ASSERTION_PROP
9908 // Kill everything about destLclNum (and its field locals)
9909 if (optLocalAssertionProp)
9911 if (optAssertionCount > 0)
9913 fgKillDependentAssertions(destLclNum DEBUGARG(tree));
9916 #endif // LOCAL_ASSERTION_PROP
9918 if (destLclVar->lvPromoted && blockWidthIsConst)
9920 noway_assert(varTypeIsStruct(destLclVar));
9921 noway_assert(!opts.MinOpts());
9923 if (blockWidth == destLclVar->lvExactSize)
9925 JITDUMP(" (destDoFldAsg=true)");
9926 // We may decide later that a copyblk is required when this struct has holes
9927 destDoFldAsg = true;
9931 JITDUMP(" with mismatched dest size");
9936 FieldSeqNode* srcFldSeq = nullptr;
9937 unsigned srcLclNum = BAD_VAR_NUM;
9938 LclVarDsc* srcLclVar = nullptr;
9939 bool srcDoFldAsg = false;
9943 srcLclVarTree = rhs->AsLclVarCommon();
9944 srcLclNum = srcLclVarTree->gtLclNum;
9945 if (rhs->OperGet() == GT_LCL_FLD)
9947 srcFldSeq = rhs->AsLclFld()->gtFieldSeq;
9950 else if (rhs->OperIsIndir())
9952 if (rhs->gtOp.gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq))
9954 srcLclNum = srcLclVarTree->gtLclNum;
9958 srcAddr = rhs->gtOp.gtOp1;
9962 if (srcLclNum != BAD_VAR_NUM)
9964 srcLclVar = &lvaTable[srcLclNum];
9966 if (srcLclVar->lvPromoted && blockWidthIsConst)
9968 noway_assert(varTypeIsStruct(srcLclVar));
9969 noway_assert(!opts.MinOpts());
9971 if (blockWidth == srcLclVar->lvExactSize)
9973 JITDUMP(" (srcDoFldAsg=true)");
9974 // We may decide later that a copyblk is required when this struct has holes
9979 JITDUMP(" with mismatched src size");
9984 // Check to see if we are required to do a copy block because the struct contains holes
9985 // and either the src or dest is externally visible
9987 bool requiresCopyBlock = false;
9988 bool srcSingleLclVarAsg = false;
9989 bool destSingleLclVarAsg = false;
9991 if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq))
9993 // Self-assign; no effect.
9994 GenTree* nop = gtNewNothingNode();
9995 INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
9999 // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
10000 if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct))
10002 requiresCopyBlock = true;
10005 // Can we use field by field assignment for the dest?
10006 if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
10008 JITDUMP(" dest contains custom layout and contains holes");
10009 // C++ style CopyBlock with holes
10010 requiresCopyBlock = true;
10013 // Can we use field by field assignment for the src?
10014 if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles)
10016 JITDUMP(" src contains custom layout and contains holes");
10017 // C++ style CopyBlock with holes
10018 requiresCopyBlock = true;
10021 #if defined(_TARGET_ARM_)
10022 if ((rhs->OperIsIndir()) && (rhs->gtFlags & GTF_IND_UNALIGNED))
10024 JITDUMP(" rhs is unaligned");
10025 requiresCopyBlock = true;
10028 if (asg->gtFlags & GTF_BLK_UNALIGNED)
10030 JITDUMP(" asg is unaligned");
10031 requiresCopyBlock = true;
10033 #endif // _TARGET_ARM_
10035 if (dest->OperGet() == GT_OBJ && dest->AsBlk()->gtBlkOpGcUnsafe)
10037 requiresCopyBlock = true;
10040 // Can't use field by field assignment if the src is a call.
10041 if (rhs->OperGet() == GT_CALL)
10043 JITDUMP(" src is a call");
10044 // C++ style CopyBlock with holes
10045 requiresCopyBlock = true;
10048 // If we passed the above checks, then we will check these two
10049 if (!requiresCopyBlock)
10051 // Are both dest and src promoted structs?
10052 if (destDoFldAsg && srcDoFldAsg)
10054 // Both structs should be of the same type, or each have a single field of the same type.
10055 // If not we will use a copy block.
10056 if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() !=
10057 lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
10059 unsigned destFieldNum = lvaTable[destLclNum].lvFieldLclStart;
10060 unsigned srcFieldNum = lvaTable[srcLclNum].lvFieldLclStart;
10061 if ((lvaTable[destLclNum].lvFieldCnt != 1) || (lvaTable[srcLclNum].lvFieldCnt != 1) ||
10062 (lvaTable[destFieldNum].lvType != lvaTable[srcFieldNum].lvType))
10064 requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
10065 JITDUMP(" with mismatched types");
10069 // Are neither dest or src promoted structs?
10070 else if (!destDoFldAsg && !srcDoFldAsg)
10072 requiresCopyBlock = true; // Leave as a CopyBlock
10073 JITDUMP(" with no promoted structs");
10075 else if (destDoFldAsg)
10077 // Match the following kinds of trees:
10078 // fgMorphTree BB01, stmt 9 (before)
10079 // [000052] ------------ const int 8
10080 // [000053] -A--G------- copyBlk void
10081 // [000051] ------------ addr byref
10082 // [000050] ------------ lclVar long V07 loc5
10083 // [000054] --------R--- <list> void
10084 // [000049] ------------ addr byref
10085 // [000048] ------------ lclVar struct(P) V06 loc4
10086 // long V06.h (offs=0x00) -> V17 tmp9
10087 // Yields this transformation
10088 // fgMorphCopyBlock (after):
10089 // [000050] ------------ lclVar long V07 loc5
10090 // [000085] -A---------- = long
10091 // [000083] D------N---- lclVar long V17 tmp9
10093 if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) &&
10094 (blockWidth == genTypeSize(srcLclVar->TypeGet())))
10096 // Reject the following tree:
10097 // - seen on x86chk jit\jit64\hfa\main\hfa_sf3E_r.exe
10099 // fgMorphTree BB01, stmt 6 (before)
10100 // [000038] ------------- const int 4
10101 // [000039] -A--G-------- copyBlk void
10102 // [000037] ------------- addr byref
10103 // [000036] ------------- lclVar int V05 loc3
10104 // [000040] --------R---- <list> void
10105 // [000035] ------------- addr byref
10106 // [000034] ------------- lclVar struct(P) V04 loc2
10107 // float V04.f1 (offs=0x00) -> V13 tmp6
10108 // As this would transform into
10109 // float V13 = int V05
10111 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart;
10112 var_types destType = lvaTable[fieldLclNum].TypeGet();
10113 if (srcLclVar->TypeGet() == destType)
10115 srcSingleLclVarAsg = true;
10121 assert(srcDoFldAsg);
10122 // Check for the symmetric case (which happens for the _pointer field of promoted spans):
10124 // [000240] -----+------ /--* lclVar struct(P) V18 tmp9
10125 // /--* byref V18._value (offs=0x00) -> V30 tmp21
10126 // [000245] -A------R--- * = struct (copy)
10127 // [000244] -----+------ \--* obj(8) struct
10128 // [000243] -----+------ \--* addr byref
10129 // [000242] D----+-N---- \--* lclVar byref V28 tmp19
10131 if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) &&
10132 (blockWidth == genTypeSize(destLclVar->TypeGet())))
10134 // Check for type agreement
10135 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart;
10136 var_types srcType = lvaTable[fieldLclNum].TypeGet();
10137 if (destLclVar->TypeGet() == srcType)
10139 destSingleLclVarAsg = true;
10145 // If we require a copy block, then set both of the field assign bools to false
10146 if (requiresCopyBlock)
10148 // If a copy block is required then we won't do field by field assignments
10149 destDoFldAsg = false;
10150 srcDoFldAsg = false;
10153 JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");
10155 // Mark the dest/src structs as DoNotEnreg
10156 // when they are not reg-sized non-field-addressed structs and we are using a CopyBlock
10157 // or the struct is not promoted
10159 if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg)
10161 if (!destLclVar->lvRegStruct)
10163 // Mark it as DoNotEnregister.
10164 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
10168 if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg)
10170 if (!srcLclVar->lvRegStruct)
10172 lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp));
10176 if (requiresCopyBlock)
10178 #if CPU_USES_BLOCK_MOVE
10179 compBlkOpUsed = true;
10181 var_types asgType = dest->TypeGet();
10182 dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
10183 asg->gtOp.gtOp1 = dest;
10184 asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
10186 // Note that the unrolling of CopyBlk is only implemented on some platforms.
10187 // Currently that includes x64 and ARM but not x86: the code generation for this
10188 // construct requires the ability to mark certain regions of the generated code
10189 // as non-interruptible, and the GC encoding for the latter platform does not
10190 // have this capability.
10192 // If we have a CopyObj with a dest on the stack
10193 // we will convert it into a GC Unsafe CopyBlk that is non-interruptible
10194 // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes).
10195 // (This is not supported for the JIT32_GCENCODER, for which fgMorphUnsafeBlk is a no-op.)
10197 if (destOnStack && (dest->OperGet() == GT_OBJ))
10199 fgMorphUnsafeBlk(dest->AsObj());
10202 // Eliminate the "OBJ or BLK" node on the rhs.
10203 rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/);
10204 asg->gtOp.gtOp2 = rhs;
10206 #ifdef LEGACY_BACKEND
10207 if (!rhs->OperIsIndir())
10209 noway_assert(rhs->gtOper == GT_LCL_VAR);
10210 GenTree* rhsAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, rhs);
10211 rhs = gtNewOperNode(GT_IND, TYP_STRUCT, rhsAddr);
10213 #endif // LEGACY_BACKEND
10214 // Formerly, liveness did not consider copyblk arguments of simple types as being
10215 // a use or def, so these variables were marked as address-exposed.
10216 // TODO-1stClassStructs: This should no longer be needed.
10217 if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar))
10219 JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
10220 lvaTable[srcLclNum].lvAddrExposed = true;
10223 if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar))
10225 JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
10226 lvaTable[destLclNum].lvAddrExposed = true;
10233 // Otherwise we convert this CopyBlock into individual field by field assignments
10238 GenTreePtr addrSpill = nullptr;
10239 unsigned addrSpillTemp = BAD_VAR_NUM;
10240 bool addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame
10242 unsigned fieldCnt = DUMMY_INIT(0);
10244 if (destDoFldAsg && srcDoFldAsg)
10246 // To do fieldwise assignments for both sides, they'd better be the same struct type!
10247 // All of these conditions were checked above...
10248 assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
10249 assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);
10251 fieldCnt = destLclVar->lvFieldCnt;
10252 goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field
10255 else if (destDoFldAsg)
10257 fieldCnt = destLclVar->lvFieldCnt;
10258 rhs = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/);
10259 if (srcAddr == nullptr)
10261 srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */);
10266 assert(srcDoFldAsg);
10267 fieldCnt = srcLclVar->lvFieldCnt;
10268 dest = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/);
10269 if (dest->OperIsBlk())
10271 (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT);
10273 destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
10278 noway_assert(!srcDoFldAsg);
10279 if (gtClone(srcAddr))
10281 // srcAddr is simple expression. No need to spill.
10282 noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
10286 // srcAddr is complex expression. Clone and spill it (unless the destination is
10287 // a struct local that only has one field, in which case we'd only use the
10288 // address value once...)
10289 if (destLclVar->lvFieldCnt > 1)
10291 addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr'
10292 noway_assert(addrSpill != nullptr);
10299 noway_assert(!destDoFldAsg);
10301 // If we're doing field-wise stores, to an address within a local, and we copy
10302 // the address into "addrSpill", do *not* declare the original local var node in the
10303 // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the
10304 // field-wise assignments as an "indirect" assignment to the local.
10305 // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before
10307 if (lclVarTree != nullptr)
10309 lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
10312 if (gtClone(destAddr))
10314 // destAddr is simple expression. No need to spill
10315 noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
10319 // destAddr is complex expression. Clone and spill it (unless
10320 // the source is a struct local that only has one field, in which case we'd only
10321 // use the address value once...)
10322 if (srcLclVar->lvFieldCnt > 1)
10324 addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr'
10325 noway_assert(addrSpill != nullptr);
10328 // TODO-CQ: this should be based on a more general
10329 // "BaseAddress" method, that handles fields of structs, before or after
10331 if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR)
10333 if (addrSpill->gtOp.gtOp1->IsLocal())
10335 // We will *not* consider this to define the local, but rather have each individual field assign
10336 // be a definition.
10337 addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
10338 assert(lvaGetPromotionType(addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum) !=
10339 PROMOTION_TYPE_INDEPENDENT);
10340 addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our
10341 // local stack frame
10347 if (addrSpill != nullptr)
10349 // Spill the (complex) address to a BYREF temp.
10350 // Note, at most one address may need to be spilled.
10351 addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
10353 lvaTable[addrSpillTemp].lvType = TYP_BYREF;
10355 if (addrSpillIsStackDest)
10357 lvaTable[addrSpillTemp].lvStackByref = true;
10360 tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill);
10362 // If we are assigning the address of a LclVar here
10363 // liveness does not account for this kind of address taken use.
10365 // We have to mark this local as address exposed so
10366 // that we don't delete the definition for this LclVar
10367 // as a dead store later on.
10369 if (addrSpill->OperGet() == GT_ADDR)
10371 GenTreePtr addrOp = addrSpill->gtOp.gtOp1;
10372 if (addrOp->IsLocal())
10374 unsigned lclVarNum = addrOp->gtLclVarCommon.gtLclNum;
10375 lvaTable[lclVarNum].lvAddrExposed = true;
10376 lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
10383 for (unsigned i = 0; i < fieldCnt; ++i)
10385 FieldSeqNode* curFieldSeq = nullptr;
10388 noway_assert(destLclNum != BAD_VAR_NUM);
10389 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
10390 dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
10391 // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
10392 if (destAddr != nullptr)
10394 noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
10395 dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
10399 noway_assert(lclVarTree != nullptr);
10400 dest->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
10402 // Don't CSE the lhs of an assignment.
10403 dest->gtFlags |= GTF_DONT_CSE;
10407 noway_assert(srcDoFldAsg);
10408 noway_assert(srcLclNum != BAD_VAR_NUM);
10409 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
10411 if (destSingleLclVarAsg)
10413 noway_assert(fieldCnt == 1);
10414 noway_assert(destLclVar != nullptr);
10415 noway_assert(addrSpill == nullptr);
10417 dest = gtNewLclvNode(destLclNum, destLclVar->TypeGet());
10423 assert(addrSpillTemp != BAD_VAR_NUM);
10424 dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
10428 dest = gtCloneExpr(destAddr);
10429 noway_assert(dest != nullptr);
10431 // Is the address of a local?
10432 GenTreeLclVarCommon* lclVarTree = nullptr;
10433 bool isEntire = false;
10434 bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr);
10435 if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
10437 lclVarTree->gtFlags |= GTF_VAR_DEF;
10440 lclVarTree->gtFlags |= GTF_VAR_USEASG;
10445 GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
10446 // Have to set the field sequence -- which means we need the field handle.
10447 CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
10448 CORINFO_FIELD_HANDLE fieldHnd =
10449 info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
10450 curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
10451 fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
10453 dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
10455 dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);
10457 // !!! The destination could be on stack. !!!
10458 // This flag will let us choose the correct write barrier.
10459 dest->gtFlags |= GTF_IND_TGTANYWHERE;
10465 noway_assert(srcLclNum != BAD_VAR_NUM);
10466 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
10467 src = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
10469 noway_assert(srcLclVarTree != nullptr);
10470 src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK;
10471 // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE,
10472 // but they are when they are under a GT_ADDR.
10473 src->gtFlags |= GTF_DONT_CSE;
10477 noway_assert(destDoFldAsg);
10478 noway_assert(destLclNum != BAD_VAR_NUM);
10479 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
10481 if (srcSingleLclVarAsg)
10483 noway_assert(fieldCnt == 1);
10484 noway_assert(srcLclVar != nullptr);
10485 noway_assert(addrSpill == nullptr);
10487 src = gtNewLclvNode(srcLclNum, srcLclVar->TypeGet());
10493 assert(addrSpillTemp != BAD_VAR_NUM);
10494 src = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
10498 src = gtCloneExpr(srcAddr);
10499 noway_assert(src != nullptr);
10502 CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle();
10503 CORINFO_FIELD_HANDLE fieldHnd =
10504 info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
10505 curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
10507 src = gtNewOperNode(GT_ADD, TYP_BYREF, src,
10508 new (this, GT_CNS_INT)
10509 GenTreeIntCon(TYP_I_IMPL, lvaTable[fieldLclNum].lvFldOffset, curFieldSeq));
10511 src = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), src);
10515 noway_assert(dest->TypeGet() == src->TypeGet());
10517 asg = gtNewAssignNode(dest, src);
10519 // If we spilled the address, and we didn't do individual field assignments to promoted fields,
10520 // and it was of a local, record the assignment as an indirect update of a local.
10521 if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM)
10523 curFieldSeq = GetFieldSeqStore()->Append(destFldSeq, curFieldSeq);
10524 bool isEntire = (genTypeSize(var_types(lvaTable[destLclNum].lvType)) == genTypeSize(dest->TypeGet()));
10525 IndirectAssignmentAnnotation* pIndirAnnot =
10526 new (this, CMK_Unknown) IndirectAssignmentAnnotation(destLclNum, curFieldSeq, isEntire);
10527 GetIndirAssignMap()->Set(asg, pIndirAnnot);
10530 #if LOCAL_ASSERTION_PROP
10531 if (optLocalAssertionProp)
10533 optAssertionGen(asg);
10535 #endif // LOCAL_ASSERTION_PROP
10539 tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
10550 tree->gtFlags |= GTF_LATE_ARG;
10554 if (tree != oldTree)
10556 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10561 printf("\nfgMorphCopyBlock (after):\n");
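// Illustrative example (hypothetical C# source) of the field-by-field expansion
// performed above when both sides are promoted structs:
//
//   struct Point { int x; int y; }
//   Point p = q;
//
// becomes a comma list of scalar assignments,
//
//   COMMA(ASG(V_p.x, V_q.x), ASG(V_p.y, V_q.y))
//
// whereas a struct with a custom layout containing holes keeps the original
// block copy, as required by the rules documented at the top of this function.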
10570 // insert conversions and normalize to make tree amenable to register
10571 // FP architectures
10572 GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree)
10574 if (tree->OperIsArithmetic())
10576 if (varTypeIsFloating(tree))
10578 GenTreePtr op1 = tree->gtOp.gtOp1;
10579 GenTreePtr op2 = tree->gtGetOp2();
10581 if (op1->TypeGet() != tree->TypeGet())
10583 tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), op1, tree->TypeGet());
10585 if (op2->TypeGet() != tree->TypeGet())
10587 tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), op2, tree->TypeGet());
10591 else if (tree->OperIsCompare())
10593 GenTreePtr op1 = tree->gtOp.gtOp1;
10595 if (varTypeIsFloating(op1))
10597 GenTreePtr op2 = tree->gtGetOp2();
10598 assert(varTypeIsFloating(op2));
10600 if (op1->TypeGet() != op2->TypeGet())
10602 // both had better be floating, just one bigger than the other
10603 if (op1->TypeGet() == TYP_FLOAT)
10605 assert(op2->TypeGet() == TYP_DOUBLE);
10606 tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
10608 else if (op2->TypeGet() == TYP_FLOAT)
10610 assert(op1->TypeGet() == TYP_DOUBLE);
10611 tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
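// Illustrative sketch (hypothetical): given a double-typed addition whose first
// operand is a float, fgMorphForRegisterFP rewrites
//
//   ADD.double(op1.float, op2.double)  ==>  ADD.double(CAST.double(op1), op2.double)
//
// and for a float-vs-double compare it widens the float operand to double, so
// register FP codegen only ever sees same-typed operands.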
10620 //--------------------------------------------------------------------------------------------------------------
10621 // fgMorphRecognizeBoxNullable:
10622 // Recognize this pattern:
10624 // stmtExpr void (IL 0x000... ???)
10626 // CNS_INT ref null
10628 // CALL help ref HELPER.CORINFO_HELP_BOX_NULLABLE
10629 // CNS_INT(h) long 0x7fed96836c8 class
10631 // FIELD struct value
10632 // LCL_VAR ref V00 this
10634 // which comes from this code:
10636 // return this.value==null;
10638 // and transform it into
10640 // stmtExpr void (IL 0x000... ???)
10642 // CNS_INT ref null
10646 // FIELD struct value
10647 // LCL_VAR ref V00 this
10650 // compare - Compare tree to optimize.
10653 // A tree that has a call to CORINFO_HELP_BOX_NULLABLE optimized away if the pattern is found;
10654 // the original tree otherwise.
10657 GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
10659 GenTree* op1 = compare->gtOp.gtOp1;
10660 GenTree* op2 = compare->gtOp.gtOp2;
10662 GenTreeCall* opCall;
10664 if (op1->IsCnsIntOrI() && op2->IsHelperCall())
10667 opCall = op2->AsCall();
10669 else if (op1->IsHelperCall() && op2->IsCnsIntOrI())
10672 opCall = op1->AsCall();
10679 if (!opCns->IsIntegralConst(0))
10684 if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE)
10689 // Get the nullable struct argument
10690 GenTree* arg = opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
10692 // Check for cases that are unsafe to optimize and return the unchanged tree
10693 if (arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || ((arg->gtFlags & GTF_LATE_ARG) != 0))
10698 // Replace the box with an access of the nullable 'hasValue' field which is at the zero offset
10699 GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, arg);
10703 compare->gtOp.gtOp1 = newOp;
10707 compare->gtOp.gtOp2 = newOp;
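// Note: the rewrite above is valid because Nullable<T> stores its 'hasValue'
// field at offset zero, so a TYP_BOOL indirection on the struct's address reads
// exactly the value that CORINFO_HELP_BOX_NULLABLE would have tested against
// null (the boxed result is null iff hasValue is false).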
10713 #ifdef FEATURE_SIMD
10715 //--------------------------------------------------------------------------------------------------------------
10716 // getSIMDStructFromField:
10717 // Check whether the field belongs to a simd struct. If it does, return the GenTreePtr for
10718 // the struct node, along with the base type, field index and simd size. If it does not, return nullptr.
10719 // Usually, if the tree node is from a simd lclvar which is not used in any SIMD intrinsic, we
10720 // should return nullptr, since in that case we should treat the SIMD struct as a regular struct.
10721 // However, if the caller wants the simd struct node regardless, it can set ignoreUsedInSIMDIntrinsic
10722 // to true. Then the IsUsedInSIMDIntrinsic check is skipped, and the SIMD struct node is returned
10723 // whenever the struct is a SIMD struct.
10726 // tree - GenTreePtr. This node will be checked to see whether it is a field
10727 // that belongs to a simd struct used for a simd intrinsic.
10728 // pBaseTypeOut - var_types pointer, if the tree node is the tree we want, we set *pBaseTypeOut
10729 // to simd lclvar's base type.
10730 // indexOut - unsigned pointer, if the tree is used for simd intrinsic, we will set *indexOut
10731 // equals to the index number of this field.
10732 // simdSizeOut - unsigned pointer, if the tree is used for simd intrinsic, set the *simdSizeOut
10733 // equals to the simd struct size which this tree belongs to.
10734 // ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will ignore
10735 // the UsedInSIMDIntrinsic check.
10738 // A GenTreePtr which points to the simd lclvar tree that the field belongs to. If the tree is not a
10739 // simd intrinsic related field, return nullptr.
10742 GenTreePtr Compiler::getSIMDStructFromField(GenTreePtr tree,
10743 var_types* pBaseTypeOut,
10744 unsigned* indexOut,
10745 unsigned* simdSizeOut,
10746 bool ignoreUsedInSIMDIntrinsic /*false*/)
10748 GenTreePtr ret = nullptr;
10749 if (tree->OperGet() == GT_FIELD)
10751 GenTreePtr objRef = tree->gtField.gtFldObj;
10752 if (objRef != nullptr)
10754 GenTreePtr obj = nullptr;
10755 if (objRef->gtOper == GT_ADDR)
10757 obj = objRef->gtOp.gtOp1;
10759 else if (ignoreUsedInSIMDIntrinsic)
10768 if (isSIMDTypeLocal(obj))
10770 unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
10771 LclVarDsc* varDsc = &lvaTable[lclNum];
10772 if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic)
10774 *simdSizeOut = varDsc->lvExactSize;
10775 *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj);
10779 else if (obj->OperGet() == GT_SIMD)
10782 GenTreeSIMD* simdNode = obj->AsSIMD();
10783 *simdSizeOut = simdNode->gtSIMDSize;
10784 *pBaseTypeOut = simdNode->gtSIMDBaseType;
10788 if (ret != nullptr)
10790 unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut);
10791 *indexOut = tree->gtField.gtFldOffset / BaseTypeSize;
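// Illustrative example (hypothetical): for a field access "v.Z" on a Vector4
// local that is used in SIMD intrinsics, this returns the lclVar node for "v"
// and sets *pBaseTypeOut = TYP_FLOAT, *indexOut = 2 (field offset 8 divided by
// sizeof(float)), and *simdSizeOut = 16.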
10796 /*****************************************************************************
10797 * If a read operation tries to access simd struct field, then transform the
10798 * operation to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
10799 * Otherwise, return the old tree.
10801 * tree - GenTreePtr. If this pointer points to simd struct which is used for simd
10802 * intrinsic, we will morph it as simd intrinsic SIMDIntrinsicGetItem.
10804 * A GenTreePtr which points to the new tree. If the tree is not for a simd intrinsic, the old tree is returned.
10808 GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree)
10810 unsigned index = 0;
10811 var_types baseType = TYP_UNKNOWN;
10812 unsigned simdSize = 0;
10813 GenTreePtr simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
10814 if (simdStructNode != nullptr)
10816 assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
10817 GenTree* op2 = gtNewIconNode(index);
10818 tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
10820 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
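// Illustrative sketch (hypothetical): a read of "v.Y" on a Vector4 local morphs
// into SIMDIntrinsicGetItem(v, CNS_INT 1) with base type TYP_FLOAT and simd size
// 16, replacing the field access with an element extraction.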
10826 /*****************************************************************************
10827 * Transform an assignment of a SIMD struct field to SIMD intrinsic
10828 * SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment,
10829 * then return the old tree.
10831 * tree - GenTreePtr. If this pointer points to simd struct which is used for simd
10832 * intrinsic, we will morph it as simd intrinsic set.
10834 * A GenTreePtr which points to the new tree. If the tree is not for a simd intrinsic, the old tree is returned.
10838 GenTreePtr Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree)
10840 assert(tree->OperGet() == GT_ASG);
10841 GenTreePtr op1 = tree->gtGetOp1();
10842 GenTreePtr op2 = tree->gtGetOp2();
10844 unsigned index = 0;
10845 var_types baseType = TYP_UNKNOWN;
10846 unsigned simdSize = 0;
10847 GenTreePtr simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize);
10848 if (simdOp1Struct != nullptr)
10850 // Generate the simd set intrinsic
10851 assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
10853 SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid;
10857 simdIntrinsicID = SIMDIntrinsicSetX;
10860 simdIntrinsicID = SIMDIntrinsicSetY;
10863 simdIntrinsicID = SIMDIntrinsicSetZ;
10866 simdIntrinsicID = SIMDIntrinsicSetW;
10869 noway_assert(!"There is no set intrinsic for index bigger than 3");
10872 GenTreePtr target = gtClone(simdOp1Struct);
10873 assert(target != nullptr);
10874 GenTreePtr simdTree = gtNewSIMDNode(target->gtType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize);
10875 tree->gtOp.gtOp1 = target;
10876 tree->gtOp.gtOp2 = simdTree;
10878 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
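// Illustrative sketch (hypothetical): an assignment "v.X = f" on a Vector4 local
// morphs into ASG(v, SIMDIntrinsicSetX(v, f)); the whole vector is recomputed
// and stored back rather than writing through the field's address.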
10885 #endif // FEATURE_SIMD
10887 /*****************************************************************************
10889 * Transform the given GTK_SMPOP tree for code generation.
10893 #pragma warning(push)
10894 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
10896 GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
10898 // this extra scope is a workaround for a gcc bug
10899 // the inline destructor for ALLOCA_CHECK confuses the control
10900 // flow and gcc thinks that the function never returns
10903 assert(tree->OperKind() & GTK_SMPOP);
10905 /* The steps in this function are :
10906 o Perform required preorder processing
10907 o Process the first, then second operand, if any
10908 o Perform required postorder morphing
10909 o Perform optional postorder morphing if optimizing
10912 bool isQmarkColon = false;
10914 #if LOCAL_ASSERTION_PROP
10915 AssertionIndex origAssertionCount = DUMMY_INIT(0);
10916 AssertionDsc* origAssertionTab = DUMMY_INIT(NULL);
10918 AssertionIndex thenAssertionCount = DUMMY_INIT(0);
10919 AssertionDsc* thenAssertionTab = DUMMY_INIT(NULL);
10924 tree = fgMorphForRegisterFP(tree);
10927 genTreeOps oper = tree->OperGet();
10928 var_types typ = tree->TypeGet();
10929 GenTreePtr op1 = tree->gtOp.gtOp1;
10930 GenTreePtr op2 = tree->gtGetOp2IfPresent();
10932 /*-------------------------------------------------------------------------
10933 * First do any PRE-ORDER processing
10938 // Some arithmetic operators need to use a helper call to the EE
10942 tree = fgDoNormalizeOnStore(tree);
10943 /* fgDoNormalizeOnStore can change op2 */
10944 noway_assert(op1 == tree->gtOp.gtOp1);
10945 op2 = tree->gtOp.gtOp2;
10947 #ifdef FEATURE_SIMD
10949 // We should check whether op2 should be assigned to a SIMD field or not.
10950 // If it is, we should translate the tree to a simd intrinsic.
10951 assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0));
10952 GenTreePtr newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree);
10953 typ = tree->TypeGet();
10954 op1 = tree->gtGetOp1();
10955 op2 = tree->gtGetOp2();
10957 assert((tree == newTree) && (tree->OperGet() == oper));
10958 if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
10960 tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
10983 // We can't CSE the LHS of an assignment. Only r-values can be CSEed.
10984 // Previously, the "lhs" (addr) of a block op was CSE'd. So, to duplicate the former
10985 // behavior, allow CSE'ing if it is a struct type (or a TYP_REF transformed from a struct type)
10986 // TODO-1stClassStructs: improve this.
10987 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
10989 op1->gtFlags |= GTF_DONT_CSE;
10995 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
10996 op1->gtFlags |= GTF_DONT_CSE;
11004 if (op1->OperKind() & GTK_RELOP)
11006 noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK));
11007 /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does
11008 not need to materialize the result as a 0 or 1. */
11010 /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */
11011 op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
11013 // Request that the codegen for op1 sets the condition flags
11014 // when it generates the code for op1.
11016 // Codegen for op1 must set the condition flags if
11017 // this method returns true.
11019 op1->gtRequestSetFlags();
11023 GenTreePtr effOp1 = op1->gtEffectiveVal();
11024 noway_assert((effOp1->gtOper == GT_CNS_INT) &&
11025 (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1)));
11030 #if LOCAL_ASSERTION_PROP
11031 if (optLocalAssertionProp)
11034 isQmarkColon = true;
11039 return fgMorphArrayIndex(tree);
11042 return fgMorphCast(tree);
11046 #ifndef _TARGET_64BIT_
11047 if (typ == TYP_LONG)
11049 /* For (long)int1 * (long)int2, we don't actually do the
11050 casts, and just multiply the 32 bit values, which will
11051 give us the 64 bit result in edx:eax */
11054 if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST &&
11055 genActualType(op1->CastFromType()) == TYP_INT &&
11056 genActualType(op2->CastFromType()) == TYP_INT) &&
11057 !op1->gtOverflow() && !op2->gtOverflow())
11059 // The casts have to be of the same signedness.
11060 if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED))
11062 // We see if we can force an int constant to change its signedness
11063 GenTreePtr constOp;
11064 if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT)
11066 else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT)
11069 goto NO_MUL_64RSLT;
11071 if (((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000)))
11072 constOp->gtFlags ^= GTF_UNSIGNED;
11074 goto NO_MUL_64RSLT;
11077 // The only combination that can overflow
11078 if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED))
11079 goto NO_MUL_64RSLT;
11081 /* Remaining combinations can never overflow during long mul. */
11083 tree->gtFlags &= ~GTF_OVERFLOW;
11085 /* Do unsigned mul only if the casts were unsigned */
11087 tree->gtFlags &= ~GTF_UNSIGNED;
11088 tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED;
11090 /* Since we are committing to GTF_MUL_64RSLT, we don't want
11091 the casts to be folded away. So morph the castees directly */
11093 op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1);
11094 op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1);
11096 // Propagate side effect flags up the tree
11097 op1->gtFlags &= ~GTF_ALL_EFFECT;
11098 op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11099 op2->gtFlags &= ~GTF_ALL_EFFECT;
11100 op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11102 // If the GT_MUL can be altogether folded away, we should do that.
11104 if ((op1->gtCast.CastOp()->OperKind() & op2->gtCast.CastOp()->OperKind() & GTK_CONST) &&
11105 opts.OptEnabled(CLFLG_CONSTANTFOLD))
11107 tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1);
11108 tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2);
11109 noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST);
11110 tree = gtFoldExprConst(tree);
11111 noway_assert(tree->OperIsConst());
11115 tree->gtFlags |= GTF_MUL_64RSLT;
11117 // If op1 and op2 are unsigned casts, we need to do an unsigned mult
11118 tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED);
11120 // Insert GT_NOP nodes for the cast operands so that they do not get folded
11121 // And propagate the new flags. We don't want to CSE the casts because
11122 // codegen expects GTF_MUL_64RSLT muls to have a certain layout.
11124 if (op1->gtCast.CastOp()->OperGet() != GT_NOP)
11126 op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
11127 op1->gtFlags &= ~GTF_ALL_EFFECT;
11128 op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
11131 if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
11133 op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
11134 op2->gtFlags &= ~GTF_ALL_EFFECT;
11135 op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
11138 op1->gtFlags |= GTF_DONT_CSE;
11139 op2->gtFlags |= GTF_DONT_CSE;
11141 tree->gtFlags &= ~GTF_ALL_EFFECT;
11142 tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);
11144 goto DONE_MORPHING_CHILDREN;
11146 else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0)
11149 if (tree->gtOverflow())
11150 helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF;
11152 helper = CORINFO_HELP_LMUL;
11154 goto USE_HELPER_FOR_ARITH;
11158 /* We are seeing this node again. We have decided to use
11159 GTF_MUL_64RSLT, so leave it alone. */
11161 assert(tree->gtIsValid64RsltMul());
11164 #endif // !_TARGET_64BIT_
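// Illustrative example (hypothetical C# source) of the GTF_MUL_64RSLT pattern
// recognized above on 32-bit targets:
//
//   long Mul(int a, int b) => (long)a * (long)b;
//
// The two casts are kept (their operands wrapped in GT_NOP so they cannot be
// folded away) and a single 32x32->64 multiply yields the result in edx:eax,
// avoiding the CORINFO_HELP_LMUL helper call.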
11169 #ifndef _TARGET_64BIT_
11170 if (typ == TYP_LONG)
11172 helper = CORINFO_HELP_LDIV;
11173 goto USE_HELPER_FOR_ARITH;
11176 #if USE_HELPERS_FOR_INT_DIV
11177 if (typ == TYP_INT && !fgIsSignedDivOptimizable(op2))
11179 helper = CORINFO_HELP_DIV;
11180 goto USE_HELPER_FOR_ARITH;
11183 #endif // !_TARGET_64BIT_
11185 #ifndef LEGACY_BACKEND
11186 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
11188 op2 = gtFoldExprConst(op2);
11190 #endif // !LEGACY_BACKEND
11195 #ifndef _TARGET_64BIT_
11196 if (typ == TYP_LONG)
11198 helper = CORINFO_HELP_ULDIV;
11199 goto USE_HELPER_FOR_ARITH;
11201 #if USE_HELPERS_FOR_INT_DIV
11202 if (typ == TYP_INT && !fgIsUnsignedDivOptimizable(op2))
11204 helper = CORINFO_HELP_UDIV;
11205 goto USE_HELPER_FOR_ARITH;
11208 #endif // _TARGET_64BIT_
11213 if (varTypeIsFloating(typ))
11215 helper = CORINFO_HELP_DBLREM;
11217 if (op1->TypeGet() == TYP_FLOAT)
11219 if (op2->TypeGet() == TYP_FLOAT)
11221 helper = CORINFO_HELP_FLTREM;
11225 tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
11228 else if (op2->TypeGet() == TYP_FLOAT)
11230 tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
11232 goto USE_HELPER_FOR_ARITH;
11235 // Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod.
11236 // A similar optimization for signed mod will not work for a negative perfectly divisible
11237 // HI-word. To make it correct, we would need to divide without the sign and then flip the
11238 // result sign after mod. This requires 18 opcodes + flow, making it not worthwhile to inline.
11239 goto ASSIGN_HELPER_FOR_MOD;
11243 #ifdef _TARGET_ARMARCH_
11245 // Note for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization
11247 #else // _TARGET_XARCH
11248 /* If this is an unsigned long mod with op2 which is a cast to long from a
11249 constant int, then don't morph to a call to the helper. This can be done
11250 faster inline using idiv.
11254 if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
11255 ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
11256 ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
11258 if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
11259 op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
11260 op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
11261 (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
11263 tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
11264 noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
11267 if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 &&
11268 op2->gtIntConCommon.LngValue() <= 0x3fffffff)
11270 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
11271 noway_assert(op1->TypeGet() == TYP_LONG);
11273 // Update flags for op1 morph
11274 tree->gtFlags &= ~GTF_ALL_EFFECT;
11276 tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant
11278 // If op1 is a constant, then do constant folding of the division operator
11279 if (op1->gtOper == GT_CNS_NATIVELONG)
11281 tree = gtFoldExpr(tree);
11286 #endif // _TARGET_XARCH
11288 ASSIGN_HELPER_FOR_MOD:
11290 // For "val % 1", return 0 if op1 doesn't have any side effects
11291 // and we are not in the CSE phase; in the CSE phase we cannot discard 'tree'
11292 // because it may contain CSE expressions that we haven't yet examined.
11294 if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase)
11296 if (op2->IsIntegralConst(1))
11298 GenTreePtr zeroNode = gtNewZeroConNode(typ);
11300 zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
11302 DEBUG_DESTROY_NODE(tree);
11307 #ifndef _TARGET_64BIT_
11308 if (typ == TYP_LONG)
11310 helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD;
11311 goto USE_HELPER_FOR_ARITH;
11314 #if USE_HELPERS_FOR_INT_DIV
11315 if (typ == TYP_INT)
11317 if (oper == GT_UMOD && !fgIsUnsignedModOptimizable(op2))
11319 helper = CORINFO_HELP_UMOD;
11320 goto USE_HELPER_FOR_ARITH;
11322 else if (oper == GT_MOD && !fgIsSignedModOptimizable(op2))
11324 helper = CORINFO_HELP_MOD;
11325 goto USE_HELPER_FOR_ARITH;
11329 #endif // !_TARGET_64BIT_
11331 #ifndef LEGACY_BACKEND
11332 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
11334 op2 = gtFoldExprConst(op2);
11337 #ifdef _TARGET_ARM64_
11339 // For ARM64 we don't have a remainder instruction,
11340 // The architecture manual suggests the following transformation to
11341 // generate code for such an operator:
11343 // a % b = a - (a / b) * b;
11345 // NOTE: we should never need to perform this transformation when remorphing, since global morphing
11346 // should already have done so and we do not introduce new modulus nodes in later phases.
11347 assert(!optValnumCSE_phase);
11348 tree = fgMorphModToSubMulDiv(tree->AsOp());
11349 op1 = tree->gtOp.gtOp1;
11350 op2 = tree->gtOp.gtOp2;
11351 #else //_TARGET_ARM64_
11352 // If b is not a power of 2 constant then lowering replaces a % b
11353 // with a - (a / b) * b and applies magic division optimization to
11354 // a / b. The code may already contain an a / b expression (e.g.
11355 // x = a / 10; y = a % 10;) and then we end up with redundant code.
11356 // If we convert % to / here we give CSE the opportunity to eliminate
11357 // the redundant division. If there's no redundant division then
11358 // nothing is lost, lowering would have done this transform anyway.
11360 if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst()))
11362 ssize_t divisorValue = op2->AsIntCon()->IconValue();
11363 size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue)
11364 : static_cast<size_t>(abs(divisorValue));
11366 if (!isPow2(absDivisorValue))
11368 tree = fgMorphModToSubMulDiv(tree->AsOp());
11369 op1 = tree->gtOp.gtOp1;
11370 op2 = tree->gtOp.gtOp2;
11373 #endif //_TARGET_ARM64_
11374 #endif // !LEGACY_BACKEND
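// Illustrative arithmetic for the a % b = a - (a / b) * b transform above
// (hypothetical values): with a = 23 and b = 7, 23 - (23 / 7) * 7 = 23 - 21 = 2,
// which is exactly the remainder.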
11377 USE_HELPER_FOR_ARITH:
11379 /* We have to morph these arithmetic operations into helper calls
11380 before morphing the arguments (preorder), else the arguments
11381 won't get correct values of fgPtrArgCntCur.
11382 However, try to fold the tree first in case we end up with a
11383 simple node which won't need a helper call at all */
11385 noway_assert(tree->OperIsBinary());
11387 GenTreePtr oldTree = tree;
11389 tree = gtFoldExpr(tree);
11391 // Were we able to fold it ?
11392 // Note that gtFoldExpr may return a non-leaf even if successful
11393 // e.g. for something like "expr / 1" - see also bug #290853
11394 if (tree->OperIsLeaf() || (oldTree != tree))
11396 return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree);
11399 // Did we fold it into a comma node with throw?
11400 if (tree->gtOper == GT_COMMA)
11402 noway_assert(fgIsCommaThrow(tree));
11403 return fgMorphTree(tree);
11406 return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2));
11409 // normalize small integer return values
11410 if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) &&
11411 (op1->TypeGet() != TYP_VOID) && fgCastNeeded(op1, info.compRetType))
11413 // Small-typed return values are normalized by the callee
11414 op1 = gtNewCastNode(TYP_INT, op1, info.compRetType);
11416 // Propagate GTF_COLON_COND
11417 op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
11419 tree->gtOp.gtOp1 = fgMorphCast(op1);
11421 // Propagate side effect flags
11422 tree->gtFlags &= ~GTF_ALL_EFFECT;
11423 tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11432 // Check for typeof(...) == obj.GetType()
11433 // Also check for typeof(...) == typeof(...)
11434 // IMPORTANT NOTE: this optimization relies on a one-to-one mapping between
11435 // type handles and instances of System.Type
11436 // If this invariant is ever broken, the optimization will need updating
11437 CLANG_FORMAT_COMMENT_ANCHOR;
11439 #ifdef LEGACY_BACKEND
11440 if (op1->gtOper == GT_CALL && op2->gtOper == GT_CALL &&
11441 ((op1->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
11442 (op1->gtCall.gtCallType == CT_HELPER)) &&
11443 ((op2->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
11444 (op2->gtCall.gtCallType == CT_HELPER)))
11446 if ((((op1->gtOper == GT_INTRINSIC) &&
11447 (op1->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
11448 ((op1->gtOper == GT_CALL) && (op1->gtCall.gtCallType == CT_HELPER))) &&
11449 (((op2->gtOper == GT_INTRINSIC) &&
11450 (op2->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
11451 ((op2->gtOper == GT_CALL) && (op2->gtCall.gtCallType == CT_HELPER))))
11454 GenTreePtr pGetClassFromHandle;
11455 GenTreePtr pGetType;
11457 #ifdef LEGACY_BACKEND
11458 bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall());
11459 bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall());
11461 bool bOp1ClassFromHandle =
11462 op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall()) : false;
11463 bool bOp2ClassFromHandle =
11464 op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall()) : false;
11467 // Optimize typeof(...) == typeof(...)
11468 // Typically this occurs in generic code that attempts a type switch
11469 // e.g. typeof(T) == typeof(int)
11471 if (bOp1ClassFromHandle && bOp2ClassFromHandle)
11473 GenTreePtr classFromHandleArg1 = tree->gtOp.gtOp1->gtCall.gtCallArgs->gtOp.gtOp1;
11474 GenTreePtr classFromHandleArg2 = tree->gtOp.gtOp2->gtCall.gtCallArgs->gtOp.gtOp1;
11476 GenTreePtr compare = gtNewOperNode(oper, TYP_INT, classFromHandleArg1, classFromHandleArg2);
11478 compare->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
11480 // Morph and return
11481 return fgMorphTree(compare);
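// Illustrative sketch of the fold above: in shared generic code a type switch such as
// "typeof(T) == typeof(string)" imports as two type-handle-to-RuntimeType helper calls;
// the rewrite compares the raw handles instead:
//     EQ(CALL helper(hT), CALL helper(hString))   ==>   EQ(hT, hString)
// This is only valid because of the one-to-one handle/System.Type mapping noted above.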
11483 else if (bOp1ClassFromHandle || bOp2ClassFromHandle)
11486 // Now check for GetClassFromHandle(handle) == obj.GetType()
11489 if (bOp1ClassFromHandle)
11491 pGetClassFromHandle = tree->gtOp.gtOp1;
11492 pGetType = op2;
11494 else
11496 pGetClassFromHandle = tree->gtOp.gtOp2;
11497 pGetType = op1;
11500 GenTreePtr pGetClassFromHandleArgument = pGetClassFromHandle->gtCall.gtCallArgs->gtOp.gtOp1;
11501 GenTreePtr pConstLiteral = pGetClassFromHandleArgument;
11503 // Unwrap GT_NOP node used to prevent constant folding
11504 if (pConstLiteral->gtOper == GT_NOP && pConstLiteral->gtType == TYP_I_IMPL)
11506 pConstLiteral = pConstLiteral->gtOp.gtOp1;
11509 // In the ngen case, we have to go thru an indirection to get the right handle.
11510 if (pConstLiteral->gtOper == GT_IND)
11512 pConstLiteral = pConstLiteral->gtOp.gtOp1;
11514 #ifdef LEGACY_BACKEND
11516 if (pGetType->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC &&
11517 info.compCompHnd->getIntrinsicID(pGetType->gtCall.gtCallMethHnd) ==
11518 CORINFO_INTRINSIC_Object_GetType &&
11519 #else
11520 if ((pGetType->gtOper == GT_INTRINSIC) &&
11521 (pGetType->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType) &&
11522 #endif
11523 pConstLiteral->gtOper == GT_CNS_INT && pConstLiteral->gtType == TYP_I_IMPL)
11525 CORINFO_CLASS_HANDLE clsHnd =
11526 CORINFO_CLASS_HANDLE(pConstLiteral->gtIntCon.gtCompileTimeHandle);
11528 if (info.compCompHnd->canInlineTypeCheckWithObjectVTable(clsHnd))
11530 // Method Table tree
11531 CLANG_FORMAT_COMMENT_ANCHOR;
11532 #ifdef LEGACY_BACKEND
11533 GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp);
11534 #else
11535 GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtUnOp.gtOp1);
11536 #endif
11537 objMT->gtFlags |= GTF_EXCEPT; // Null ref exception if object is null
11538 compCurBB->bbFlags |= BBF_HAS_VTABREF;
11539 optMethodFlags |= OMF_HAS_VTABLEREF;
11541 // Method table constant
11542 GenTreePtr cnsMT = pGetClassFromHandleArgument;
11544 GenTreePtr compare = gtNewOperNode(oper, TYP_INT, objMT, cnsMT);
11546 compare->gtFlags |=
11547 tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
11549 // Morph and return
11550 return fgMorphTree(compare);
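// Illustrative sketch of the rewrite above, for "o.GetType() == typeof(C)" when the
// type check can be inlined against the vtable:
//     EQ(CALL GetType(o), CALL helper(hC))   ==>   EQ(IND(o), hC)
// i.e. load o's method table pointer and compare it directly with the class handle
// constant; GTF_EXCEPT on the IND preserves the null check on 'o'.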
11560 // Try to optimize away calls to CORINFO_HELP_BOX_NULLABLE for GT_EQ, GT_NE, and unsigned GT_GT.
11561 if ((oper != GT_GT) || tree->IsUnsigned())
11563 fgMorphRecognizeBoxNullable(tree);
11566 op1 = tree->gtOp.gtOp1;
11567 op2 = tree->gtGetOp2IfPresent();
11571 #ifdef _TARGET_ARM_
11573 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)
11575 switch (tree->TypeGet())
11577 case TYP_DOUBLE:
11578 return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1));
11579 case TYP_FLOAT:
11580 return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1));
11592 #if !CPU_HAS_FP_SUPPORT
11593 tree = fgMorphToEmulatedFP(tree);
11594 #endif
11596 /* Could this operator throw an exception? */
11597 if (fgGlobalMorph && tree->OperMayThrow())
11599 if (((tree->OperGet() != GT_IND) && !tree->OperIsBlk()) || fgAddrCouldBeNull(tree->gtOp.gtOp1))
11601 /* Mark the tree node as potentially throwing an exception */
11602 tree->gtFlags |= GTF_EXCEPT;
11606 /*-------------------------------------------------------------------------
11607 * Process the first operand, if any
11608 */
11610 if (op1)
11613 #if LOCAL_ASSERTION_PROP
11614 // If we are entering the "then" part of a Qmark-Colon we must
11615 // save the state of the current copy assignment table
11616 // so that we can restore this state when entering the "else" part
11619 noway_assert(optLocalAssertionProp);
11620 if (optAssertionCount)
11622 noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
11623 unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
11624 origAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
11625 origAssertionCount = optAssertionCount;
11626 memcpy(origAssertionTab, optAssertionTabPrivate, tabSize);
11630 origAssertionCount = 0;
11631 origAssertionTab = nullptr;
11634 #endif // LOCAL_ASSERTION_PROP
11636 // We might need a new MorphAddressContext context. (These are used to convey
11637 // parent context about how addresses being calculated will be used; see the
11638 // specification comment for MorphAddrContext for full details.)
11639 // Assume it's an Ind context to start.
11640 MorphAddrContext subIndMac1(MACK_Ind);
11641 MorphAddrContext* subMac1 = mac;
11642 if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind)
11644 switch (tree->gtOper)
11646 case GT_ADDR:
11647 if (subMac1 == nullptr)
11649 subMac1 = &subIndMac1;
11650 subMac1->m_kind = MACK_Addr;
11652 break;
11653 case GT_COMMA:
11654 // In a comma, the incoming context only applies to the rightmost arg of the
11655 // comma list. The left arg (op1) gets a fresh context.
11656 subMac1 = nullptr;
11657 break;
11658 case GT_OBJ:
11659 case GT_BLK:
11660 case GT_DYN_BLK:
11661 case GT_IND:
11662 subMac1 = &subIndMac1;
11669 // For additions, if we're in an IND context keep track of whether
11670 // all offsets added to the address are constant, and their sum.
11671 if (tree->gtOper == GT_ADD && subMac1 != nullptr)
11673 assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock.
11674 GenTreePtr otherOp = tree->gtOp.gtOp2;
11675 // Is the other operand a constant?
11676 if (otherOp->IsCnsIntOrI())
11678 ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset);
11679 totalOffset += otherOp->gtIntConCommon.IconValue();
11680 if (totalOffset.IsOverflow())
11682 // We will consider an offset so large as to overflow as "not a constant" --
11683 // we will do a null check.
11684 subMac1->m_allConstantOffsets = false;
11688 subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue();
11693 subMac1->m_allConstantOffsets = false;
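// Sketch of why this bookkeeping matters: for IND(ADD(ADD(p, 4), 8)) the context
// accumulates a constant total offset of 12. A small, all-constant total lets the
// eventual indirection itself act as the null check for 'p' (a null dereference at a
// small offset still faults), while a non-constant or overflowing total forces an
// explicit null check.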
11697 // If gtOp1 is a GT_FIELD, we need to pass down the mac if
11698 // its parent is GT_ADDR, since the address of the field
11699 // is part of an ongoing address computation. Otherwise
11700 // op1 represents the value of the field and so any address
11701 // calculations it does are in a new context.
11702 if ((op1->gtOper == GT_FIELD) && (tree->gtOper != GT_ADDR))
11706 // The impact of this field's value on any ongoing
11707 // address computation is handled below when looking
11708 // at op2.
11711 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1);
11713 #if LOCAL_ASSERTION_PROP
11714 // If we are exiting the "then" part of a Qmark-Colon we must
11715 // save the state of the current copy assignment table
11716 // so that we can merge this state with the "else" part exit
11719 noway_assert(optLocalAssertionProp);
11720 if (optAssertionCount)
11722 noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
11723 unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
11724 thenAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
11725 thenAssertionCount = optAssertionCount;
11726 memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize);
11730 thenAssertionCount = 0;
11731 thenAssertionTab = nullptr;
11734 #endif // LOCAL_ASSERTION_PROP
11736 /* Morphing along with folding and inlining may have changed the
11737 * side effect flags, so we have to reset them
11739 * NOTE: Don't reset the exception flags on nodes that may throw */
11741 assert(tree->gtOper != GT_CALL);
11743 if ((tree->gtOper != GT_INTRINSIC) || !IsIntrinsicImplementedByUserCall(tree->gtIntrinsic.gtIntrinsicId))
11745 tree->gtFlags &= ~GTF_CALL;
11748 if (!tree->OperMayThrow())
11750 tree->gtFlags &= ~GTF_EXCEPT;
11753 /* Propagate the new flags */
11754 tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
11756 // &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar does
11757 // Similarly for clsVar
11758 if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR))
11760 tree->gtFlags &= ~GTF_GLOB_REF;
11764 /*-------------------------------------------------------------------------
11765 * Process the second operand, if any
11766 */
11768 if (op2)
11771 #if LOCAL_ASSERTION_PROP
11772 // If we are entering the "else" part of a Qmark-Colon we must
11773 // reset the state of the current copy assignment table
11776 noway_assert(optLocalAssertionProp);
11777 optAssertionReset(0);
11778 if (origAssertionCount)
11780 size_t tabSize = origAssertionCount * sizeof(AssertionDsc);
11781 memcpy(optAssertionTabPrivate, origAssertionTab, tabSize);
11782 optAssertionReset(origAssertionCount);
11785 #endif // LOCAL_ASSERTION_PROP
11787 // We might need a new MorphAddressContext context to use in evaluating op2.
11788 // (These are used to convey parent context about how addresses being calculated
11789 // will be used; see the specification comment for MorphAddrContext for full details.)
11790 // Assume it's an Ind context to start.
11791 switch (tree->gtOper)
11793 case GT_ADD:
11794 if (mac != nullptr && mac->m_kind == MACK_Ind)
11796 GenTreePtr otherOp = tree->gtOp.gtOp1;
11798 // Is the other operand a constant?
11798 if (otherOp->IsCnsIntOrI())
11800 mac->m_totalOffset += otherOp->gtIntConCommon.IconValue();
11804 mac->m_allConstantOffsets = false;
11812 // If gtOp2 is a GT_FIELD, we must be taking its value,
11813 // so it should evaluate its address in a new context.
11814 if (op2->gtOper == GT_FIELD)
11816 // The impact of this field's value on any ongoing
11817 // address computation is handled above when looking
11818 // at op1.
11822 tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac);
11824 /* Propagate the side effect flags from op2 */
11826 tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
11828 #if LOCAL_ASSERTION_PROP
11829 // If we are exiting the "else" part of a Qmark-Colon we must
11830 // merge the state of the current copy assignment table with
11831 // that of the exit of the "then" part.
11834 noway_assert(optLocalAssertionProp);
11835 // If either exit table has zero entries then
11836 // the merged table also has zero entries
11837 if (optAssertionCount == 0 || thenAssertionCount == 0)
11839 optAssertionReset(0);
11843 size_t tabSize = optAssertionCount * sizeof(AssertionDsc);
11844 if ((optAssertionCount != thenAssertionCount) ||
11845 (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0))
11847 // Yes they are different so we have to find the merged set
11848 // Iterate over the copy asgn table removing any entries
11849 // that do not have an exact match in the thenAssertionTab
11850 AssertionIndex index = 1;
11851 while (index <= optAssertionCount)
11853 AssertionDsc* curAssertion = optGetAssertion(index);
11855 for (unsigned j = 0; j < thenAssertionCount; j++)
11857 AssertionDsc* thenAssertion = &thenAssertionTab[j];
11859 // Do the left sides match?
11860 if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) &&
11861 (curAssertion->assertionKind == thenAssertion->assertionKind))
11863 // Do the right sides match?
11864 if ((curAssertion->op2.kind == thenAssertion->op2.kind) &&
11865 (curAssertion->op2.lconVal == thenAssertion->op2.lconVal))
11876 // If we fall out of the loop above then we didn't find
11877 // any matching entry in the thenAssertionTab so it must
11878 // have been killed on that path so we remove it here
11881 // The data at optAssertionTabPrivate[index] is to be removed
11882 CLANG_FORMAT_COMMENT_ANCHOR;
11886 printf("The QMARK-COLON ");
11888 printf(" removes assertion candidate #%d\n", index);
11891 optAssertionRemove(index);
11894 // The data at optAssertionTabPrivate[index] is to be kept
11900 #endif // LOCAL_ASSERTION_PROP
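// Merge example (hypothetical assertion contents): if the "then" exit table holds
// { V01 == V02, V03 == 5 } while the "else" exit table holds only { V03 == 5 }, the
// loop above removes "V01 == V02", leaving just the assertions valid on both paths.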
11903 DONE_MORPHING_CHILDREN:
11905 /*-------------------------------------------------------------------------
11906 * Now do POST-ORDER processing
11907 */
11909 #if FEATURE_FIXED_OUT_ARGS && !defined(_TARGET_64BIT_)
11910 // Variable shifts of a long end up being helper calls, so mark the tree as such. This
11911 // is potentially too conservative, since they'll get treated as having side effects.
11912 // It is important to mark them as calls so if they are part of an argument list,
11913 // they will get sorted and processed properly (for example, it is important to handle
11914 // all nested calls before putting struct arguments in the argument registers). We
11915 // could mark the trees just before argument processing, but it would require a full
11916 // tree walk of the argument tree, so we just do it here, instead, even though we'll
11917 // mark non-argument trees (that will still get converted to calls, anyway).
11918 if (GenTree::OperIsShift(oper) && (tree->TypeGet() == TYP_LONG) && (op2->OperGet() != GT_CNS_INT))
11920 tree->gtFlags |= GTF_CALL;
11922 #endif // FEATURE_FIXED_OUT_ARGS && !_TARGET_64BIT_
11924 if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) &&
11925 (op2 && !varTypeIsGC(op2->TypeGet())))
11927 // The tree is really not GC but was marked as such. Now that the
11928 // children have been unmarked, unmark the tree too.
11930 // Remember that GT_COMMA inherits its type only from op2
11931 if (tree->gtOper == GT_COMMA)
11933 tree->gtType = genActualType(op2->TypeGet());
11937 tree->gtType = genActualType(op1->TypeGet());
11941 GenTreePtr oldTree = tree;
11943 GenTreePtr qmarkOp1 = nullptr;
11944 GenTreePtr qmarkOp2 = nullptr;
11946 if ((tree->OperGet() == GT_QMARK) && (tree->gtOp.gtOp2->OperGet() == GT_COLON))
11948 qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1;
11949 qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2;
11952 // Try to fold it; maybe we get lucky.
11953 tree = gtFoldExpr(tree);
11955 if (oldTree != tree)
11957 /* if gtFoldExpr returned op1 or op2 then we are done */
11958 if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
11963 /* If we created a comma-throw tree then we need to morph op1 */
11964 if (fgIsCommaThrow(tree))
11966 tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
11967 fgMorphTreeDone(tree);
11973 else if (tree->OperKind() & GTK_CONST)
11978 /* gtFoldExpr could have used setOper to change the oper */
11979 oper = tree->OperGet();
11980 typ = tree->TypeGet();
11982 /* gtFoldExpr could have changed op1 and op2 */
11983 op1 = tree->gtOp.gtOp1;
11984 op2 = tree->gtGetOp2IfPresent();
11986 // Do we have an integer compare operation?
11988 if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet()))
11990 // Are we comparing against zero?
11992 if (op2->IsIntegralConst(0))
11994 // Request that the codegen for op1 sets the condition flags
11995 // when it generates the code for op1.
11997 // Codegen for op1 must set the condition flags if
11998 // this method returns true.
12000 op1->gtRequestSetFlags();
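// For example, in "(x & 7) != 0" the AND can be emitted as a flags-setting test
// instruction, so codegen needs no separate compare-against-zero before the jump/set.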
12003 /*-------------------------------------------------------------------------
12004 * Perform the required oper-specific postorder morphing
12005 */
12007 GenTreePtr temp;
12008 GenTreePtr cns1, cns2;
12009 GenTreePtr thenNode;
12010 GenTreePtr elseNode;
12011 size_t ival1, ival2;
12012 GenTreePtr lclVarTree;
12013 GenTreeLclVarCommon* lclVarCmnTree;
12014 FieldSeqNode* fieldSeq = nullptr;
12020 lclVarTree = fgIsIndirOfAddrOfLocal(op1);
12021 if (lclVarTree != nullptr)
12023 lclVarTree->gtFlags |= GTF_VAR_DEF;
12026 if (op1->gtEffectiveVal()->OperIsConst())
12028 op1 = gtNewOperNode(GT_IND, tree->TypeGet(), op1);
12029 tree->gtOp.gtOp1 = op1;
12032 /* If we are storing a small type, we might be able to omit a cast */
12033 if ((op1->gtOper == GT_IND) && varTypeIsSmall(op1->TypeGet()))
12035 if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow())
12037 var_types castType = op2->CastToType();
12039 // If we are performing a narrowing cast and
12040 // castType is larger or the same as op1's type
12041 // then we can discard the cast.
12043 if (varTypeIsSmall(castType) && (castType >= op1->TypeGet()))
12045 tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp();
12048 else if (op2->OperIsCompare() && varTypeIsByte(op1->TypeGet()))
12050 /* We don't need to zero extend the setcc instruction */
12051 op2->gtType = TYP_BYTE;
12054 // If we introduced a CSE we may need to undo the optimization above
12055 // (i.e. " op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type)
12056 // When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
12057 else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR))
12059 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
12060 LclVarDsc* varDsc = &lvaTable[varNum];
12062 /* We again need to zero extend the setcc instruction */
12063 op2->gtType = varDsc->TypeGet();
12065 fgAssignSetVarDef(tree);
12083 /* We can't CSE the LHS of an assignment */
12084 /* We also must set in the pre-morphing phase, otherwise assertionProp doesn't see it */
12085 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
12087 op1->gtFlags |= GTF_DONT_CSE;
12094 /* Make sure we're allowed to do this */
12096 if (optValnumCSE_phase)
12098 // It is not safe to reorder/delete CSE's
12104 /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */
12106 if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0)
12108 op1 = tree->gtOp.gtOp1;
12110 /* Since this can occur repeatedly we use a while loop */
12112 while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) &&
12113 (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) && (op1->gtType == TYP_INT) &&
12114 (op1->gtOverflow() == false))
12116 /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */
12118 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
12119 ival2 = cns2->gtIntCon.gtIconVal;
12121 if (op1->gtOper == GT_ADD)
12123 ival2 -= ival1;
12126 else
12127 ival2 += ival1;
12129 cns2->gtIntCon.gtIconVal = ival2;
12131 #ifdef _TARGET_64BIT_
12132 // we need to properly re-sign-extend or truncate as needed.
12133 cns2->AsIntCon()->TruncateOrSignExtend32();
12134 #endif // _TARGET_64BIT_
12136 op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
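// Worked example of the loop above: "(x + 5) == 8" becomes "x == 3". Because the
// rewrite may expose another ADD/SUB-with-constant underneath, it repeats:
// "((x - 2) + 5) == 8" first becomes "(x - 2) == 3" and then "x == 5".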
12141 // Here we look for the following tree
12147 ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1
12149 // cast to unsigned allows test for both 0 and 1
12150 if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->gtIntConCommon.IconValue()) <= 1U))
12152 ival2 = (size_t)cns2->gtIntConCommon.IconValue();
12154 else // cast to UINT64 allows test for both 0 and 1
12155 if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->gtIntConCommon.LngValue()) <= 1ULL))
12157 ival2 = (size_t)cns2->gtIntConCommon.LngValue();
12160 if (ival2 != INT_MAX)
12162 // If we don't have a comma and relop, we can't do this optimization
12164 if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare()))
12166 // Here we look for the following transformation
12168 // EQ/NE Possible REVERSE(RELOP)
12170 // COMMA CNS 0/1 -> COMMA relop_op2
12172 // x RELOP x relop_op1
12174 // relop_op1 relop_op2
12178 GenTreePtr comma = op1;
12179 GenTreePtr relop = comma->gtOp.gtOp2;
12181 GenTreePtr relop_op1 = relop->gtOp.gtOp1;
12183 bool reverse = ((ival2 == 0) == (oper == GT_EQ));
12185 if (reverse)
12187 gtReverseCond(relop);
12190 relop->gtOp.gtOp1 = comma;
12191 comma->gtOp.gtOp2 = relop_op1;
12193 // Comma now has fewer nodes underneath it, so we need to regenerate its flags
12194 comma->gtFlags &= ~GTF_ALL_EFFECT;
12195 comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT;
12196 comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
12198 noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0);
12199 noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0);
12200 relop->gtFlags |=
12201 tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT);
12206 if (op1->gtOper == GT_COMMA)
12208 // Here we look for the following tree
12209 // and when the LCL_VAR is a temp we can fold the tree:
12213 // COMMA CNS 0/1 -> RELOP CNS 0/1
12221 GenTreePtr asg = op1->gtOp.gtOp1;
12222 GenTreePtr lcl = op1->gtOp.gtOp2;
12224 /* Make sure that the left side of the comma is the assignment of the LCL_VAR */
12225 if (asg->gtOper != GT_ASG)
12230 /* The right side of the comma must be a LCL_VAR temp */
12231 if (lcl->gtOper != GT_LCL_VAR)
12236 unsigned lclNum = lcl->gtLclVarCommon.gtLclNum;
12237 noway_assert(lclNum < lvaCount);
12239 /* If the LCL_VAR is not a temp then bail, a temp has a single def */
12240 if (!lvaTable[lclNum].lvIsTemp)
12246 /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */
12247 // Fix 383856 X86/ARM ILGEN
12248 if (lclNumIsCSE(lclNum))
12254 /* We also must be assigning the result of a RELOP */
12255 if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR)
12260 /* Both of the LCL_VAR must match */
12261 if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum)
12266 /* If right side of asg is not a RELOP then skip */
12267 if (!asg->gtOp.gtOp2->OperIsCompare())
12272 LclVarDsc* varDsc = lvaTable + lclNum;
12274 /* Set op1 to the right side of asg, (i.e. the RELOP) */
12275 op1 = asg->gtOp.gtOp2;
12277 DEBUG_DESTROY_NODE(asg->gtOp.gtOp1);
12278 DEBUG_DESTROY_NODE(lcl);
12280 /* This local variable should never be used again */
12282 // VSW 184221: Set RefCnt to zero to indicate that this local var
12283 // is not used any more. (Keep the lvType as is.)
12284 // Otherwise lvOnFrame will be set to true in Compiler::raMarkStkVars
12285 // And then emitter::emitEndCodeGen will assert in the following line:
12286 // noway_assert( dsc->lvTracked);
12288 noway_assert(varDsc->lvRefCnt == 0 || // lvRefCnt may not have been set yet.
12289 varDsc->lvRefCnt == 2 // Or, we assume this tmp should only be used here,
12290 // and it only shows up twice.
12292 lvaTable[lclNum].lvRefCnt = 0;
12293 lvaTable[lclNum].lvaResetSortAgainFlag(this);
12296 if (op1->OperIsCompare())
12298 // Here we look for the following tree
12300 // EQ/NE -> RELOP/!RELOP
12305 // Note that we will remove/destroy the EQ/NE node and move
12306 // the RELOP up into its location.
12308 /* Here we reverse the RELOP if necessary */
12310 bool reverse = ((ival2 == 0) == (oper == GT_EQ));
12312 if (reverse)
12314 gtReverseCond(op1);
12317 /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */
12318 op1->gtType = tree->gtType;
12320 noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
12321 op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
12323 DEBUG_DESTROY_NODE(tree);
12328 // Now we check for a compare with the result of an '&' operator
12330 // Here we look for the following transformation:
12334 // AND CNS 0/1 -> AND CNS 0
12336 // RSZ/RSH CNS 1 x CNS (1 << y)
12340 if (op1->gtOper == GT_AND)
12342 GenTreePtr andOp = op1;
12343 GenTreePtr rshiftOp = andOp->gtOp.gtOp1;
12345 if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH))
12350 if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI())
12355 ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal;
12357 if (shiftAmount < 0)
12362 if (!andOp->gtOp.gtOp2->IsIntegralConst(1))
12367 if (andOp->gtType == TYP_INT)
12369 if (shiftAmount > 31)
12374 UINT32 newAndOperand = ((UINT32)1) << shiftAmount;
12376 andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand;
12378 // Reverse the cond if necessary
12380 if (ival2 == 1)
12381 gtReverseCond(tree);
12382 cns2->gtIntCon.gtIconVal = 0;
12383 oper = tree->gtOper;
12386 else if (andOp->gtType == TYP_LONG)
12388 if (shiftAmount > 63)
12393 UINT64 newAndOperand = ((UINT64)1) << shiftAmount;
12395 andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand);
12397 // Reverse the cond if necessary
12399 if (ival2 == 1)
12400 gtReverseCond(tree);
12401 cns2->gtIntConCommon.SetLngValue(0);
12402 oper = tree->gtOper;
12406 andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1;
12408 DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2);
12409 DEBUG_DESTROY_NODE(rshiftOp);
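// Worked example of the shift/AND rewrite above: "((x >> 3) & 1) == 1" becomes
// "(x & 8) != 0" (the relop is reversed because the comparand was 1), while
// "((x >> 3) & 1) == 0" becomes "(x & 8) == 0".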
12411 } // END if (ival2 != INT_MAX)
12414 /* Now check for compares with small constant longs that can be cast to int */
12416 if (!cns2->OperIsConst())
12421 if (cns2->TypeGet() != TYP_LONG)
12426 /* Is the constant 31 bits or smaller? */
12428 if ((cns2->gtIntConCommon.LngValue() >> 31) != 0)
12433 /* Is the first comparand mask operation of type long ? */
12435 if (op1->gtOper != GT_AND)
12437 /* Another interesting case: cast from int */
12439 if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT &&
12440 !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate
12441 !op1->gtOverflow()) // cannot be an overflow checking cast
12443 /* Simply make this into an integer comparison */
12445 tree->gtOp.gtOp1 = op1->gtCast.CastOp();
12446 tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT);
12452 noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND);
12454 /* Is the result of the mask effectively an INT ? */
12456 GenTreePtr andMask;
12457 andMask = op1->gtOp.gtOp2;
12458 if (andMask->gtOper != GT_CNS_NATIVELONG)
12462 if ((andMask->gtIntConCommon.LngValue() >> 32) != 0)
12467 /* Now we know that we can cast gtOp.gtOp1 of AND to int */
12469 op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, op1->gtOp.gtOp1, TYP_INT);
12471 /* now replace the mask node (gtOp.gtOp2 of AND node) */
12473 noway_assert(andMask == op1->gtOp.gtOp2);
12475 ival1 = (int)andMask->gtIntConCommon.LngValue();
12476 andMask->SetOper(GT_CNS_INT);
12477 andMask->gtType = TYP_INT;
12478 andMask->gtIntCon.gtIconVal = ival1;
12480 /* now change the type of the AND node */
12482 op1->gtType = TYP_INT;
12484 /* finally we replace the comparand */
12486 ival2 = (int)cns2->gtIntConCommon.LngValue();
12487 cns2->SetOper(GT_CNS_INT);
12488 cns2->gtType = TYP_INT;
12490 noway_assert(cns2 == op2);
12491 cns2->gtIntCon.gtIconVal = ival2;
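// Worked example: in "(longVal & 0xFF) == 10L" both the mask and the comparand fit in
// 32 bits, so the AND operand is cast to int and the whole thing becomes the TYP_INT
// compare "((int)longVal & 0xFF) == 10".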
12500 if ((tree->gtFlags & GTF_UNSIGNED) == 0)
12502 if (op2->gtOper == GT_CNS_INT)
12505 /* Check for "expr relop 1" */
12506 if (cns2->IsIntegralConst(1))
12508 /* Check for "expr >= 1" */
12509 if (oper == GT_GE)
12511 /* Change to "expr > 0" */
12512 oper = GT_GT;
12515 /* Check for "expr < 1" */
12516 else if (oper == GT_LT)
12518 /* Change to "expr <= 0" */
12519 oper = GT_LE;
12523 /* Check for "expr relop -1" */
12524 else if (cns2->IsIntegralConst(-1) && ((oper == GT_LE) || (oper == GT_GT)))
12526 /* Check for "expr <= -1" */
12527 if (oper == GT_LE)
12529 /* Change to "expr < 0" */
12530 oper = GT_LT;
12533 /* Check for "expr > -1" */
12534 else if (oper == GT_GT)
12536 /* Change to "expr >= 0" */
12537 oper = GT_GE;
12540 // If we get here we should be changing 'oper'
12541 assert(tree->OperGet() != oper);
12543 // Keep the old ValueNumber for 'tree' as the new expr
12544 // will still compute the same value as before
12545 tree->SetOper(oper, GenTree::PRESERVE_VN);
12546 cns2->gtIntCon.gtIconVal = 0;
12548 // vnStore is null before the ValueNumber phase has run
12549 if (vnStore != nullptr)
12551 // Update the ValueNumber for 'cns2', as we just changed it to 0
12552 fgValueNumberTreeConst(cns2);
12555 op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2);
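// The signed rewrites above, summarized: "x >= 1" => "x > 0", "x < 1" => "x <= 0",
// "x <= -1" => "x < 0", "x > -1" => "x >= 0". Each re-expresses the same predicate
// against zero, which is why the tree's value number can be preserved.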
12560 else // we have an unsigned comparison
12562 if (op2->IsIntegralConst(0))
12564 if ((oper == GT_GT) || (oper == GT_LE))
12566 // IL doesn't have a cne instruction so compilers use cgt.un instead. The JIT
12567 // recognizes certain patterns that involve GT_NE (e.g (x & 4) != 0) and fails
12568 // if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
12569 // and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it sometimes
12570 // occurs as a result of branch inversion.
12571 oper = (oper == GT_LE) ? GT_EQ : GT_NE;
12572 tree->SetOper(oper, GenTree::PRESERVE_VN);
12573 tree->gtFlags &= ~GTF_UNSIGNED;
12580 noway_assert(tree->OperKind() & GTK_RELOP);
12582 #ifdef LEGACY_BACKEND
12583 /* Check if the result of the comparison is used for a jump.
12584 * If not then only the int (i.e. 32 bit) case is handled in
12585 * the code generator through the (x86) "set" instructions.
12586 * For the rest of the cases, the simplest way is to
12587 * "simulate" the comparison with ?:
12589 * On ARM, we previously used the IT instruction, but the IT instructions
12590 * have mostly been declared obsolete and off-limits, so all cases on ARM
12591 * get converted to ?: */
12593 if (!(tree->gtFlags & GTF_RELOP_JMP_USED) && fgMorphRelopToQmark(op1))
12595 /* We convert it to "(CMP_TRUE) ? (1):(0)" */
12598 op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
12599 op1->gtRequestSetFlags();
12601 op2 = new (this, GT_COLON) GenTreeColon(TYP_INT, gtNewIconNode(1), gtNewIconNode(0));
12602 op2 = fgMorphTree(op2);
12604 tree = gtNewQmarkNode(TYP_INT, op1, op2);
12606 fgMorphTreeDone(tree);
12610 #endif // LEGACY_BACKEND
12613 #ifdef LEGACY_BACKEND
12616 /* If op1 is a comma throw node then we won't be keeping op2 */
12617 if (fgIsCommaThrow(op1))
12622 /* Get hold of the two branches */
12624 noway_assert(op2->OperGet() == GT_COLON);
12625 elseNode = op2->AsColon()->ElseNode();
12626 thenNode = op2->AsColon()->ThenNode();
12628 /* Try to hoist assignments out of qmark colon constructs.
12629 i.e. replace (cond?(x=a):(x=b)) with (x=(cond?a:b)). */
12631 if (tree->TypeGet() == TYP_VOID && thenNode->OperGet() == GT_ASG && elseNode->OperGet() == GT_ASG &&
12632 thenNode->TypeGet() != TYP_LONG && GenTree::Compare(thenNode->gtOp.gtOp1, elseNode->gtOp.gtOp1) &&
12633 thenNode->gtOp.gtOp2->TypeGet() == elseNode->gtOp.gtOp2->TypeGet())
12635 noway_assert(thenNode->TypeGet() == elseNode->TypeGet());
12637 GenTreePtr asg = thenNode;
12638 GenTreePtr colon = op2;
12639 colon->gtOp.gtOp1 = thenNode->gtOp.gtOp2;
12640 colon->gtOp.gtOp2 = elseNode->gtOp.gtOp2;
12641 tree->gtType = colon->gtType = asg->gtOp.gtOp2->gtType;
12642 asg->gtOp.gtOp2 = tree;
12644 // Asg will have all the flags that the QMARK had
12645 asg->gtFlags |= (tree->gtFlags & GTF_ALL_EFFECT);
12647 // Colon flag won't have the flags that x had.
12648 colon->gtFlags &= ~GTF_ALL_EFFECT;
12649 colon->gtFlags |= (colon->gtOp.gtOp1->gtFlags | colon->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
12651 DEBUG_DESTROY_NODE(elseNode->gtOp.gtOp1);
12652 DEBUG_DESTROY_NODE(elseNode);
12657 /* If the 'else' branch is empty swap the two branches and reverse the condition */
12659 if (elseNode->IsNothingNode())
12661 /* This can only happen for VOID ?: */
12662 noway_assert(op2->gtType == TYP_VOID);
12664 /* If the thenNode and elseNode are both nop nodes then optimize away the QMARK */
12665 if (thenNode->IsNothingNode())
12667 // We may be able to throw away op1 (unless it has side-effects)
12669 if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
12671 /* Just return a Nop node */
12676 /* Just return the relop, but clear the special flags. Note
12677 that we can't do that for longs and floats (see code under
12678 COMPARE label above) */
12680 if (!fgMorphRelopToQmark(op1->gtOp.gtOp1))
12682 op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
12689 GenTreePtr tmp = elseNode;
12691 op2->AsColon()->ElseNode() = elseNode = thenNode;
12692 op2->AsColon()->ThenNode() = thenNode = tmp;
12693 gtReverseCond(op1);
12697 #if !defined(_TARGET_ARM_)
12698 // If we have (cond)?0:1, then we just return "cond" for TYP_INTs
12700 // Don't do this optimization for ARM: we always require assignment
12701 // to boolean to remain ?:, since we don't have any way to generate
12702 // this with straight-line code, like x86 does using setcc (at least
12703 // after the IT instruction is deprecated).
12705 if (genActualType(op1->gtOp.gtOp1->gtType) == TYP_INT && genActualType(typ) == TYP_INT &&
12706 thenNode->gtOper == GT_CNS_INT && elseNode->gtOper == GT_CNS_INT)
12708 ival1 = thenNode->gtIntCon.gtIconVal;
12709 ival2 = elseNode->gtIntCon.gtIconVal;
12711 // Is one constant 0 and the other 1?
12712 if ((ival1 | ival2) == 1 && (ival1 & ival2) == 0)
12714 // If the constants are {1, 0}, reverse the condition
12716 if (ival1 == 1)
12717 gtReverseCond(op1);
12720 // Unmark GTF_RELOP_JMP_USED on the condition node so it knows that it
12721 // needs to materialize the result as a 0 or 1.
12722 noway_assert(op1->gtFlags & (GTF_RELOP_QMARK | GTF_RELOP_JMP_USED));
12723 op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
12725 DEBUG_DESTROY_NODE(tree);
12726 DEBUG_DESTROY_NODE(op2);
12731 #endif // !_TARGET_ARM_
12733 break; // end case GT_QMARK
12734 #endif // LEGACY_BACKEND
12738 #ifndef _TARGET_64BIT_
12739 if (typ == TYP_LONG)
12741 // This must be GTF_MUL_64RSLT
12742 assert(tree->gtIsValid64RsltMul());
12743 return tree;
12745 #endif // _TARGET_64BIT_
12750 if (tree->gtOverflow())
12755 // TODO #4104: there are a lot of other places where
12756 // this condition is not checked before transformations.
12759 /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */
12762 if (op2->IsCnsIntOrI())
12764 /* Negate the constant and change the node to be "+" */
12766 op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
12767 oper = GT_ADD;
12768 tree->ChangeOper(oper);
12772 /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */
12775 if (op1->IsCnsIntOrI())
12777 noway_assert(varTypeIsIntOrI(tree));
12779 tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, tree->gtType, op2); // The type of the new GT_NEG
12780 // node should be the same
12781 // as the type of the tree, i.e. tree->gtType.
12782 fgMorphTreeDone(op2);
12784 oper = GT_ADD;
12785 tree->ChangeOper(oper);
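// Worked examples of the two rewrites above: "x - 5" becomes "x + (-5)" (constant
// negated in place), while "5 - x" becomes "5 + (-x)" (a GT_NEG node is created for
// x); both expose a GT_ADD to the commutation and folding logic that follows.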
12789 /* No match - exit */
12793 #ifdef _TARGET_ARM64_
12795 if (!varTypeIsFloating(tree->gtType))
12797 // Codegen for this instruction needs to be able to throw two exceptions:
12798 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
12799 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
12803 // Codegen for this instruction needs to be able to throw one exception:
12804 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
12811 if (tree->gtOverflow())
12813 tree->gtRequestSetFlags();
12815 // Add the exception-throwing basic block to jump to on overflow
12817 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
12819 // We can't do any commutative morphing for overflow instructions
12830 /* Commute any non-REF constants to the right */
12833 if (op1->OperIsConst() && (op1->gtType != TYP_REF))
12835 // TODO-Review: We used to assert here that
12836 // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD));
12837 // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation,
12838 // and would sometimes hit this assertion. This may indicate a missed "remorph".
12839 // Task is to re-enable this assertion and investigate.
12841 /* Swap the operands */
12842 tree->gtOp.gtOp1 = op2;
12843 tree->gtOp.gtOp2 = op1;
12845 op1 = op2;
12846 op2 = tree->gtOp.gtOp2;
12849 /* See if we can fold GT_ADD nodes. */
12851 if (oper == GT_ADD)
12853 /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */
12855 if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) &&
12856 op1->gtOp.gtOp2->gtOper == GT_CNS_INT && op2->gtOp.gtOp2->gtOper == GT_CNS_INT &&
12857 !op1->gtOverflow() && !op2->gtOverflow())
12859 cns1 = op1->gtOp.gtOp2;
12860 cns2 = op2->gtOp.gtOp2;
12861 cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal;
12862 #ifdef _TARGET_64BIT_
12863 if (cns1->TypeGet() == TYP_INT)
12865 // we need to properly re-sign-extend or truncate after adding two int constants above
12866 cns1->AsIntCon()->TruncateOrSignExtend32();
12868 #endif //_TARGET_64BIT_
12870 tree->gtOp.gtOp2 = cns1;
12871 DEBUG_DESTROY_NODE(cns2);
12873 op1->gtOp.gtOp2 = op2->gtOp.gtOp1;
12874 op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT);
12875 DEBUG_DESTROY_NODE(op2);
12876 op2 = tree->gtOp.gtOp2;
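// Worked example: "((x + 3) + (y + 4))" is refolded into "((x + y) + 7)": the two
// constants are summed into cns1 and the non-constant halves are joined under op1.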
12879 if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))
12881 /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */
12883 if (op1->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op1) && op1->gtOp.gtOp2->IsCnsIntOrI() &&
12884 !op1->gtOverflow() && op1->gtOp.gtOp2->OperGet() == op2->OperGet())
12886 cns1 = op1->gtOp.gtOp2;
12887 op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() +
12888 op2->gtIntConCommon.IconValue());
12889 #ifdef _TARGET_64BIT_
12890 if (op2->TypeGet() == TYP_INT)
12892 // we need to properly re-sign-extend or truncate after adding two int constants above
12893 op2->AsIntCon()->TruncateOrSignExtend32();
12895 #endif //_TARGET_64BIT_
12897 if (cns1->OperGet() == GT_CNS_INT)
12899 op2->gtIntCon.gtFieldSeq =
12900 GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq);
12902 DEBUG_DESTROY_NODE(cns1);
12904 tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
12905 DEBUG_DESTROY_NODE(op1);
12906 op1 = tree->gtOp.gtOp1;
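// Similarly, "((x + 3) + 4)" collapses to "x + 7"; field sequence annotations on the
// two constants are appended so address/field information is not lost.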
12911 if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree))
12914 // If this addition is adding an offset to a null pointer,
12915 // avoid the work and yield the null pointer immediately.
12916 // Dereferencing the pointer in either case will have the
12917 // same effect.
12919 if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) &&
12920 ((op1->gtFlags & GTF_ALL_EFFECT) == 0))
12922 op2->gtType = tree->gtType;
12923 DEBUG_DESTROY_NODE(op1);
12924 DEBUG_DESTROY_NODE(tree);
12928 // Remove the addition iff it won't change the tree type
12931 if (!gtIsActiveCSE_Candidate(op2) &&
12932 ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF)))
12934 if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) &&
12935 (op2->gtIntCon.gtFieldSeq != nullptr) &&
12936 (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField()))
12938 fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq);
12941 DEBUG_DESTROY_NODE(op2);
12942 DEBUG_DESTROY_NODE(tree);
12949 /* See if we can fold GT_MUL by const nodes */
12950 else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase)
12952 #ifndef _TARGET_64BIT_
12953 noway_assert(typ <= TYP_UINT);
12954 #endif // _TARGET_64BIT_
12955 noway_assert(!tree->gtOverflow());
12957 ssize_t mult = op2->gtIntConCommon.IconValue();
12958 bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
12959 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq();
12961 assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr);
12963 if (mult == 0)
12965 // We may be able to throw away op1 (unless it has side-effects)
12967 if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
12969 DEBUG_DESTROY_NODE(op1);
12970 DEBUG_DESTROY_NODE(tree);
12971 return op2; // Just return the "0" node
12974 // We need to keep op1 for the side-effects. Hang it off
12975 // a GT_COMMA node.
12977 tree->ChangeOper(GT_COMMA);
12981 size_t abs_mult = (mult >= 0) ? mult : -mult;
12982 size_t lowestBit = genFindLowestBit(abs_mult);
12983 bool changeToShift = false;
12985 // is it a power of two? (positive or negative)
12986 if (abs_mult == lowestBit)
12988 // if negative negate (min-int does not need negation)
12989 if (mult < 0 && mult != SSIZE_T_MIN)
12991 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
12992 fgMorphTreeDone(op1);
12995 // If "op2" is a constant array index, the other multiplicand must be a constant.
12996 // Transfer the annotation to the other one.
12997 if (op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
12998 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
13000 assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr);
13001 GenTreePtr otherOp = op1;
13002 if (otherOp->OperGet() == GT_NEG)
13004 otherOp = otherOp->gtOp.gtOp1;
13006 assert(otherOp->OperGet() == GT_CNS_INT);
13007 assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
13008 otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq;
13011 if (abs_mult == 1)
13013 DEBUG_DESTROY_NODE(op2);
13014 DEBUG_DESTROY_NODE(tree);
13015 return op1;
13018 /* Change the multiplication into a shift by log2(val) bits */
13019 op2->gtIntConCommon.SetIconValue(genLog2(abs_mult));
13020 changeToShift = true;
13023 else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
13025 int shift = genLog2(lowestBit);
13026 ssize_t factor = abs_mult >> shift;
13028 if (factor == 3 || factor == 5 || factor == 9)
13030 // if negative negate (min-int does not need negation)
13031 if (mult < 0 && mult != SSIZE_T_MIN)
13033 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
13034 fgMorphTreeDone(op1);
13037 GenTreePtr factorIcon = gtNewIconNode(factor, TYP_I_IMPL);
13038 if (op2IsConstIndex)
13040 factorIcon->AsIntCon()->gtFieldSeq =
13041 GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
13044 // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift
13045 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon);
13046 fgMorphTreeDone(op1);
13048 op2->gtIntConCommon.SetIconValue(shift);
13049 changeToShift = true;
13052 #endif // LEA_AVAILABLE
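// Worked examples: "x * 8" becomes "x << 3" (abs_mult is a power of two), and on
// LEA-capable targets "x * 40" becomes "(x * 5) << 3", since 40 = 5 * 8 and the
// by-5 multiply maps onto a single scaled-index address computation.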
13055 // vnStore is null before the ValueNumber phase has run
13056 if (vnStore != nullptr)
13058 // Update the ValueNumber for 'op2', as we just changed the constant
13059 fgValueNumberTreeConst(op2);
13062 // Keep the old ValueNumber for 'tree' as the new expr
13063 // will still compute the same value as before
13064 tree->ChangeOper(oper, GenTree::PRESERVE_VN);
13066 goto DONE_MORPHING_CHILDREN;
13069 else if (fgOperIsBitwiseRotationRoot(oper))
13071 tree = fgRecognizeAndMorphBitwiseRotation(tree);
13073 // fgRecognizeAndMorphBitwiseRotation may return a new tree
13074 oper = tree->OperGet();
13075 typ = tree->TypeGet();
13076 op1 = tree->gtOp.gtOp1;
13077 op2 = tree->gtOp.gtOp2;
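// Sketch of the pattern fgRecognizeAndMorphBitwiseRotation recognizes: for a 32-bit x,
// "(x << n) | (x >> (32 - n))" can be collapsed into a single rotate (GT_ROL), and the
// mirrored form into GT_ROR -- hence oper/op1/op2 must be re-read afterwards.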
13086 /* Any constant cases should have been folded earlier */
13087 noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase);
13092 noway_assert(varTypeIsFloating(op1->TypeGet()));
13094 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN, fgPtrArgCntCur);
13098 // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on
13099 // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X
13100 // is a local or clsVar, even if it has been address-exposed.
13101 if (op1->OperGet() == GT_ADDR)
13103 tree->gtFlags |= (op1->gtGetOp1()->gtFlags & GTF_GLOB_REF);
13109 // Cannot remove a GT_IND if it is currently a CSE candidate.
13110 if (gtIsActiveCSE_Candidate(tree))
13115 bool foldAndReturnTemp;
13116 foldAndReturnTemp = false;
13120 /* Try to Fold *(&X) into X */
13121 if (op1->gtOper == GT_ADDR)
13123 // Cannot remove a GT_ADDR if it is currently a CSE candidate.
13124 if (gtIsActiveCSE_Candidate(op1))
13129 temp = op1->gtOp.gtOp1; // X
13131 // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that
13132 // they are the *same* struct type. In fact, they almost certainly aren't. If the
13133 // address has an associated field sequence, that identifies this case; go through
13134 // the "lcl_fld" path rather than this one.
13135 FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below.
13136 if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq))
13138 foldAndReturnTemp = true;
13140 else if (temp->OperIsLocal())
13142 unsigned lclNum = temp->gtLclVarCommon.gtLclNum;
13143 LclVarDsc* varDsc = &lvaTable[lclNum];
13146 // We will try to optimize when we have a struct promoted with a zero lvFldOffset
13146 if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
13148 noway_assert(varTypeIsStruct(varDsc));
13150 // We will try to optimize when we have a single field struct that is being struct promoted
13151 if (varDsc->lvFieldCnt == 1)
13153 unsigned lclNumFld = varDsc->lvFieldLclStart;
13154 // just grab the promoted field
13155 LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld];
13157 // Also make sure that the tree type matches the fieldVarType and that its lvFldOffset
13158 // is zero.
13159 if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0))
13161 // We can just use the existing promoted field LclNum
13162 temp->gtLclVarCommon.SetLclNum(lclNumFld);
13163 temp->gtType = fieldVarDsc->TypeGet();
13165 foldAndReturnTemp = true;
13169 // If the type of the IND (typ) is a "small int", and the type of the local has the
13170 // same width, then we can reduce to just the local variable -- it will be
13171 // correctly normalized, and signed/unsigned differences won't matter.
13173 // The below transformation cannot be applied if the local var needs to be normalized on load.
13174 else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
13175 !lvaTable[lclNum].lvNormalizeOnLoad())
13177 tree->gtType = typ = temp->TypeGet();
13178 foldAndReturnTemp = true;
13182 // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e.
13183 // nullptr).
13184 assert(fieldSeq == nullptr);
13185 bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq);
13186 assert(b || fieldSeq == nullptr);
13188 if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD))
13190 // Append the field sequence, change the type.
13191 temp->AsLclFld()->gtFieldSeq =
13192 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
13193 temp->gtType = typ;
13195 foldAndReturnTemp = true;
13198 // Otherwise we will fold this into a GT_LCL_FLD below
13199 // where we check (temp != nullptr)
13201 else // !temp->OperIsLocal()
13203 // We don't try to fold away the GT_IND/GT_ADDR for this case
13207 else if (op1->OperGet() == GT_ADD)
13209 /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */
13211 if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && op1->gtOp.gtOp2->OperGet() == GT_CNS_INT &&
13212 (!(opts.MinOpts() || opts.compDbgCode)))
13214 // No overflow arithmetic with pointers
13215 noway_assert(!op1->gtOverflow());
13217 temp = op1->gtOp.gtOp1->gtOp.gtOp1;
13218 if (!temp->OperIsLocal())
13224 // Cannot remove the GT_ADDR if it is currently a CSE candidate.
13225 if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1))
13230 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
13231 fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
13233 // Does the address have an associated zero-offset field sequence?
13234 FieldSeqNode* addrFieldSeq = nullptr;
13235 if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq))
13237 fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq);
13240 if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT)
13242 noway_assert(!varTypeIsGC(temp->TypeGet()));
13243 foldAndReturnTemp = true;
13247 // The emitter can't handle large offsets
13248 if (ival1 != (unsigned short)ival1)
13253 // The emitter can get confused by invalid offsets
13254 if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum))
13259 #ifdef _TARGET_ARM_
13260 // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field
13262 if (varTypeIsFloating(typ))
13264 if ((ival1 % emitTypeSize(typ)) != 0)
13266 tree->gtFlags |= GTF_IND_UNALIGNED;
13272 // Now we can fold this into a GT_LCL_FLD below
13273 // where we check (temp != nullptr)
13277 // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging:
13278 // - We may have a load of a local where the load has a different type than the local
13279 // - We may have a load of a local plus an offset
13281 // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and
13282 // offset if doing so is legal. The only cases in which this transformation is illegal are if the load
13283 // begins before the local or if the load extends beyond the end of the local (i.e. if the load is
13284 // out-of-bounds w.r.t. the local).
13285 if ((temp != nullptr) && !foldAndReturnTemp)
13287 assert(temp->OperIsLocal());
13289 const unsigned lclNum = temp->AsLclVarCommon()->gtLclNum;
13290 LclVarDsc* const varDsc = &lvaTable[lclNum];
13292 const var_types tempTyp = temp->TypeGet();
13293 const bool useExactSize =
13294 varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK);
13295 const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp);
13297 // Make sure we do not enregister this lclVar.
13298 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
13300 // If the size of the load is greater than the size of the lclVar, we cannot fold this access into
13301 // a lclFld: the access represented by an lclFld node must begin at or after the start of the
13302 // lclVar and must not extend beyond the end of the lclVar.
13303 if ((ival1 >= 0) && ((ival1 + genTypeSize(typ)) <= varSize))
13305 // We will turn a GT_LCL_VAR into a GT_LCL_FLD with an gtLclOffs of 'ival'
13306 // or if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival'
13307 // Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
13309 if (temp->OperGet() == GT_LCL_FLD)
13311 temp->AsLclFld()->gtLclOffs += (unsigned short)ival1;
13312 temp->AsLclFld()->gtFieldSeq =
13313 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
13317 temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"...
13318 temp->AsLclFld()->gtLclOffs = (unsigned short)ival1;
13319 if (fieldSeq != nullptr)
13320 { // If it does represent a field, note that.
13321 temp->AsLclFld()->gtFieldSeq = fieldSeq;
13324 temp->gtType = tree->gtType;
13325 foldAndReturnTemp = true;
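// Worked example of this fold (types invented for exposition): "*(int*)(&lclStruct + 8)"
// on a 16-byte local becomes GT_LCL_FLD(lclStruct, offset 8) of type int -- legal since
// the access stays inside the local's bounds (ival1 >= 0 && ival1 + size <= varSize).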
13329 if (foldAndReturnTemp)
13331 assert(temp != nullptr);
13332 assert(temp->TypeGet() == typ);
13333 assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR));
13335 // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for
13336 // 'temp' because a GT_ADDR always marks it for its operand.
13337 temp->gtFlags &= ~GTF_DONT_CSE;
13338 temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
13340 if (op1->OperGet() == GT_ADD)
13342 DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR
13343 DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT
13345 DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR
13346 DEBUG_DESTROY_NODE(tree); // GT_IND
13348 // If the result of the fold is a local var, we may need to perform further adjustments e.g. for
13350 if (temp->OperIs(GT_LCL_VAR))
13353 // We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear
13354 // and the node in question must have this bit set (as it has already been morphed).
13355 temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
13357 const bool forceRemorph = true;
13358 temp = fgMorphLocalVar(temp, forceRemorph);
13360 // We then set this flag on `temp` because `fgMorphLocalVar` may not set it itself, and the
13361 // caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function
13362 // returns.
13363 temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13370 // Only do this optimization when we are in the global optimizer. Doing this after value numbering
13371 // could result in an invalid value number for the newly generated GT_IND node.
13372 if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph)
13374 // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)).
13375 // TBD: this transformation is currently necessary for correctness -- it might
13376 // be good to analyze the failures that result if we don't do this, and fix them
13377 // in other ways. Ideally, this should be optional.
13378 GenTreePtr commaNode = op1;
13379 unsigned treeFlags = tree->gtFlags;
13380 commaNode->gtType = typ;
13381 commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
13382 // dangerous, clear the GTF_REVERSE_OPS at
13383 // least.
13385 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13387 while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
13389 commaNode = commaNode->gtOp.gtOp2;
13390 commaNode->gtType = typ;
13391 commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
13392 // dangerous, clear the GTF_REVERSE_OPS at
13393 // least.
13395 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13398 bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0;
13400 ArrayInfo arrInfo;
13401 if (wasArrIndex)
13402 bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
13403 assert(b);
13404 GetArrayInfoMap()->Remove(tree);
13407 op1 = gtNewOperNode(GT_IND, typ, commaNode->gtOp.gtOp2);
13408 op1->gtFlags = treeFlags;
13410 if (wasArrIndex)
13411 GetArrayInfoMap()->Set(op1, arrInfo);
13414 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13416 commaNode->gtOp.gtOp2 = op1;
13424 // Cannot remove op1 if it is currently a CSE candidate.
13425 if (gtIsActiveCSE_Candidate(op1))
13430 if (op1->OperGet() == GT_IND)
13432 if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
13434 // Cannot remove a GT_ADDR if it is currently a CSE candidate.
13435 if (gtIsActiveCSE_Candidate(tree))
13440 // Perform the transform ADDR(IND(...)) == (...).
13441 GenTreePtr addr = op1->gtOp.gtOp1;
13443 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
13445 DEBUG_DESTROY_NODE(op1);
13446 DEBUG_DESTROY_NODE(tree);
13451 else if (op1->OperGet() == GT_OBJ)
13453 // Cannot remove a GT_ADDR if it is currently a CSE candidate.
13454 if (gtIsActiveCSE_Candidate(tree))
13459 // Perform the transform ADDR(OBJ(...)) == (...).
13460 GenTreePtr addr = op1->AsObj()->Addr();
13462 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
13464 DEBUG_DESTROY_NODE(op1);
13465 DEBUG_DESTROY_NODE(tree);
13469 else if (op1->gtOper == GT_CAST)
13471 GenTreePtr casting = op1->gtCast.CastOp();
13472 if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR)
13474 DEBUG_DESTROY_NODE(op1);
13475 tree->gtOp.gtOp1 = op1 = casting;
13478 else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase)
13480 // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)).
13481 // (Be sure to mark "z" as an l-value...)
13482 GenTreePtr commaNode = op1;
13483 while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
13485 commaNode = commaNode->gtOp.gtOp2;
13487 // The top-level addr might be annotated with a zeroOffset field.
13488 FieldSeqNode* zeroFieldSeq = nullptr;
13489 bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq);
13491 commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE;
13493 // If the node we're about to put under a GT_ADDR is an indirection, it
13494 // doesn't need to be materialized, since we only want the addressing mode. Because
13495 // of this, this GT_IND is not a faulting indirection and we don't have to extract it
13496 // as a side effect.
13497 GenTree* commaOp2 = commaNode->gtOp.gtOp2;
13498 if (commaOp2->OperIsBlk())
13500 commaOp2 = fgMorphBlkToInd(commaOp2->AsBlk(), commaOp2->TypeGet());
13502 if (commaOp2->gtOper == GT_IND)
13504 commaOp2->gtFlags |= GTF_IND_NONFAULTING;
13507 op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2);
13511 // Transfer the annotation to the new GT_ADDR node.
13512 GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq);
13514 commaNode->gtOp.gtOp2 = op1;
13515 // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform
13516 // might give op1 a type different from byref (like, say, native int). So now go back and give
13517 // all the comma nodes the type of op1.
13518 // TODO: the comma flag update below is conservative and can be improved.
13519 // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to
13520 // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
13522 while (commaNode->gtOper == GT_COMMA)
13524 commaNode->gtType = op1->gtType;
13525 commaNode->gtFlags |= op1->gtFlags;
13527 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13529 commaNode = commaNode->gtOp.gtOp2;
13535 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
13536 op1->gtFlags |= GTF_DONT_CSE;
13542 /* Mark the nodes that are conditionally executed */
13543 fgWalkTreePre(&tree, gtMarkColonCond);
13545 /* Since we're doing this postorder we clear this if it got set by a child */
13546 fgRemoveRestOfBlock = false;
13551 /* Special case: trees that don't produce a value */
13552 if ((op2->OperKind() & GTK_ASGOP) || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) ||
13553 fgIsThrow(op2))
13555 typ = tree->gtType = TYP_VOID;
13558 // If we are in the Valuenum CSE phase then don't morph away anything as these
13559 // nodes may have CSE defs/uses in them.
13561 if (!optValnumCSE_phase)
13563 // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this
13564 // is all we need.
13566 GenTreePtr op1SideEffects = nullptr;
13567 // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
13568 // hoisted expressions in loops.
13569 gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE));
13570 if (op1SideEffects)
13572 // Replace the left hand side with the side effect list.
13573 tree->gtOp.gtOp1 = op1SideEffects;
13574 tree->gtFlags |= (op1SideEffects->gtFlags & GTF_ALL_EFFECT);
13578 /* The left operand is worthless, throw it away */
13579 if (lvaLocalVarRefCounted)
13581 lvaRecursiveDecRefCounts(op1);
13583 op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
13584 DEBUG_DESTROY_NODE(tree);
13585 DEBUG_DESTROY_NODE(op1);
13589 /* If the right operand is just a void nop node, throw it away */
13590 if (op2->IsNothingNode() && op1->gtType == TYP_VOID)
13592 op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
13593 DEBUG_DESTROY_NODE(tree);
13594 DEBUG_DESTROY_NODE(op2);
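// Worked examples of the GT_COMMA shrinking above: "COMMA(call, nop)" yields just the
// call (void nop on the right), and "COMMA(x + y, z)" yields just "z" because the left
// side has no side effects worth keeping.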
13603 /* Special case if fgRemoveRestOfBlock is set to true */
13604 if (fgRemoveRestOfBlock)
13606 if (fgIsCommaThrow(op1, true))
13608 GenTreePtr throwNode = op1->gtOp.gtOp1;
13609 noway_assert(throwNode->gtType == TYP_VOID);
13611 return throwNode;
13614 noway_assert(op1->OperKind() & GTK_RELOP);
13615 noway_assert(op1->gtFlags & GTF_EXCEPT);
13617 // We need to keep op1 for the side-effects. Hang it off
13618 // a GT_COMMA node.
13620 tree->ChangeOper(GT_COMMA);
13621 tree->gtOp.gtOp2 = op2 = gtNewNothingNode();
13623 // Additionally, since we're eliminating the JTRUE,
13624 // codegen won't like it if op1 is a RELOP of longs, floats or doubles.
13625 // So we change it into a GT_COMMA as well.
13626 op1->ChangeOper(GT_COMMA);
13627 op1->gtType = op1->gtOp.gtOp1->gtType;
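// The net effect is (illustrative tree shapes only):
//
//      JTRUE                COMMA
//        |                  /    \
//      RELOP      ==>    COMMA    NOP
//      /   \             /   \
//     x     y           x     y
//
// where the relop's operands are kept purely for their side effects.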
13636 assert(oper == tree->gtOper);
13638 // If we are in the Valuenum CSE phase then don't morph away anything as these
13639 // nodes may have CSE defs/uses in them.
13641 if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList())
13643 /* Check for op1 as a GT_COMMA with an unconditional throw node */
13644 if (op1 && fgIsCommaThrow(op1, true))
13646 if ((op1->gtFlags & GTF_COLON_COND) == 0)
13648 /* We can safely throw out the rest of the statements */
13649 fgRemoveRestOfBlock = true;
13652 GenTreePtr throwNode = op1->gtOp.gtOp1;
13653 noway_assert(throwNode->gtType == TYP_VOID);
13655 if (oper == GT_COMMA)
13657 /* Both tree and op1 are GT_COMMA nodes */
13658 /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */
13659 tree->gtOp.gtOp1 = throwNode;
13662 else if (oper != GT_NOP)
13664 if (genActualType(typ) == genActualType(op1->gtType))
13666 /* The types match so, return the comma throw node as the new tree */
13671 if (typ == TYP_VOID)
13673 // Return the throw node
13678 GenTreePtr commaOp2 = op1->gtOp.gtOp2;
13680 // The type of oper needs to be the same as the tree's type
13681 if (typ == TYP_LONG)
13683 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
13684 commaOp2->gtIntConCommon.SetLngValue(0);
13685 /* Change the types of oper and commaOp2 to TYP_LONG */
13686 op1->gtType = commaOp2->gtType = TYP_LONG;
13688 else if (varTypeIsFloating(typ))
13690 commaOp2->ChangeOperConst(GT_CNS_DBL);
13691 commaOp2->gtDblCon.gtDconVal = 0.0;
13692 /* Change the types of oper and commaOp2 to TYP_DOUBLE */
13693 op1->gtType = commaOp2->gtType = TYP_DOUBLE;
13697 commaOp2->ChangeOperConst(GT_CNS_INT);
13698 commaOp2->gtIntConCommon.SetIconValue(0);
13699 /* Change the types of oper and commaOp2 to TYP_INT */
13700 op1->gtType = commaOp2->gtType = TYP_INT;
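// In all three cases above, the comma's value operand becomes a zero constant
// of the type the tree is expected to produce; e.g. (illustrative) a
// TYP_DOUBLE use of COMMA(throw, icon) becomes COMMA(throw, 0.0).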
13703 /* Return the GT_COMMA node as the new tree */
13710 /* Check for op2 as a GT_COMMA with an unconditional throw */
13712 if (op2 && fgIsCommaThrow(op2, true))
13714 if ((op2->gtFlags & GTF_COLON_COND) == 0)
13716 /* We can safely throw out the rest of the statements */
13717 fgRemoveRestOfBlock = true;
13720 // If op1 has no side-effects
13721 if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
13723 // If tree is an asg node
13724 if (tree->OperIsAssignment())
13726 /* Return the throw node as the new tree */
13727 return op2->gtOp.gtOp1;
13730 if (tree->OperGet() == GT_ARR_BOUNDS_CHECK)
13732 /* Return the throw node as the new tree */
13733 return op2->gtOp.gtOp1;
13736 // If tree is a comma node
13737 if (tree->OperGet() == GT_COMMA)
13739 /* Return the throw node as the new tree */
13740 return op2->gtOp.gtOp1;
13743 /* for the shift nodes the type of op2 can differ from the tree type */
13744 if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
13746 noway_assert(GenTree::OperIsShiftOrRotate(oper));
13748 GenTreePtr commaOp2 = op2->gtOp.gtOp2;
13750 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
13751 commaOp2->gtIntConCommon.SetLngValue(0);
13753 /* Change the types of oper and commaOp2 to TYP_LONG */
13754 op2->gtType = commaOp2->gtType = TYP_LONG;
13757 if ((genActualType(typ) == TYP_INT) &&
13758 (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet())))
13760 // An example case is comparison (say GT_GT) of two longs or floating point values.
13762 GenTreePtr commaOp2 = op2->gtOp.gtOp2;
13764 commaOp2->ChangeOperConst(GT_CNS_INT);
13765 commaOp2->gtIntCon.gtIconVal = 0;
13766 /* Change the types of oper and commaOp2 to TYP_INT */
13767 op2->gtType = commaOp2->gtType = TYP_INT;
13770 if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL))
13772 noway_assert(tree->OperGet() == GT_ADD);
13774 GenTreePtr commaOp2 = op2->gtOp.gtOp2;
13776 commaOp2->ChangeOperConst(GT_CNS_INT);
13777 commaOp2->gtIntCon.gtIconVal = 0;
13778 /* Change the types of oper and commaOp2 to TYP_BYREF */
13779 op2->gtType = commaOp2->gtType = TYP_BYREF;
13782 /* types should now match */
13783 noway_assert((genActualType(typ) == genActualType(op2->gtType)));
13785 /* Return the GT_COMMA node as the new tree */
13791 /*-------------------------------------------------------------------------
13792 * Optional morphing is done if tree transformations are permitted
13795 if ((opts.compFlags & CLFLG_TREETRANS) == 0)
13800 tree = fgMorphSmpOpOptional(tree->AsOp());
13802 } // extra scope for gcc workaround
13806 #pragma warning(pop)
13809 GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
13811 genTreeOps oper = tree->gtOper;
13812 GenTree* op1 = tree->gtOp1;
13813 GenTree* op2 = tree->gtOp2;
13814 var_types typ = tree->TypeGet();
13816 if (fgGlobalMorph && GenTree::OperIsCommutative(oper))
13818 /* Swap the operands so that the more expensive one is 'op1' */
13820 if (tree->gtFlags & GTF_REVERSE_OPS)
13828 tree->gtFlags &= ~GTF_REVERSE_OPS;
13831 if (oper == op2->gtOper)
13833 /* Reorder nested operators at the same precedence level to be
13834 left-recursive. For example, change "(a+(b+c))" to the
13835 equivalent expression "((a+b)+c)".
13838 /* Things are handled differently for floating-point operators */
13840 if (!varTypeIsFloating(tree->TypeGet()))
13842 fgMoveOpsLeft(tree);
13851 /* Change "((x+icon)+y)" to "((x+y)+icon)"
13852 Don't reorder floating-point operations */
13854 if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
13855 varTypeIsIntegralOrI(typ))
13857 GenTreePtr ad2 = op1->gtOp.gtOp2;
13859 if (op2->OperIsConst() == 0 && ad2->OperIsConst() != 0)
13871 // And it swaps ad2 and op2. If (op2) is varTypeIsGC, then this implies that (tree) is
13872 // varTypeIsGC. If (op1) is not, then when we swap (ad2) and (op2), then we have a TYP_INT node
13873 // (op1) with a child that is varTypeIsGC. If we encounter that situation, make (op1) the same type as (tree).
13876 // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is necessary.
13879 if (varTypeIsGC(op2->TypeGet()))
13881 noway_assert(varTypeIsGC(typ));
13886 op1->gtOp.gtOp2 = op2;
13887 op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
13895 /*-------------------------------------------------------------------------
13896 * Perform optional oper-specific postorder morphing
13902 bool dstIsSafeLclVar;
13905 /* We'll convert "a = a <op> x" into "a <op>= x" */
13906 /* and also "a = x <op> a" into "a <op>= x" for commutative ops */
13907 CLANG_FORMAT_COMMENT_ANCHOR;
13909 if (typ == TYP_LONG)
13914 if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
13916 if (tree->OperIsCopyBlkOp())
13918 return fgMorphCopyBlock(tree);
13922 return fgMorphInitBlock(tree);
13926 /* Make sure we're allowed to do this */
13928 if (optValnumCSE_phase)
13930 // It is not safe to reorder/delete CSE's
13934 /* Are we assigning to a GT_LCL_VAR ? */
13936 dstIsSafeLclVar = (op1->gtOper == GT_LCL_VAR);
13938 /* If we have a GT_LCL_VAR, then is the address taken? */
13939 if (dstIsSafeLclVar)
13941 unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
13942 LclVarDsc* varDsc = lvaTable + lclNum;
13944 noway_assert(lclNum < lvaCount);
13946 /* Is the address taken? */
13947 if (varDsc->lvAddrExposed)
13949 dstIsSafeLclVar = false;
13951 else if (op2->gtFlags & GTF_ASG)
13957 if (!dstIsSafeLclVar)
13959 if (op2->gtFlags & GTF_ASG)
13964 if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT))
13970 /* Special case: a cast that can be thrown away */
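// For example (illustrative): in "IND<ubyte>(p) = CAST<short>(val)" the cast
// can be discarded, since the one-byte store truncates the value anyway and
// the cast loses no precision that the store would have kept.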
13972 if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow())
13978 srct = op2->gtCast.CastOp()->TypeGet();
13979 cast = (var_types)op2->CastToType();
13980 dstt = op1->TypeGet();
13982 /* Make sure these are all ints and precision is not lost */
13984 if (cast >= dstt && dstt <= TYP_INT && srct <= TYP_INT)
13986 op2 = tree->gtOp2 = op2->gtCast.CastOp();
13990 /* Make sure we have the operator range right */
13992 static_assert(GT_SUB == GT_ADD + 1, "bad oper value");
13993 static_assert(GT_MUL == GT_ADD + 2, "bad oper value");
13994 static_assert(GT_DIV == GT_ADD + 3, "bad oper value");
13995 static_assert(GT_MOD == GT_ADD + 4, "bad oper value");
13996 static_assert(GT_UDIV == GT_ADD + 5, "bad oper value");
13997 static_assert(GT_UMOD == GT_ADD + 6, "bad oper value");
13999 static_assert(GT_OR == GT_ADD + 7, "bad oper value");
14000 static_assert(GT_XOR == GT_ADD + 8, "bad oper value");
14001 static_assert(GT_AND == GT_ADD + 9, "bad oper value");
14003 static_assert(GT_LSH == GT_ADD + 10, "bad oper value");
14004 static_assert(GT_RSH == GT_ADD + 11, "bad oper value");
14005 static_assert(GT_RSZ == GT_ADD + 12, "bad oper value");
14007 /* Check for a suitable operator on the RHS */
14009 cmop = op2->OperGet();
14014 // GT_CHS only supported for integer types
14015 if (varTypeIsFloating(tree->TypeGet()))
14023 // GT_ASG_MUL only supported for floating point types
14024 if (!varTypeIsFloating(tree->TypeGet()))
14033 if (op2->gtOverflow())
14035 /* Disable folding into "<op>=" if the result can be
14036 visible to anyone as <op> may throw an exception and
14037 the assignment should not proceed.
14038 We are safe with an assignment to a local variable.
14040 if (ehBlockHasExnFlowDsc(compCurBB))
14044 if (!dstIsSafeLclVar)
14049 #ifndef _TARGET_AMD64_
14050 // This is hard for byte-operations as we need to make
14051 // sure both operands are in RBM_BYTE_REGS.
14052 if (varTypeIsByte(op2->TypeGet()))
14054 #endif // _TARGET_AMD64_
14059 // GT_ASG_DIV only supported for floating point types
14060 if (!varTypeIsFloating(tree->TypeGet()))
14073 bool bReverse = false;
14074 bool bAsgOpFoldable = fgShouldCreateAssignOp(tree, &bReverse);
14075 if (bAsgOpFoldable)
14079 // We will transform this from "a = x <op> a" to "a <op>= x"
14080 // so we can now destroy the duplicate "a"
14081 DEBUG_DESTROY_NODE(op2->gtOp.gtOp2);
14082 op2->gtOp.gtOp2 = op2->gtOp.gtOp1;
14085 /* Special case: "x |= -1" and "x &= 0" */
14086 if (((cmop == GT_AND) && op2->gtOp.gtOp2->IsIntegralConst(0)) ||
14087 ((cmop == GT_OR) && op2->gtOp.gtOp2->IsIntegralConst(-1)))
14089 /* Simply change to an assignment */
14090 tree->gtOp2 = op2->gtOp.gtOp2;
14094 if (cmop == GT_NEG)
14096 /* This is "x = -x;", use the flipsign operator */
14098 tree->ChangeOper(GT_CHS);
14100 if (op1->gtOper == GT_LCL_VAR)
14102 op1->gtFlags |= GTF_VAR_USEASG;
14105 tree->gtOp2 = gtNewIconNode(0, op1->TypeGet());
14110 if (cmop == GT_RSH && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
14112 // Changing from x = x op y to x op= y when x is a small integer type
14113 // makes the op size smaller (originally the op size was 32 bits, after
14114 // sign or zero extension of x, and there is an implicit truncation in the assignment).
14116 // This is ok in most cases because the upper bits were
14117 // lost when assigning the op result to a small type var,
14118 // but it may not be ok for the right shift operation where the higher bits
14119 // could be shifted into the lower bits and preserved.
14120 // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) ==
14121 // (sbyte)x >>signed y)) as do unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) ==
14122 // (ubyte)x >>unsigned y), but signed right shift of an unsigned small type may give the wrong result:
14125 // e.g. (ubyte)((int)(ubyte)0xf0 >>signed 4) == 0x0f,
14126 // but (ubyte)0xf0 >>signed 4 == 0xff which is incorrect.
14127 // The result becomes correct if we use >>unsigned instead of >>signed.
14128 noway_assert(op1->TypeGet() == op2->gtOp.gtOp1->TypeGet());
14132 /* Replace with an assignment operator */
14133 noway_assert(GT_ADD - GT_ADD == GT_ASG_ADD - GT_ASG_ADD);
14134 noway_assert(GT_SUB - GT_ADD == GT_ASG_SUB - GT_ASG_ADD);
14135 noway_assert(GT_OR - GT_ADD == GT_ASG_OR - GT_ASG_ADD);
14136 noway_assert(GT_XOR - GT_ADD == GT_ASG_XOR - GT_ASG_ADD);
14137 noway_assert(GT_AND - GT_ADD == GT_ASG_AND - GT_ASG_ADD);
14138 noway_assert(GT_LSH - GT_ADD == GT_ASG_LSH - GT_ASG_ADD);
14139 noway_assert(GT_RSH - GT_ADD == GT_ASG_RSH - GT_ASG_ADD);
14140 noway_assert(GT_RSZ - GT_ADD == GT_ASG_RSZ - GT_ASG_ADD);
14142 tree->SetOper((genTreeOps)(cmop - GT_ADD + GT_ASG_ADD));
14143 tree->gtOp2 = op2->gtOp.gtOp2;
14145 /* Propagate GTF_OVERFLOW */
14147 if (op2->gtOverflowEx())
14149 tree->gtType = op2->gtType;
14150 tree->gtFlags |= (op2->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
14153 #if FEATURE_SET_FLAGS
14155 /* Propagate GTF_SET_FLAGS */
14156 if (op2->gtSetFlags())
14158 tree->gtRequestSetFlags();
14161 #endif // FEATURE_SET_FLAGS
14163 DEBUG_DESTROY_NODE(op2);
14166 /* The target is used as well as being defined */
14167 if (op1->OperIsLocal())
14169 op1->gtFlags &= ~GTF_VAR_USEDEF;
14170 op1->gtFlags |= GTF_VAR_USEASG;
14173 #if CPU_HAS_FP_SUPPORT
14174 /* Check for the special case "x += y * x;" */
14176 // GT_ASG_MUL only supported for floating point types
14177 if (cmop != GT_ADD && cmop != GT_SUB)
14182 if (op2->gtOper == GT_MUL && varTypeIsFloating(tree->TypeGet()))
14184 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
14186 /* Change "x += x * y" into "x *= (y + 1)" */
14188 op2 = op2->gtOp.gtOp2;
14190 else if (GenTree::Compare(op1, op2->gtOp.gtOp2))
14192 /* Change "x += y * x" into "x *= (y + 1)" */
14194 op2 = op2->gtOp.gtOp1;
14201 op1 = gtNewDconNode(1.0);
14203 /* Now make the "*=" node */
14205 if (cmop == GT_ADD)
14207 /* Change "x += x * y" into "x *= (y + 1)" */
14209 tree->gtOp2 = op2 = gtNewOperNode(GT_ADD, tree->TypeGet(), op2, op1);
14213 /* Change "x -= x * y" into "x *= (1 - y)" */
14215 noway_assert(cmop == GT_SUB);
14216 tree->gtOp2 = op2 = gtNewOperNode(GT_SUB, tree->TypeGet(), op1, op2);
14218 tree->ChangeOper(GT_ASG_MUL);
14220 #endif // CPU_HAS_FP_SUPPORT
14228 /* Is the destination identical to the first RHS sub-operand? */
14230 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
14232 /* This is "x = ~x" which is the same as "x ^= -1"
14233 * Transform the node into a GT_ASG_XOR */
14235 noway_assert(genActualType(typ) == TYP_INT || genActualType(typ) == TYP_LONG);
14237 op2->gtOp.gtOp2 = (genActualType(typ) == TYP_INT) ? gtNewIconNode(-1) : gtNewLconNode(-1);
14252 /* Check for the case "(val + icon) * icon" */
14254 if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD)
14256 GenTreePtr add = op1->gtOp.gtOp2;
14258 if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
14260 if (tree->gtOverflow() || op1->gtOverflow())
14265 ssize_t imul = op2->gtIntCon.gtIconVal;
14266 ssize_t iadd = add->gtIntCon.gtIconVal;
14268 /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */
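/* For example: '(x + 3) * 4' becomes '(x * 4) + 12', leaving the
   constant as the immediate operand of the resulting add */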
14271 tree->ChangeOper(oper);
14273 op2->gtIntCon.gtIconVal = iadd * imul;
14275 op1->ChangeOper(GT_MUL);
14277 add->gtIntCon.gtIconVal = imul;
14278 #ifdef _TARGET_64BIT_
14279 if (add->gtType == TYP_INT)
14281 // we need to properly re-sign-extend or truncate after multiplying two int constants above
14282 add->AsIntCon()->TruncateOrSignExtend32();
14284 #endif //_TARGET_64BIT_
14292 /* For "val / 1", just return "val" */
14294 if (op2->IsIntegralConst(1))
14296 DEBUG_DESTROY_NODE(tree);
14304 /* Check for the case "(val + icon) << icon" */
14306 if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
14308 GenTreePtr cns = op1->gtOp.gtOp2;
14310 if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
14312 ssize_t ishf = op2->gtIntConCommon.IconValue();
14313 ssize_t iadd = cns->gtIntConCommon.IconValue();
14315 // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
14317 /* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
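/* For example: '(x + 4) << 2' becomes '(x << 2) + 16' */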
14319 tree->ChangeOper(GT_ADD);
14320 ssize_t result = iadd << ishf;
14321 op2->gtIntConCommon.SetIconValue(result);
14322 #ifdef _TARGET_64BIT_
14323 if (op1->gtType == TYP_INT)
14325 op2->AsIntCon()->TruncateOrSignExtend32();
14327 #endif // _TARGET_64BIT_
14329 // we are reusing the shift amount node here, but the type we want is that of the shift result
14330 op2->gtType = op1->gtType;
14332 if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq != nullptr &&
14333 cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
14335 assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr);
14336 op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq;
14339 op1->ChangeOper(GT_LSH);
14341 cns->gtIntConCommon.SetIconValue(ishf);
14349 if (!optValnumCSE_phase)
14351 /* "x ^ -1" is "~x" */
14353 if (op2->IsIntegralConst(-1))
14355 tree->ChangeOper(GT_NOT);
14356 tree->gtOp2 = nullptr;
14357 DEBUG_DESTROY_NODE(op2);
14359 else if (op2->IsIntegralConst(1) && op1->OperIsCompare())
14361 /* "binaryVal ^ 1" is "!binaryVal" */
14362 gtReverseCond(op1);
14363 DEBUG_DESTROY_NODE(op2);
14364 DEBUG_DESTROY_NODE(tree);
14372 // Initialization values for initBlk have special semantics - their lower
14373 // byte is used to fill the struct. However, we allow 0 as a "bare" value,
14374 // which enables them to get a VNForZero, and be propagated.
14375 if (op1->IsIntegralConst(0))
14387 //------------------------------------------------------------------------
14388 // fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b
14389 // (see ECMA III 3.55 and III.3.56).
14392 // tree - The GT_MOD/GT_UMOD tree to morph
14395 // The morphed tree
14398 // For ARM64 we don't have a remainder instruction so this transform is
14399 // always done. For XARCH this transform is done if we know that magic
14400 // division will be used; in that case this transform allows CSE to
14401 // eliminate the redundant div from code like "x = a / 3; y = a % 3;".
14403 // This method will produce the above expression if 'a' and 'b' are
14404 // leaf nodes; otherwise, if either of them is not a leaf, it will spill
14405 // its value into a temporary variable. An example:
14406 // (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
14408 GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
14410 if (tree->OperGet() == GT_MOD)
14412 tree->SetOper(GT_DIV);
14414 else if (tree->OperGet() == GT_UMOD)
14416 tree->SetOper(GT_UDIV);
14420 noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
14423 var_types type = tree->gtType;
14424 GenTree* denominator = tree->gtOp2;
14425 GenTree* numerator = tree->gtOp1;
14427 if (!numerator->OperIsLeaf())
14429 numerator = fgMakeMultiUse(&tree->gtOp1);
14431 else if (lvaLocalVarRefCounted && numerator->OperIsLocal())
14433 // Morphing introduces new lclVar references. Increase ref counts
14434 lvaIncRefCnts(numerator);
14437 if (!denominator->OperIsLeaf())
14439 denominator = fgMakeMultiUse(&tree->gtOp2);
14441 else if (lvaLocalVarRefCounted && denominator->OperIsLocal())
14443 // Morphing introduces new lclVar references. Increase ref counts
14444 lvaIncRefCnts(denominator);
14447 // The numerator and denominator may have been assigned to temps, in which case
14448 // their defining assignments are in the current tree. Therefore, we need to
14449 // set the execution order accordingly on the nodes we create.
14450 // That is, the "mul" will be evaluated in "normal" order, and the "sub" must
14451 // be set to be evaluated in reverse order.
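// Illustrative shape of the result for non-leaf operands (t1 and t2 are the
// temps introduced by fgMakeMultiUse; GTF_REVERSE_OPS on the SUB makes the
// MUL side, which contains the temp definitions, evaluate first):
//
//          SUB
//         /   \
//        t1    MUL
//             /   \
//           DIV    t2
//          /   \
//    COMMA      COMMA
//   (t1=a, t1) (t2=b, t2)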
14453 GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator));
14454 assert(!mul->IsReverseOp());
14455 GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul);
14456 sub->gtFlags |= GTF_REVERSE_OPS;
14459 sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
14465 //------------------------------------------------------------------------------
14466 // fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
14470 // oper - Operation to check
14473 // True if the operation can be a root of a bitwise rotation tree; false otherwise.
14475 bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper)
14477 return (oper == GT_OR) || (oper == GT_XOR);
14480 //------------------------------------------------------------------------------
14481 // fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return
14482 // an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
14485 // tree - tree to check for a rotation pattern
14488 // An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
14491 // The input is a GT_OR or a GT_XOR tree.
14493 GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
14495 #ifndef LEGACY_BACKEND
14497 // Check for a rotation pattern.
14510 // The patterns recognized:
14511 // (x << (y & M)) op (x >>> ((-y + N) & M))
14512 // (x >>> ((-y + N) & M)) op (x << (y & M))
14514 // (x << y) op (x >>> (-y + N))
14515 // (x >>> (-y + N)) op (x << y)
14517 // (x >>> (y & M)) op (x << ((-y + N) & M))
14518 // (x << ((-y + N) & M)) op (x >>> (y & M))
14520 // (x >>> y) op (x << (-y + N))
14521 // (x << (-y + N)) op (x >>> y)
14523 // (x << c1) op (x >>> c2)
14524 // (x >>> c1) op (x << c2)
14527 // c1 and c2 are const
14528 // c1 + c2 == bitsize(x)
14531 // M & (N - 1) == N - 1
14532 // op is either | or ^
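// For example, the typical 32-bit rotate-left idiom
//     (x << y) | (x >> (32 - y))    // x unsigned
// matches the "(x << y) op (x >>> (-y + N))" shape above (the "32 - y"
// typically reaches this code as ADD(NEG(y), 32)) and is rewritten into a
// single GT_ROL node.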
14534 if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
14536 // We can't do anything if the tree has assignments, calls, or volatile
14537 // reads. Note that we allow GTF_EXCEPT side effect since any exceptions
14538 // thrown by the original tree will be thrown by the transformed tree as well.
14542 genTreeOps oper = tree->OperGet();
14543 assert(fgOperIsBitwiseRotationRoot(oper));
14545 // Check if we have an LSH on one side of the OR and an RSZ on the other side.
14546 GenTreePtr op1 = tree->gtGetOp1();
14547 GenTreePtr op2 = tree->gtGetOp2();
14548 GenTreePtr leftShiftTree = nullptr;
14549 GenTreePtr rightShiftTree = nullptr;
14550 if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
14552 leftShiftTree = op1;
14553 rightShiftTree = op2;
14555 else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
14557 leftShiftTree = op2;
14558 rightShiftTree = op1;
14565 // Check if the trees representing the value to shift are identical.
14566 // We already checked that there are no side effects above.
14567 if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
14569 GenTreePtr rotatedValue = leftShiftTree->gtGetOp1();
14570 var_types rotatedValueActualType = genActualType(rotatedValue->gtType);
14571 ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8;
14572 noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
14573 GenTreePtr leftShiftIndex = leftShiftTree->gtGetOp2();
14574 GenTreePtr rightShiftIndex = rightShiftTree->gtGetOp2();
14576 // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
14577 // shouldn't be masked for the transformation to be valid. If additional
14578 // higher bits are not masked, the transformation is still valid since the result
14579 // of MSIL shift instructions is unspecified if the shift amount is greater than
14580 // or equal to the width of the value being shifted.
14581 ssize_t minimalMask = rotatedValueBitSize - 1;
14582 ssize_t leftShiftMask = -1;
14583 ssize_t rightShiftMask = -1;
14585 if ((leftShiftIndex->OperGet() == GT_AND))
14587 if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
14589 leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
14590 leftShiftIndex = leftShiftIndex->gtGetOp1();
14598 if ((rightShiftIndex->OperGet() == GT_AND))
14600 if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
14602 rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
14603 rightShiftIndex = rightShiftIndex->gtGetOp1();
14611 if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask))
14613 // The shift index is overmasked, e.g., we have
14614 // something like (x << y & 15) or
14615 // (x >>> ((32 - y) & 15)) with 32-bit x.
14616 // The transformation is not valid.
14620 GenTreePtr shiftIndexWithAdd = nullptr;
14621 GenTreePtr shiftIndexWithoutAdd = nullptr;
14622 genTreeOps rotateOp = GT_NONE;
14623 GenTreePtr rotateIndex = nullptr;
14625 if (leftShiftIndex->OperGet() == GT_ADD)
14627 shiftIndexWithAdd = leftShiftIndex;
14628 shiftIndexWithoutAdd = rightShiftIndex;
14631 else if (rightShiftIndex->OperGet() == GT_ADD)
14633 shiftIndexWithAdd = rightShiftIndex;
14634 shiftIndexWithoutAdd = leftShiftIndex;
14638 if (shiftIndexWithAdd != nullptr)
14640 if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
14642 if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
14644 if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
14646 if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
14648 // We found one of these patterns:
14649 // (x << (y & M)) | (x >>> ((-y + N) & M))
14650 // (x << y) | (x >>> (-y + N))
14651 // (x >>> (y & M)) | (x << ((-y + N) & M))
14652 // (x >>> y) | (x << (-y + N))
14653 // where N == bitsize(x), M is const, and
14654 // M & (N - 1) == N - 1
14655 CLANG_FORMAT_COMMENT_ANCHOR;
14657 #ifndef _TARGET_64BIT_
14658 if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
14660 // TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86.
14661 // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
14662 // to add helpers for GT_ROL and GT_ROR.
14667 rotateIndex = shiftIndexWithoutAdd;
14673 else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI()))
14675 if (leftShiftIndex->gtIntCon.gtIconVal + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
14677 // We found this pattern:
14678 // (x << c1) | (x >>> c2)
14679 // where c1 and c2 are const and c1 + c2 == bitsize(x)
14681 rotateIndex = leftShiftIndex;
14685 if (rotateIndex != nullptr)
14687 noway_assert(GenTree::OperIsRotate(rotateOp));
14689 unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT;
14691 // We can use the same tree only during global morph; reusing the tree in a later morph
14692 // may invalidate value numbers.
14695 tree->gtOp.gtOp1 = rotatedValue;
14696 tree->gtOp.gtOp2 = rotateIndex;
14697 tree->ChangeOper(rotateOp);
14699 unsigned childFlags = 0;
14700 for (GenTree* op : tree->Operands())
14702 childFlags |= (op->gtFlags & GTF_ALL_EFFECT);
14705 // The parent's flags should be a superset of its operands' flags
14706 noway_assert((inputTreeEffects & childFlags) == childFlags);
14710 tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex);
14711 noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT));
14717 #endif // LEGACY_BACKEND
14721 #if !CPU_HAS_FP_SUPPORT
14722 GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
14725 genTreeOps oper = tree->OperGet();
14726 var_types typ = tree->TypeGet();
14727 GenTreePtr op1 = tree->gtOp.gtOp1;
14728 GenTreePtr op2 = tree->gtGetOp2IfPresent();
14731 We have to use helper calls for all FP operations:
14733 FP operators that operate on FP values
14734 casts to and from FP
14735 comparisons of FP values
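For example (illustrative): a float add "a + b" becomes a call to the
CPX_R4_ADD helper; note that the argument list below is built with op2
passed first.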
14738 if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet())))
14742 size_t argc = genTypeStSz(typ);
14744 /* Not all FP operations need helper calls */
14758 /* If the result isn't FP, it better be a compare or cast */
14760 if (!(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST))
14763 noway_assert(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST);
14766 /* Keep track of how many arguments we're passing */
14768 fgPtrArgCntCur += argc;
14770 /* Is this a binary operator? */
14774 /* Add the second operand to the argument count */
14776 fgPtrArgCntCur += argc;
14779 /* What kind of an operator do we have? */
14784 helper = CPX_R4_ADD;
14787 helper = CPX_R4_SUB;
14790 helper = CPX_R4_MUL;
14793 helper = CPX_R4_DIV;
14795 // case GT_MOD: helper = CPX_R4_REM; break;
14798 helper = CPX_R4_EQ;
14801 helper = CPX_R4_NE;
14804 helper = CPX_R4_LT;
14807 helper = CPX_R4_LE;
14810 helper = CPX_R4_GE;
14813 helper = CPX_R4_GT;
14820 noway_assert(!"unexpected FP binary op");
14824 args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
14834 noway_assert(!"FP cast");
14837 helper = CPX_R4_NEG;
14844 noway_assert(!"unexpected FP unary op");
14848 args = gtNewArgList(tree->gtOp.gtOp1);
14851 /* If we have double result/operands, modify the helper */
14853 if (typ == TYP_DOUBLE)
14855 static_assert_no_msg(CPX_R4_NEG + 1 == CPX_R8_NEG);
14856 static_assert_no_msg(CPX_R4_ADD + 1 == CPX_R8_ADD);
14857 static_assert_no_msg(CPX_R4_SUB + 1 == CPX_R8_SUB);
14858 static_assert_no_msg(CPX_R4_MUL + 1 == CPX_R8_MUL);
14859 static_assert_no_msg(CPX_R4_DIV + 1 == CPX_R8_DIV);
14865 noway_assert(tree->OperIsCompare());
14867 static_assert_no_msg(CPX_R4_EQ + 1 == CPX_R8_EQ);
14868 static_assert_no_msg(CPX_R4_NE + 1 == CPX_R8_NE);
14869 static_assert_no_msg(CPX_R4_LT + 1 == CPX_R8_LT);
14870 static_assert_no_msg(CPX_R4_LE + 1 == CPX_R8_LE);
14871 static_assert_no_msg(CPX_R4_GE + 1 == CPX_R8_GE);
14872 static_assert_no_msg(CPX_R4_GT + 1 == CPX_R8_GT);
14875 tree = fgMorphIntoHelperCall(tree, helper, args);
14877 if (fgPtrArgCntMax < fgPtrArgCntCur)
14879 JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
14880 fgPtrArgCntMax = fgPtrArgCntCur;
14883 fgPtrArgCntCur -= argc;
14891 if (compCurBB == genReturnBB)
14893 /* This is the 'exitCrit' call at the exit label */
14895 noway_assert(op1->gtType == TYP_VOID);
14896 noway_assert(op2 == 0);
14898 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
14903 /* This is a (real) return value -- check its type */
14904 CLANG_FORMAT_COMMENT_ANCHOR;
14907 if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
14909 bool allowMismatch = false;
14911 // Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa
14912 if ((info.compRetType == TYP_BYREF && genActualType(op1->TypeGet()) == TYP_I_IMPL) ||
14913 (op1->TypeGet() == TYP_BYREF && genActualType(info.compRetType) == TYP_I_IMPL))
14914 allowMismatch = true;
14916 if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet()))
14917 allowMismatch = true;
14919 if (!allowMismatch)
14920 NO_WAY("Return type mismatch");
14930 /*****************************************************************************
14932 * Transform the given tree for code generation and return an equivalent tree.
14935 GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac)
14938 assert(tree->gtOper != GT_STMT);
14943 if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID)
14945 noway_assert(!"JitBreakMorphTree hit");
14951 int thisMorphNum = 0;
14952 if (verbose && treesBeforeAfterMorph)
14954 thisMorphNum = morphNum++;
14955 printf("\nfgMorphTree (before %d):\n", thisMorphNum);
14962 // Apply any rewrites for implicit byref arguments before morphing the tree.
14965 if (fgMorphImplicitByRefArgs(tree))
14968 if (verbose && treesBeforeAfterMorph)
14970 printf("\nfgMorphTree (%d), after implicit-byref rewrite:\n", thisMorphNum);
14977 /*-------------------------------------------------------------------------
14978 * fgMorphTree() can potentially replace a tree with another, and the
14979 * caller has to store the return value correctly.
14980 * Turn this on to always make a copy of "tree" here to shake out
14981 * hidden/unupdated references.
14986 if (compStressCompile(STRESS_GENERIC_CHECK, 0))
14990 #ifdef SMALL_TREE_NODES
14991 if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL)
14993 copy = gtNewLargeOperNode(GT_ADD, TYP_INT);
14998 copy = new (this, GT_CALL) GenTreeCall(TYP_INT);
15001 copy->CopyFrom(tree, this);
15003 #if defined(LATE_DISASM)
15004 // GT_CNS_INT is considered small, so CopyFrom() won't copy all fields
15005 if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
15007 copy->gtIntCon.gtIconHdl.gtIconHdl1 = tree->gtIntCon.gtIconHdl.gtIconHdl1;
15008 copy->gtIntCon.gtIconHdl.gtIconHdl2 = tree->gtIntCon.gtIconHdl.gtIconHdl2;
15012 DEBUG_DESTROY_NODE(tree);
15019 /* Ensure that we haven't morphed this node already */
15020 assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
15022 #if LOCAL_ASSERTION_PROP
15023 /* Before morphing the tree, we try to propagate any active assertions */
15024 if (optLocalAssertionProp)
15026 /* Do we have any active assertions? */
15028 if (optAssertionCount > 0)
15030 GenTreePtr newTree = tree;
15031 while (newTree != nullptr)
15034 /* newTree is non-Null if we propagated an assertion */
15035 newTree = optAssertionProp(apFull, tree, nullptr);
15037 assert(tree != nullptr);
15040 PREFAST_ASSUME(tree != nullptr);
15044 /* Save the original un-morphed tree for fgMorphTreeDone */
15046 GenTreePtr oldTree = tree;
15048 /* Figure out what kind of a node we have */
15050 unsigned kind = tree->OperKind();
15052 /* Is this a constant node? */
15054 if (kind & GTK_CONST)
15056 tree = fgMorphConst(tree);
15060 /* Is this a leaf node? */
15062 if (kind & GTK_LEAF)
15064 tree = fgMorphLeaf(tree);
15068 /* Is it a 'simple' unary/binary operator? */
15070 if (kind & GTK_SMPOP)
15072 tree = fgMorphSmpOp(tree, mac);
15076 /* See what kind of a special operator we have here */
15078 switch (tree->OperGet())
15081 tree = fgMorphField(tree, mac);
15085 tree = fgMorphCall(tree->AsCall());
15088 case GT_ARR_BOUNDS_CHECK:
15089 #ifdef FEATURE_SIMD
15091 #endif // FEATURE_SIMD
15093 fgSetRngChkTarget(tree);
15095 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
15096 bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex);
15097 bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen);
15098 // If the index is a comma(throw, x), just return that.
15099 if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex))
15101 tree = bndsChk->gtIndex;
15104 // Propagate effects flags upwards
15105 bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT);
15106 bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
15108 // Otherwise, we don't change the tree.
15113 tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj);
15114 tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT;
15117 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
15119 tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]);
15120 tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
15124 fgSetRngChkTarget(tree, false);
15128 case GT_ARR_OFFSET:
15129 tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset);
15130 tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT;
15131 tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex);
15132 tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT;
15133 tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj);
15134 tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT;
15137 fgSetRngChkTarget(tree, false);
15142 tree->gtCmpXchg.gtOpLocation = fgMorphTree(tree->gtCmpXchg.gtOpLocation);
15143 tree->gtCmpXchg.gtOpValue = fgMorphTree(tree->gtCmpXchg.gtOpValue);
15144 tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand);
15147 case GT_STORE_DYN_BLK:
15148 tree->gtDynBlk.Data() = fgMorphTree(tree->gtDynBlk.Data());
15151 tree->gtDynBlk.Addr() = fgMorphTree(tree->gtDynBlk.Addr());
15152 tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize);
15159 noway_assert(!"unexpected operator");
15163 fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum));
15168 #if LOCAL_ASSERTION_PROP
15169 //------------------------------------------------------------------------
15170 // fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum
15173 // lclNum - The varNum of the lclVar for which we're killing assertions.
15174 // tree - (DEBUG only) the tree responsible for killing its assertions.
15176 void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTreePtr tree))
15178 /* All dependent assertions are killed here */
15180 ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));
15184 AssertionIndex index = optAssertionCount;
15185 while (killed && (index > 0))
15187 if (BitVecOps::IsMember(apTraits, killed, index - 1))
15190 AssertionDsc* curAssertion = optGetAssertion(index);
15191 noway_assert((curAssertion->op1.lcl.lclNum == lclNum) ||
15192 ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum)));
15195 printf("\nThe assignment ");
15197 printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
15198 optPrintAssertion(curAssertion);
15201 // Remove this bit from the killed mask
15202 BitVecOps::RemoveElemD(apTraits, killed, index - 1);
15204 optAssertionRemove(index);
15210 // killed mask should now be zero
15211 noway_assert(BitVecOps::IsEmpty(apTraits, killed));
15214 //------------------------------------------------------------------------
15215 // fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum.
15218 // lclNum - The varNum of the lclVar for which we're killing assertions.
15219 // tree - (DEBUG only) the tree responsible for killing its assertions.
15222 // For structs and struct fields, it will invalidate the children and parent, respectively.
15224 // Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar.
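// For example (illustrative): given a promoted struct local V01 with field
// locals V02 and V03, an assignment to V01 kills the assertions that depend
// on V01, V02 and V03, while an assignment to field V02 kills those that
// depend on V02 and on its parent V01.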
15226 void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree))
15228 LclVarDsc* varDsc = &lvaTable[lclNum];
15230 if (varDsc->lvPromoted)
15232 noway_assert(varTypeIsStruct(varDsc));
15234 // Kill the field locals.
15235 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
15237 fgKillDependentAssertionsSingle(i DEBUGARG(tree));
15240 // Kill the struct local itself.
15241 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15243 else if (varDsc->lvIsStructField)
15245 // Kill the field local.
15246 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15248 // Kill the parent struct.
15249 fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree));
15253 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15256 #endif // LOCAL_ASSERTION_PROP
15258 /*****************************************************************************
15260 * This function is called to complete the morphing of a tree node
15261 * It should only be called once for each node.
15262 * If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated,
15263 * to enforce the invariant that each node is only morphed once.
15264 * If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced
15265 * by an equivalent tree.
15269 void Compiler::fgMorphTreeDone(GenTreePtr tree,
15270 GenTreePtr oldTree /* == NULL */
15271 DEBUGARG(int morphNum))
15274 if (verbose && treesBeforeAfterMorph)
15276 printf("\nfgMorphTree (after %d):\n", morphNum);
15278 printf(""); // in our logic this causes a flush
15282 if (!fgGlobalMorph)
15287 if ((oldTree != nullptr) && (oldTree != tree))
15289 /* Ensure that we have morphed this node */
15290 assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!");
15293 TransferTestDataToNode(oldTree, tree);
15298 // Ensure that we haven't morphed this node already
15299 assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
15302 if (tree->OperKind() & GTK_CONST)
15307 #if LOCAL_ASSERTION_PROP
15309 if (!optLocalAssertionProp)
15314 /* Do we have any active assertions? */
15316 if (optAssertionCount > 0)
15318 /* Is this an assignment to a local variable */
15319 GenTreeLclVarCommon* lclVarTree = nullptr;
15320 if (tree->DefinesLocal(this, &lclVarTree))
15322 unsigned lclNum = lclVarTree->gtLclNum;
15323 noway_assert(lclNum < lvaCount);
15324 fgKillDependentAssertions(lclNum DEBUGARG(tree));
15328 /* If this tree makes a new assertion - make it available */
15329 optAssertionGen(tree);
15331 #endif // LOCAL_ASSERTION_PROP
15336 /* Mark this node as being morphed */
15337 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
15341 /*****************************************************************************
15343 * Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants
15344 * Returns true if we modified the flow graph
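*
* For example (illustrative): once a JTRUE condition folds to the constant 1,
* the BBJ_COND block becomes a BBJ_ALWAYS and the not-taken edge is removed;
* a GT_SWITCH on a constant likewise collapses to its single taken successor.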
15347 bool Compiler::fgFoldConditional(BasicBlock* block)
15349 bool result = false;
15351 // We don't want to make any code unreachable
15352 if (opts.compDbgCode || opts.MinOpts())
15357 if (block->bbJumpKind == BBJ_COND)
15359 noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
15361 GenTreePtr stmt = block->bbTreeList->gtPrev;
15363 noway_assert(stmt->gtNext == nullptr);
15365 if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
15367 noway_assert(fgRemoveRestOfBlock);
15369 /* Unconditional throw - transform the basic block into a BBJ_THROW */
15370 fgConvertBBToThrowBB(block);
15372 /* Remove 'block' from the predecessor list of 'block->bbNext' */
15373 fgRemoveRefPred(block->bbNext, block);
15375 /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */
15376 fgRemoveRefPred(block->bbJumpDest, block);
15381 printf("\nConditional folded at BB%02u\n", block->bbNum);
15382 printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
15388 noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
15390 /* Did we fold the conditional */
15392 noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
15394 cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
15396 if (cond->OperKind() & GTK_CONST)
15398 /* Yippee - we folded the conditional!
15399 * Remove the conditional statement */
15401 noway_assert(cond->gtOper == GT_CNS_INT);
15402 noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0));
15404 /* remove the statement from bbTreeList - No need to update
15405 * the reference counts since there are no lcl vars */
15406 fgRemoveStmt(block, stmt);
15408 // block is a BBJ_COND that we are folding the conditional for
15409 // bTaken is the path that will always be taken from block
15410 // bNotTaken is the path that will never be taken from block
15412 BasicBlock* bTaken;
15413 BasicBlock* bNotTaken;
15415 if (cond->gtIntCon.gtIconVal != 0)
15417 /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */
15418 block->bbJumpKind = BBJ_ALWAYS;
15419 bTaken = block->bbJumpDest;
15420 bNotTaken = block->bbNext;
15424 /* Unmark the loop if we are removing a backwards branch */
15425 /* dest block must also be marked as a loop head and */
15426 /* We must be able to reach the backedge block */
15427 if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) &&
15428 fgReachable(block->bbJumpDest, block))
15430 optUnmarkLoopBlocks(block->bbJumpDest, block);
15433 /* JTRUE 0 - transform the basic block into a BBJ_NONE */
15434 block->bbJumpKind = BBJ_NONE;
15435 noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
15436 bTaken = block->bbNext;
15437 bNotTaken = block->bbJumpDest;
15440 if (fgHaveValidEdgeWeights)
15442 // We are removing an edge from block to bNotTaken
15443 // and we have already computed the edge weights, so
15444 // we will try to adjust some of the weights
15446 flowList* edgeTaken = fgGetPredForBlock(bTaken, block);
15447 BasicBlock* bUpdated = nullptr; // non-NULL if we updated the weight of an internal block
15449 // We examine the taken edge (block -> bTaken)
15450 // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight
15451 // else if bTaken has valid profile weight and block does not we try to adjust block's weight
15452 // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
15454 if (block->hasProfileWeight())
15456 // The edge weights for (block -> bTaken) are 100% of block's weight
15457 edgeTaken->flEdgeWeightMin = block->bbWeight;
15458 edgeTaken->flEdgeWeightMax = block->bbWeight;
15460 if (!bTaken->hasProfileWeight())
15462 if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
15464 // Update the weight of bTaken
15465 bTaken->inheritWeight(block);
15470 else if (bTaken->hasProfileWeight())
15472 if (bTaken->countOfInEdges() == 1)
15474 // There is only one in edge to bTaken
15475 edgeTaken->flEdgeWeightMin = bTaken->bbWeight;
15476 edgeTaken->flEdgeWeightMax = bTaken->bbWeight;
15478 // Update the weight of block
15479 block->inheritWeight(bTaken);
15484 if (bUpdated != nullptr)
15487 // Now fix the weights of the edges out of 'bUpdated'
15488 switch (bUpdated->bbJumpKind)
15491 edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
15492 edge->flEdgeWeightMax = bUpdated->bbWeight;
15495 edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
15496 edge->flEdgeWeightMax = bUpdated->bbWeight;
15499 edge = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated);
15500 edge->flEdgeWeightMax = bUpdated->bbWeight;
15503 // We don't handle BBJ_SWITCH
15509 /* modify the flow graph */
15511 /* Remove 'block' from the predecessor list of 'bNotTaken' */
15512 fgRemoveRefPred(bNotTaken, block);
15517 printf("\nConditional folded at BB%02u\n", block->bbNum);
15518 printf("BB%02u becomes a %s", block->bbNum,
15519 block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
15520 if (block->bbJumpKind == BBJ_ALWAYS)
15522 printf(" to BB%02u", block->bbJumpDest->bbNum);
15528 /* if the block was a loop condition we may have to modify
15529 * the loop table */
15531 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
15533 /* Some loops may have been already removed by
15534 * loop unrolling or conditional folding */
15536 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
15541 /* We are only interested in the loop bottom */
15543 if (optLoopTable[loopNum].lpBottom == block)
15545 if (cond->gtIntCon.gtIconVal == 0)
15547 /* This was a bogus loop (condition always false)
15548 * Remove the loop from the table */
15550 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
15554 printf("Removing loop L%02u (from BB%02u to BB%02u)\n\n", loopNum,
15555 optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum);
15565 else if (block->bbJumpKind == BBJ_SWITCH)
15567 noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
15569 GenTreePtr stmt = block->bbTreeList->gtPrev;
15571 noway_assert(stmt->gtNext == nullptr);
15573 if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
15575 noway_assert(fgRemoveRestOfBlock);
15577 /* Unconditional throw - transform the basic block into a BBJ_THROW */
15578 fgConvertBBToThrowBB(block);
15580 /* update the flow graph */
15582 unsigned jumpCnt = block->bbJumpSwt->bbsCount;
15583 BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab;
15585 for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
15587 BasicBlock* curJump = *jumpTab;
15589 /* Remove 'block' from the predecessor list of 'curJump' */
15590 fgRemoveRefPred(curJump, block);
15596 printf("\nConditional folded at BB%02u\n", block->bbNum);
15597 printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
15603 noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);
15605 /* Did we fold the conditional */
15607 noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
15609 cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
15611 if (cond->OperKind() & GTK_CONST)
15613 /* Yippee - we folded the conditional!
15614 * Remove the conditional statement */
15616 noway_assert(cond->gtOper == GT_CNS_INT);
15618 /* remove the statement from bbTreeList - No need to update
15619 * the reference counts since there are no lcl vars */
15620 fgRemoveStmt(block, stmt);
15622 /* modify the flow graph */
15624 /* Find the actual jump target */
15625 unsigned switchVal;
15626 switchVal = (unsigned)cond->gtIntCon.gtIconVal;
15628 jumpCnt = block->bbJumpSwt->bbsCount;
15629 BasicBlock** jumpTab;
15630 jumpTab = block->bbJumpSwt->bbsDstTab;
15634 for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
15636 BasicBlock* curJump = *jumpTab;
15638 assert(curJump->countOfInEdges() > 0);
15640 // If val matches switchVal, or we are at the last entry and we
15641 // never found the switch value, then set the new jump dest.
15643 if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
15645 if (curJump != block->bbNext)
15647 /* transform the basic block into a BBJ_ALWAYS */
15648 block->bbJumpKind = BBJ_ALWAYS;
15649 block->bbJumpDest = curJump;
15651 // if we are jumping backwards, make sure we have a GC Poll.
15652 if (curJump->bbNum > block->bbNum)
15654 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
15659 /* transform the basic block into a BBJ_NONE */
15660 block->bbJumpKind = BBJ_NONE;
15661 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
15667 /* Remove 'block' from the predecessor list of 'curJump' */
15668 fgRemoveRefPred(curJump, block);
15674 printf("\nConditional folded at BB%02u\n", block->bbNum);
15675 printf("BB%02u becomes a %s", block->bbNum,
15676 block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
15677 if (block->bbJumpKind == BBJ_ALWAYS)
15679 printf(" to BB%02u", block->bbJumpDest->bbNum);
15691 //*****************************************************************************
15693 // Morphs a single statement in a block.
15694 // Can be called anytime, unlike fgMorphStmts() which should only be called once.
15696 // Returns true if 'stmt' was removed from the block.
15697 // Returns false if 'stmt' is still in the block (even if other statements were removed).
15700 bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg))
15702 assert(block != nullptr);
15703 assert(stmt != nullptr);
15706 compCurStmt = stmt;
15708 GenTree* morph = fgMorphTree(stmt->gtStmtExpr);
15710 // Bug 1106830 - During the CSE phase we can't just remove
15711 // morph->gtOp.gtOp2 as it could contain CSE expressions.
15712 // This leads to a noway_assert in OptCSE.cpp when
15713 // searching for the removed CSE ref. (using gtFindLink)
15715 if (!optValnumCSE_phase)
15717 // Check for morph as a GT_COMMA with an unconditional throw
15718 if (fgIsCommaThrow(morph, true))
15723 printf("Folding a top-level fgIsCommaThrow stmt\n");
15724 printf("Removing op2 as unreachable:\n");
15725 gtDispTree(morph->gtOp.gtOp2);
15729 // Use the call as the new stmt
15730 morph = morph->gtOp.gtOp1;
15731 noway_assert(morph->gtOper == GT_CALL);
15734 // we can get a throw as a statement root
15735 if (fgIsThrow(morph))
15740 printf("We have a top-level fgIsThrow stmt\n");
15741 printf("Removing the rest of block as unreachable:\n");
15744 noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
15745 fgRemoveRestOfBlock = true;
15749 stmt->gtStmtExpr = morph;
15751 if (lvaLocalVarRefCounted)
15753 // fgMorphTree may have introduced new lclVar references. Bump the ref counts if requested.
15754 lvaRecursiveIncRefCounts(stmt->gtStmtExpr);
15757 // Can the entire tree be removed?
15758 bool removedStmt = fgCheckRemoveStmt(block, stmt);
15760 // Or this is the last statement of a conditional branch that was just folded?
15761 if (!removedStmt && (stmt->getNextStmt() == nullptr) && !fgRemoveRestOfBlock)
15763 if (fgFoldConditional(block))
15765 if (block->bbJumpKind != BBJ_THROW)
15767 removedStmt = true;
15774 // Have to re-do the evaluation order since, for example, some later code does not expect constants as op1
15775 gtSetStmtInfo(stmt);
15777 // Have to re-link the nodes for this statement
15778 fgSetStmtSeq(stmt);
15784 printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed"));
15790 if (fgRemoveRestOfBlock)
15792 // Remove the rest of the stmts in the block
15793 for (stmt = stmt->getNextStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
15795 fgRemoveStmt(block, stmt);
15798 // The rest of the block has been removed and we will always throw an exception.
15800 // Update successors of block
15801 fgRemoveBlockAsPred(block);
15803 // For compDbgCode, we prepend an empty BB as the firstBB; it is BBJ_NONE.
15804 // We should not convert it to a ThrowBB.
15805 if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0))
15807 // Convert block to a throw bb
15808 fgConvertBBToThrowBB(block);
15814 printf("\n%s Block BB%02u becomes a throw block.\n", msg, block->bbNum);
15817 fgRemoveRestOfBlock = false;
15820 return removedStmt;
15823 /*****************************************************************************
15825 * Morph the statements of the given block.
15826 * This function should be called just once for a block. Use fgMorphBlockStmt()
15827 * for reentrant calls.
15830 void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw)
15832 fgRemoveRestOfBlock = false;
15834 noway_assert(fgExpandInline == false);
15836 /* Make the current basic block address available globally */
15840 *mult = *lnot = *loadw = false;
15842 fgCurrentlyInUseArgTemps = hashBv::Create(this);
15844 GenTreeStmt* stmt = block->firstStmt();
15845 GenTreePtr prev = nullptr;
15846 for (; stmt != nullptr; prev = stmt->gtStmtExpr, stmt = stmt->gtNextStmt)
15848 assert(stmt->gtOper == GT_STMT);
15850 if (fgRemoveRestOfBlock)
15852 fgRemoveStmt(block, stmt);
15855 #ifdef FEATURE_SIMD
15856 if (!opts.MinOpts() && stmt->gtStmtExpr->TypeGet() == TYP_FLOAT && stmt->gtStmtExpr->OperGet() == GT_ASG)
15858 fgMorphCombineSIMDFieldAssignments(block, stmt);
15862 fgMorphStmt = stmt;
15863 compCurStmt = stmt;
15864 GenTreePtr tree = stmt->gtStmtExpr;
15868 if (stmt == block->bbTreeList)
15870 block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
15873 unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0);
15877 printf("\nfgMorphTree BB%02u, stmt %d (before)\n", block->bbNum, compCurStmtNum);
15882 /* Morph this statement tree */
15884 GenTreePtr morph = fgMorphTree(tree);
15886 // mark any outgoing arg temps as free so we can reuse them in the next statement.
15888 fgCurrentlyInUseArgTemps->ZeroAll();
15890 // Has fgMorphStmt been sneakily changed?
15892 if (stmt->gtStmtExpr != tree)
15894 /* This must be a tail call. Ignore 'morph' and carry on with
15895 the tail-call node */
15897 morph = stmt->gtStmtExpr;
15898 noway_assert(compTailCallUsed);
15899 noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
15900 noway_assert(stmt->gtNextStmt == nullptr);
15902 GenTreeCall* call = morph->AsCall();
15904 // This is either a tail call dispatched via helper, in which case the block will end with BBJ_THROW, or
15905 // a fast call made as jmp, in which case the block will end with BBJ_RETURN and is marked as containing a jmp.
15907 noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
15908 (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
15909 (compCurBB->bbFlags & BBF_HAS_JMP)));
15911 else if (block != compCurBB)
15913 /* This must be a tail call that caused a GCPoll to get
15914 injected. We haven't actually morphed the call yet
15915 but the flag still got set, clear it here... */
15916 CLANG_FORMAT_COMMENT_ANCHOR;
15919 tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
15922 noway_assert(compTailCallUsed);
15923 noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
15924 noway_assert(stmt->gtNextStmt == nullptr);
15926 GenTreeCall* call = morph->AsCall();
15929 // This is either a tail call dispatched via helper, in which case the block will end with BBJ_THROW, or
15930 // a fast call made as jmp, in which case the block will end with BBJ_RETURN and is marked as containing a jmp.
15932 noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
15933 (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
15934 (compCurBB->bbFlags & BBF_HAS_JMP)));
15938 if (compStressCompile(STRESS_CLONE_EXPR, 30))
15940 // Clone all the trees to stress gtCloneExpr()
15944 printf("\nfgMorphTree (stressClone from):\n");
15948 morph = gtCloneExpr(morph);
15949 noway_assert(morph);
15953 printf("\nfgMorphTree (stressClone to):\n");
15958 /* If the hash value changed, we modified the tree during morphing */
15961 unsigned newHash = gtHashValue(morph);
15962 if (newHash != oldHash)
15964 printf("\nfgMorphTree BB%02u, stmt %d (after)\n", block->bbNum, compCurStmtNum);
15970 /* Check for morph as a GT_COMMA with an unconditional throw */
15971 if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true))
15973 /* Use the call as the new stmt */
15974 morph = morph->gtOp.gtOp1;
15975 noway_assert(morph->gtOper == GT_CALL);
15976 noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
15978 fgRemoveRestOfBlock = true;
15981 stmt->gtStmtExpr = tree = morph;
15983 noway_assert(fgPtrArgCntCur == 0);
15985 if (fgRemoveRestOfBlock)
15990 /* Has the statement been optimized away */
15992 if (fgCheckRemoveStmt(block, stmt))
15997 /* Check if this block ends with a conditional branch that can be folded */
15999 if (fgFoldConditional(block))
16004 if (ehBlockHasExnFlowDsc(block))
16009 #if OPT_MULT_ADDSUB
16011 /* Note whether we have two or more +=/-= operators in a row */
16013 if (tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB)
16015 if (prev && prev->gtOper == tree->gtOper)
16023 /* Note "x = a[i] & icon" followed by "x |= a[i] << 8" */
16025 if (tree->gtOper == GT_ASG_OR && prev && prev->gtOper == GT_ASG)
16031 if (fgRemoveRestOfBlock)
16033 if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH))
16035 GenTreePtr first = block->bbTreeList;
16036 noway_assert(first);
16037 GenTreePtr last = first->gtPrev;
16038 noway_assert(last && last->gtNext == nullptr);
16039 GenTreePtr lastStmt = last->gtStmt.gtStmtExpr;
16041 if (((block->bbJumpKind == BBJ_COND) && (lastStmt->gtOper == GT_JTRUE)) ||
16042 ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH)))
16044 GenTreePtr op1 = lastStmt->gtOp.gtOp1;
16046 if (op1->OperKind() & GTK_RELOP)
16048 /* Unmark the comparison node with GTF_RELOP_JMP_USED */
16049 op1->gtFlags &= ~GTF_RELOP_JMP_USED;
16052 last->gtStmt.gtStmtExpr = fgMorphTree(op1);
16056 /* Mark block as a BBJ_THROW block */
16057 fgConvertBBToThrowBB(block);
16060 noway_assert(fgExpandInline == false);
16062 #if FEATURE_FASTTAILCALL
16063 GenTreePtr recursiveTailCall = nullptr;
16064 if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
16066 fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
16071 compCurBB = (BasicBlock*)INVALID_POINTER_VALUE;
16074 // Reset this back so that it doesn't leak out impacting other blocks
16075 fgRemoveRestOfBlock = false;
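// [Editor's illustrative note, not original source] The GT_COMMA-with-throw
// handling above is what collapses a statement whose tree morphed into
// "COMMA(throwing-call, value)" down to just the throwing call. For example,
// a cast or arithmetic operation that constant folding proves must always
// overflow morphs into a COMMA whose op1 is the overflow helper call; the
// statement is then replaced by that call alone and fgRemoveRestOfBlock
// deletes the now-unreachable statements that follow in the block.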
16078 /*****************************************************************************
16080 * Morph the blocks of the method.
16081 * Returns true if the basic block list is modified.
16082 * This function should be called just once.
16085 void Compiler::fgMorphBlocks()
16090 printf("\n*************** In fgMorphBlocks()\n");
16094 /* Since fgMorphTree can be called after various optimizations to re-arrange
16095  * the nodes, we need a global flag to signal whether we are in the one-pass
16096  * global morphing phase */
16098 fgGlobalMorph = true;
16100 #if LOCAL_ASSERTION_PROP
16102 // Local assertion prop is enabled if we are optimized
16104 optLocalAssertionProp = (!opts.compDbgCode && !opts.MinOpts());
16106 if (optLocalAssertionProp)
16109 // Initialize for local assertion prop
16111 optAssertionInit(true);
16113 #elif ASSERTION_PROP
16115 // If LOCAL_ASSERTION_PROP is not set
16116 // and we have global assertion prop
16117 // then local assertion prop is always off
16119 optLocalAssertionProp = false;
16123 /*-------------------------------------------------------------------------
16124 * Process all basic blocks in the function
16127 BasicBlock* block = fgFirstBB;
16128 noway_assert(block);
16131 compCurStmtNum = 0;
16136 #if OPT_MULT_ADDSUB
16144 bool loadw = false;
16149 printf("\nMorphing BB%02u of '%s'\n", block->bbNum, info.compFullName);
16153 #if LOCAL_ASSERTION_PROP
16154 if (optLocalAssertionProp)
16157 // Clear out any currently recorded assertion candidates
16158 // before processing each basic block,
16159 // also we must handle QMARK-COLON specially
16161 optAssertionReset(0);
16165 /* Process all statement trees in the basic block */
16169 fgMorphStmts(block, &mult, &lnot, &loadw);
16171 #if OPT_MULT_ADDSUB
16173 if (mult && (opts.compFlags & CLFLG_TREETRANS) && !opts.compDbgCode && !opts.MinOpts())
16175 for (tree = block->bbTreeList; tree; tree = tree->gtNext)
16177 assert(tree->gtOper == GT_STMT);
16178 GenTreePtr last = tree->gtStmt.gtStmtExpr;
16180 if (last->gtOper == GT_ASG_ADD || last->gtOper == GT_ASG_SUB)
16185 GenTreePtr dst1 = last->gtOp.gtOp1;
16186 GenTreePtr src1 = last->gtOp.gtOp2;
16188 if (!last->IsCnsIntOrI())
16193 if (dst1->gtOper != GT_LCL_VAR)
16197 if (!src1->IsCnsIntOrI())
16207 /* Look at the next statement */
16209 temp = tree->gtNext;
16215 noway_assert(temp->gtOper == GT_STMT);
16216 next = temp->gtStmt.gtStmtExpr;
16218 if (next->gtOper != last->gtOper)
16222 if (next->gtType != last->gtType)
16227 dst2 = next->gtOp.gtOp1;
16228 src2 = next->gtOp.gtOp2;
16230 if (dst2->gtOper != GT_LCL_VAR)
16234 if (dst2->gtLclVarCommon.gtLclNum != dst1->gtLclVarCommon.gtLclNum)
16239 if (!src2->IsCnsIntOrI())
16244 if (last->gtOverflow() != next->gtOverflow())
16249 const ssize_t i1 = src1->gtIntCon.gtIconVal;
16250 const ssize_t i2 = src2->gtIntCon.gtIconVal;
16251 const ssize_t itemp = i1 + i2;
16253 /* if the operators are checking for overflow, check for overflow of the operands */
16255 if (next->gtOverflow())
16257 if (next->TypeGet() == TYP_LONG)
16259 if (next->gtFlags & GTF_UNSIGNED)
16261 ClrSafeInt<UINT64> si1(i1);
16262 if ((si1 + ClrSafeInt<UINT64>(i2)).IsOverflow())
16269 ClrSafeInt<INT64> si1(i1);
16270 if ((si1 + ClrSafeInt<INT64>(i2)).IsOverflow())
16276 else if (next->gtFlags & GTF_UNSIGNED)
16278 ClrSafeInt<UINT32> si1(i1);
16279 if ((si1 + ClrSafeInt<UINT32>(i2)).IsOverflow())
16286 ClrSafeInt<INT32> si1(i1);
16287 if ((si1 + ClrSafeInt<INT32>(i2)).IsOverflow())
16294 /* Fold the two increments/decrements into one */
16296 src1->gtIntCon.gtIconVal = itemp;
16297 #ifdef _TARGET_64BIT_
16298 if (src1->gtType == TYP_INT)
16300 src1->AsIntCon()->TruncateOrSignExtend32();
16302 #endif //_TARGET_64BIT_
16304 /* Remove the second statement completely */
16306 noway_assert(tree->gtNext == temp);
16307 noway_assert(temp->gtPrev == tree);
16311 noway_assert(temp->gtNext->gtPrev == temp);
16313 temp->gtNext->gtPrev = tree;
16314 tree->gtNext = temp->gtNext;
16318 tree->gtNext = nullptr;
16320 noway_assert(block->bbTreeList->gtPrev == temp);
16322 block->bbTreeList->gtPrev = tree;
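// [Editor's illustrative note, not original source] The OPT_MULT_ADDSUB loop
// above merges two adjacent "lcl += icon1; lcl += icon2;" statements into a
// single "lcl += (icon1 + icon2);" provided both target the same local and,
// for checked code, the combined constant cannot introduce or hide an
// overflow. A minimal stand-alone sketch of that overflow test (names
// hypothetical, signed INT32 case only):
#if 0
#include <cstdint>

// Returns true if folding 'a' and 'b' into one increment preserves the
// overflow behavior of the two separate checked additions.
static bool CanFoldIncrements(int32_t a, int32_t b)
{
    int64_t wide = static_cast<int64_t>(a) + static_cast<int64_t>(b);
    return wide >= INT32_MIN && wide <= INT32_MAX;
}
// If CanFoldIncrements(3, 4) holds, "i += 3; i += 4;" is rewritten as "i += 7;".
#endif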
16333 /* Are we using a single return block? */
16335 if (block->bbJumpKind == BBJ_RETURN)
16337 if ((genReturnBB != nullptr) && (genReturnBB != block) && ((block->bbFlags & BBF_HAS_JMP) == 0))
16339 /* We'll jump to the genReturnBB */
16340 CLANG_FORMAT_COMMENT_ANCHOR;
16342 #if !defined(_TARGET_X86_)
16343 if (info.compFlags & CORINFO_FLG_SYNCH)
16345 fgConvertSyncReturnToLeave(block);
16348 #endif // !_TARGET_X86_
16350 block->bbJumpKind = BBJ_ALWAYS;
16351 block->bbJumpDest = genReturnBB;
16355 // Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN.
16356 // For example a method returning void could have an empty block with jump kind BBJ_RETURN.
16357 // Such blocks do materialize as part of inlining.
16359 // Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN.
16360 // It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC.
16361 // For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal
16362 // is BAD_VAR_NUM.
16364 // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN.
16366 GenTreePtr last = (block->bbTreeList != nullptr) ? block->bbTreeList->gtPrev : nullptr;
16367 GenTreePtr ret = (last != nullptr) ? last->gtStmt.gtStmtExpr : nullptr;
16369 // Replace the GT_RETURN node with a GT_ASG that stores the return value into genReturnLocal.
16370 if (genReturnLocal != BAD_VAR_NUM)
16372 // Method must be returning a value other than TYP_VOID.
16373 noway_assert(compMethodHasRetVal());
16375 // This block must be ending with a GT_RETURN
16376 noway_assert(last != nullptr);
16377 noway_assert(last->gtOper == GT_STMT);
16378 noway_assert(last->gtNext == nullptr);
16379 noway_assert(ret != nullptr);
16381 // GT_RETURN must have a non-null operand as the method is returning the value assigned to
16382 // genReturnLocal.
16383 noway_assert(ret->OperGet() == GT_RETURN);
16384 noway_assert(ret->gtGetOp1() != nullptr);
16386 GenTreePtr tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1());
16388 last->gtStmt.gtStmtExpr = (tree->OperIsCopyBlkOp()) ? fgMorphCopyBlock(tree) : tree;
16390 // make sure that copy-prop ignores this assignment.
16391 last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE;
16393 else if (ret != nullptr && ret->OperGet() == GT_RETURN)
16395 // This block ends with a GT_RETURN
16396 noway_assert(last != nullptr);
16397 noway_assert(last->gtOper == GT_STMT);
16398 noway_assert(last->gtNext == nullptr);
16400 // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn block
16401 noway_assert(ret->TypeGet() == TYP_VOID);
16402 noway_assert(ret->gtGetOp1() == nullptr);
16404 fgRemoveStmt(block, last);
16410 printf("morph BB%02u to point at onereturn. New block is\n", block->bbNum);
16411 fgTableDispBasicBlock(block);
16417 block = block->bbNext;
16420 /* We are done with the global morphing phase */
16422 fgGlobalMorph = false;
16427 fgDispBasicBlocks(true);
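// [Editor's illustrative note, not original source] A source-level view of the
// single-return rewrite performed above, written as C++ for concreteness:
#if 0
static int Before(bool b)
{
    if (b)
        return 1;
    return 2;
}

static int After(bool b)
{
    int retVal; // plays the role of genReturnLocal
    if (b)
    {
        retVal = 1; // each GT_RETURN becomes an assignment ...
        goto RET;   // ... and its block becomes BBJ_ALWAYS to genReturnBB
    }
    retVal = 2;
RET:                // genReturnBB: the single BBJ_RETURN block
    return retVal;
}
#endif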
16432 //------------------------------------------------------------------------
16433 // fgCheckArgCnt: Check whether the maximum arg size will change codegen requirements
16436 // fgPtrArgCntMax records the maximum number of pushed arguments.
16437 // Depending upon this value of the maximum number of pushed arguments
16438 // we may need to use an EBP frame or be partially interruptible.
16439 // This functionality has been factored out of fgSetOptions() because
16440 // the Rationalizer can create new calls.
16443 // This must be called before isFramePointerRequired() is called, because it is a
16444 // phased variable (can only be written before it has been read).
16446 void Compiler::fgCheckArgCnt()
16448 if (!compCanEncodePtrArgCntMax())
16453 printf("Too many pushed arguments for fully interruptible encoding, marking method as partially "
16454 "interruptible\n");
16457 genInterruptible = false;
16459 if (fgPtrArgCntMax >= sizeof(unsigned))
16464 printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n");
16467 codeGen->setFramePointerRequired(true);
16471 /*****************************************************************************
16473 * Make some decisions about the kind of code to generate.
16476 void Compiler::fgSetOptions()
16479 /* Should we force fully interruptible code ? */
16480 if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30))
16482 noway_assert(!codeGen->isGCTypeFixed());
16483 genInterruptible = true;
16487 if (opts.compDbgCode)
16489 assert(!codeGen->isGCTypeFixed());
16490 genInterruptible = true; // debugging is easier this way ...
16493 /* Assume we won't need an explicit stack frame if this is allowed */
16495 // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of
16496 // the callee-saved registers.
16497 noway_assert(!compTailCallUsed || !compLocallocUsed);
16499 if (compLocallocUsed)
16501 codeGen->setFramePointerRequired(true);
16504 #ifdef _TARGET_X86_
16506 if (compTailCallUsed)
16507 codeGen->setFramePointerRequired(true);
16509 #endif // _TARGET_X86_
16511 if (!opts.genFPopt)
16513 codeGen->setFramePointerRequired(true);
16516 // Assert that the EH table has been initialized by now. Note that
16517 // compHndBBtabAllocCount never decreases; it is a high-water mark
16518 // of table allocation. In contrast, compHndBBtabCount does shrink
16519 // if we delete a dead EH region, and if it shrinks to zero, the
16520 // table pointer compHndBBtab is unreliable.
16521 assert(compHndBBtabAllocCount >= info.compXcptnsCount);
16523 #ifdef _TARGET_X86_
16525 // Note: this case, and the !X86 case below, should both use the
16526 // !X86 path. This would require a few more changes for X86 to use
16527 // compHndBBtabCount (the current number of EH clauses) instead of
16528 // info.compXcptnsCount (the number of EH clauses in IL), such as
16529 // in ehNeedsShadowSPslots(). This is because sometimes the IL has
16530 // an EH clause that we delete as statically dead code before we
16531 // get here, leaving no EH clauses left, and thus no requirement
16532 // to use a frame pointer because of EH. But until all the code uses
16533 // the same test, leave info.compXcptnsCount here.
16534 if (info.compXcptnsCount > 0)
16536 codeGen->setFramePointerRequiredEH(true);
16539 #else // !_TARGET_X86_
16541 if (compHndBBtabCount > 0)
16543 codeGen->setFramePointerRequiredEH(true);
16546 #endif // _TARGET_X86_
16548 #ifdef UNIX_X86_ABI
16549 if (info.compXcptnsCount > 0)
16551 assert(!codeGen->isGCTypeFixed());
16552 // Enforce fully interruptible codegen for funclet unwinding
16553 genInterruptible = true;
16555 #endif // UNIX_X86_ABI
16559 if (info.compCallUnmanaged)
16561 codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame
16564 if (info.compPublishStubParam)
16566 codeGen->setFramePointerRequiredGCInfo(true);
16569 if (opts.compNeedSecurityCheck)
16571 codeGen->setFramePointerRequiredGCInfo(true);
16573 #ifndef JIT32_GCENCODER
16575 // The decoder only reports objects in frames with exceptions if the frame
16576 // is fully interruptible.
16577 // Even if there is no catch or other way to resume execution in this frame
16578 // the VM requires the security object to remain alive until later, so
16579 // Frames with security objects must be fully interruptible.
16580 genInterruptible = true;
16582 #endif // JIT32_GCENCODER
16585 if (compIsProfilerHookNeeded())
16587 codeGen->setFramePointerRequired(true);
16590 if (info.compIsVarArgs)
16592 // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
16593 codeGen->setFramePointerRequiredGCInfo(true);
16596 if (lvaReportParamTypeArg())
16598 codeGen->setFramePointerRequiredGCInfo(true);
16601 // printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not");
16604 /*****************************************************************************/
16606 GenTreePtr Compiler::fgInitThisClass()
16608 noway_assert(!compIsForInlining());
16610 CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);
16612 if (!kind.needsRuntimeLookup)
16614 return fgGetSharedCCtor(info.compClassHnd);
16618 #ifdef FEATURE_READYTORUN_COMPILER
16619 // Only CoreRT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR.
16620 if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI))
16622 CORINFO_RESOLVED_TOKEN resolvedToken;
16623 memset(&resolvedToken, 0, sizeof(resolvedToken));
16625 // We are in a shared method body, but maybe we don't need a runtime lookup after all.
16626 // This covers the case of a generic method on a non-generic type.
16627 if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST))
16629 resolvedToken.hClass = info.compClassHnd;
16630 return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
16633 // We need a runtime lookup.
16634 GenTreePtr ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);
16636 // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static
16637 // base of the class that owns the method being compiled". If we're in this method, it means we're not
16638 // inlining and there's no ambiguity.
16639 return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF,
16640 gtNewArgList(ctxTree), &kind);
16644 // Collectible types require that for shared generic code, if we use the generic context parameter,
16645 // we report it. (This is a conservative approach; we could detect some cases, particularly when the
16646 // context parameter is 'this', where we don't need the eager reporting logic.)
16647 lvaGenericsContextUseCount++;
16649 switch (kind.runtimeLookupKind)
16651 case CORINFO_LOOKUP_THISOBJ:
16652 // This code takes a this pointer; but we need to pass the static method desc to get the right point in
16653 // the hierarchy.
16655 GenTreePtr vtTree = gtNewLclvNode(info.compThisArg, TYP_REF);
16656 // Vtable pointer of this object
16657 vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree);
16658 vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
16659 GenTreePtr methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd);
16661 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
16662 gtNewArgList(vtTree, methodHnd));
16665 case CORINFO_LOOKUP_CLASSPARAM:
16667 GenTreePtr vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
16668 return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, 0, gtNewArgList(vtTree));
16671 case CORINFO_LOOKUP_METHODPARAM:
16673 GenTreePtr methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
16674 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
16675 gtNewArgList(gtNewIconNode(0), methHndTree));
16680 noway_assert(!"Unknown LOOKUP_KIND");
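// [Editor's illustrative note, not original source] Concretely, for a shared
// generic method the three lookup kinds above generate helper calls shaped
// roughly as follows (tree shapes, illustrative only):
//
//   THISOBJ:     CALL CORINFO_HELP_INITINSTCLASS(IND(this), methodDesc-handle)
//   CLASSPARAM:  CALL CORINFO_HELP_INITCLASS(typeCtxtArg)
//   METHODPARAM: CALL CORINFO_HELP_INITINSTCLASS(0, typeCtxtArg)
//
// i.e. when only a 'this' pointer is available we load its vtable pointer to
// recover the exact class, while the *PARAM kinds pass the hidden context
// argument straight through to the helper.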
16685 /*****************************************************************************
16687 * Tree walk callback to make sure no GT_QMARK nodes are present in the tree,
16688 * except for the allowed ? 1 : 0; pattern.
16690 Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTreePtr* tree, fgWalkData* data)
16692 if ((*tree)->OperGet() == GT_QMARK)
16694 fgCheckQmarkAllowedForm(*tree);
16696 return WALK_CONTINUE;
16699 void Compiler::fgCheckQmarkAllowedForm(GenTree* tree)
16701 assert(tree->OperGet() == GT_QMARK);
16702 #ifndef LEGACY_BACKEND
16703 assert(!"Qmarks beyond morph disallowed.");
16704 #else // LEGACY_BACKEND
16705 GenTreePtr colon = tree->gtOp.gtOp2;
16707 assert(colon->gtOp.gtOp1->IsIntegralConst(0));
16708 assert(colon->gtOp.gtOp2->IsIntegralConst(1));
16709 #endif // LEGACY_BACKEND
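// [Editor's illustrative note, not original source] Under LEGACY_BACKEND the
// one qmark shape tolerated after morph is the boolean-manifesting pattern,
// e.g. the tree for
//
//     x = (a < b) ? 1 : 0;
//
// which is QMARK(LT(a, b), COLON(0, 1)) -- note that GT_COLON stores the
// "else" value in gtOp1 and the "then" value in gtOp2, which is why the
// asserts above check op1 against 0 and op2 against 1.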
16712 /*****************************************************************************
16714 * Verify that the importer has created GT_QMARK nodes in a way we can
16715 * process them. The following is allowed:
16717 * 1. A top level qmark. Top level qmark is of the form:
16718 * a) (bool) ? (void) : (void) OR
16719 * b) V0N = (bool) ? (type) : (type)
16721 * 2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child
16722 * of either op1 of colon or op2 of colon but not a child of any other
16725 void Compiler::fgPreExpandQmarkChecks(GenTreePtr expr)
16727 GenTreePtr topQmark = fgGetTopLevelQmark(expr);
16729 // If the top level Qmark is null, then scan the tree to make sure
16730 // there are no qmarks within it.
16731 if (topQmark == nullptr)
16733 fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
16737 // We could probably expand the cond node also, but don't think the extra effort is necessary,
16738 // so let's just assert the cond node of a top level qmark doesn't have further top level qmarks.
16739 fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, nullptr);
16741 fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1);
16742 fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2);
16747 /*****************************************************************************
16749 * Get the top level GT_QMARK node in a given "expr", return NULL if such a
16750 * node is not present. If the top level GT_QMARK node is assigned to a
16751 * GT_LCL_VAR, then return the lcl node in ppDst.
16754 GenTreePtr Compiler::fgGetTopLevelQmark(GenTreePtr expr, GenTreePtr* ppDst /* = NULL */)
16756 if (ppDst != nullptr)
16761 GenTreePtr topQmark = nullptr;
16762 if (expr->gtOper == GT_QMARK)
16766 else if (expr->gtOper == GT_ASG && expr->gtOp.gtOp2->gtOper == GT_QMARK && expr->gtOp.gtOp1->gtOper == GT_LCL_VAR)
16768 topQmark = expr->gtOp.gtOp2;
16769 if (ppDst != nullptr)
16771 *ppDst = expr->gtOp.gtOp1;
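// [Editor's illustrative note, not original source] Examples of what the
// importer may produce. Both of these have a "top level" qmark:
//
//     V05 = (cond) ? x : y;            // GT_ASG(GT_LCL_VAR, GT_QMARK)
//     (cond) ? (void)f() : (void)g();  // bare GT_QMARK statement
//
// whereas something like "(cond ? x : y) + 1" buries the qmark under a GT_ADD
// and would trip the fgPreExpandQmarkChecks asserts above.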
16777 /*********************************************************************************
16779 * For a castclass helper call,
16780 * Importer creates the following tree:
16781 * tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
16783 * This method splits the qmark expression created by the importer into the
16784 * following blocks: (block, asg, cond1, cond2, helper, remainder)
16785 * Notice that op1 is the result for both the conditions. So we coalesce these
16786 * assignments into a single block instead of two blocks, resulting in a nested diamond.
16788 *                       +---------->-----------+
16789 *                       |          |           |
16790 *                       |          |           |
16791 *                       |          |           v
16792 *         block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
16794 * We expect to achieve the following codegen:
16795 * mov rsi, rdx tmp = op1 // asgBlock
16796 * test rsi, rsi goto skip if tmp == null ? // cond1Block
16798 * mov rcx, 0x76543210 cns = op2 // cond2Block
16799 * cmp qword ptr [rsi], rcx goto skip if *tmp == op2
16801 * call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp) // helperBlock
16803 * SKIP: // remainderBlock
16804 * tmp has the result.
16807 void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTreePtr stmt)
16812 printf("\nExpanding CastInstOf qmark in BB%02u (before)\n", block->bbNum);
16813 fgDispBasicBlocks(block, block, true);
16817 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
16819 GenTreePtr dst = nullptr;
16820 GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
16821 noway_assert(dst != nullptr);
16823 assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);
16825 // Get cond, true, false exprs for the qmark.
16826 GenTreePtr condExpr = qmark->gtGetOp1();
16827 GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
16828 GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
16830 // Get cond, true, false exprs for the nested qmark.
16831 GenTreePtr nestedQmark = falseExpr;
16832 GenTreePtr cond2Expr;
16833 GenTreePtr true2Expr;
16834 GenTreePtr false2Expr;
16836 if (nestedQmark->gtOper == GT_QMARK)
16838 cond2Expr = nestedQmark->gtGetOp1();
16839 true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
16840 false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode();
16842 assert(cond2Expr->gtFlags & GTF_RELOP_QMARK);
16843 cond2Expr->gtFlags &= ~GTF_RELOP_QMARK;
16847 // This is a rare case that arises when we are doing minopts and encounter isinst of null.
16848 // gtFoldExpr was still able to optimize away part of the tree (but not all of it).
16849 // That means it does not match our pattern.
16851 // Rather than write code to handle this case, just fake up some nodes to make it match the common
16852 // case. Synthesize a comparison that is always true, and for the result-on-true, use the
16853 // entire subtree we expected to be the nested question op.
16855 cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
16856 true2Expr = nestedQmark;
16857 false2Expr = gtNewIconNode(0, TYP_I_IMPL);
16859 assert(false2Expr->OperGet() == trueExpr->OperGet());
16861 // Clear flags as they are now going to be part of JTRUE.
16862 assert(condExpr->gtFlags & GTF_RELOP_QMARK);
16863 condExpr->gtFlags &= ~GTF_RELOP_QMARK;
16865 // Create the chain of blocks. See method header comment.
16866 // The order of blocks after this is the following:
16867 // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
16869 // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
16870 // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
16871 // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
16872 // remainderBlock will still be GC safe.
16873 unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
16874 BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
16875 fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
16877 BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true);
16878 BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true);
16879 BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true);
16880 BasicBlock* asgBlock = fgNewBBafter(BBJ_NONE, block, true);
16882 remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
16884 // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
16885 // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
16886 if ((block->bbFlags & BBF_INTERNAL) == 0)
16888 helperBlock->bbFlags &= ~BBF_INTERNAL;
16889 cond2Block->bbFlags &= ~BBF_INTERNAL;
16890 cond1Block->bbFlags &= ~BBF_INTERNAL;
16891 asgBlock->bbFlags &= ~BBF_INTERNAL;
16892 helperBlock->bbFlags |= BBF_IMPORTED;
16893 cond2Block->bbFlags |= BBF_IMPORTED;
16894 cond1Block->bbFlags |= BBF_IMPORTED;
16895 asgBlock->bbFlags |= BBF_IMPORTED;
16898 // Chain the flow correctly.
16899 fgAddRefPred(asgBlock, block);
16900 fgAddRefPred(cond1Block, asgBlock);
16901 fgAddRefPred(cond2Block, cond1Block);
16902 fgAddRefPred(helperBlock, cond2Block);
16903 fgAddRefPred(remainderBlock, helperBlock);
16904 fgAddRefPred(remainderBlock, cond1Block);
16905 fgAddRefPred(remainderBlock, cond2Block);
16907 cond1Block->bbJumpDest = remainderBlock;
16908 cond2Block->bbJumpDest = remainderBlock;
16910 // Set the weights; some are guesses.
16911 asgBlock->inheritWeight(block);
16912 cond1Block->inheritWeight(block);
16913 cond2Block->inheritWeightPercentage(cond1Block, 50);
16914 helperBlock->inheritWeightPercentage(cond2Block, 50);
16916 // Append cond1 as JTRUE to cond1Block
16917 GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
16918 GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
16919 fgInsertStmtAtEnd(cond1Block, jmpStmt);
16921 // Append cond2 as JTRUE to cond2Block
16922 jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
16923 jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
16924 fgInsertStmtAtEnd(cond2Block, jmpStmt);
16926 // AsgBlock should get tmp = op1 assignment.
16927 trueExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr);
16928 GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
16929 fgInsertStmtAtEnd(asgBlock, trueStmt);
16931 // Since we are adding the helper in the JTRUE false path, reverse cond2 and add the helper.
16932 gtReverseCond(cond2Expr);
16933 GenTreePtr helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr);
16934 GenTreePtr helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx);
16935 fgInsertStmtAtEnd(helperBlock, helperStmt);
16937 // Finally remove the nested qmark stmt.
16938 fgRemoveStmt(block, stmt);
16943 printf("\nExpanding CastInstOf qmark in BB%02u (after)\n", block->bbNum);
16944 fgDispBasicBlocks(block, remainderBlock, true);
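// [Editor's illustrative note, not original source] Assuming C# such as
//     MyClass m = (MyClass)obj;
// the importer produces (roughly) the nested qmark described in the header:
//     tmp = (obj == null) ? obj
//                         : ((*obj == MyClass_handle) ? obj
//                                                     : CHKCASTCLASS_SPECIAL(MyClass_handle, obj));
// and the expansion above flattens that into the asg/cond1/cond2/helper block
// chain so that no GT_QMARK survives into the backend.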
16949 /*****************************************************************************
16951 * Expand a statement with a top level qmark node. There are three cases, based
16952 * on whether the qmark has both "true" and "false" arms, or just one of them.
16954 *     S0;
16955 *     C ? T : F;
16956 *     S1;
16957 *
16958 *     Generates ==>
16959 *
16960 *                       bbj_always
16961 *                       +---->------+
16962 *                 false |           |
16963 *     S0 -->-- ~C -->-- T   F -->-- S1
16964 *              |            |
16965 *              +--->--------+
16966 *              bbj_cond(true)
16967 *
16968 *     -----------------------------------------
16969 *
16970 *     S0;
16971 *     C ? T : NOP;
16972 *     S1;
16973 *
16974 *     Generates ==>
16975 *
16976 *                 false
16977 *     S0 -->-- ~C -->-- T -->-- S1
16978 *              |                |
16979 *              +-->-------------+
16980 *              bbj_cond(true)
16981 *
16982 *     -----------------------------------------
16983 *
16984 *     S0;
16985 *     C ? NOP : F;
16986 *     S1;
16987 *
16988 *     Generates ==>
16989 *
16990 *                false
16991 *     S0 -->-- C -->-- F -->-- S1
16992 *              |               |
16993 *              +-->------------+
16994 *              bbj_cond(true)
16996 * If the qmark assigns to a variable, then create tmps for "then"
16997 * and "else" results and assign the temp to the variable as a writeback step.
16999 void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTreePtr stmt)
17001 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
17003 // Retrieve the Qmark node to be expanded.
17004 GenTreePtr dst = nullptr;
17005 GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
17006 if (qmark == nullptr)
17011 if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
17013 fgExpandQmarkForCastInstOf(block, stmt);
17020 printf("\nExpanding top-level qmark in BB%02u (before)\n", block->bbNum);
17021 fgDispBasicBlocks(block, block, true);
17025 // Retrieve the operands.
17026 GenTreePtr condExpr = qmark->gtGetOp1();
17027 GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
17028 GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
17030 assert(condExpr->gtFlags & GTF_RELOP_QMARK);
17031 condExpr->gtFlags &= ~GTF_RELOP_QMARK;
17033 assert(!varTypeIsFloating(condExpr->TypeGet()));
17035 bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP);
17036 bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
17037 assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!
17039 // Create remainder, cond and "else" blocks. After this, the blocks are in this order:
17040 // block ... condBlock ... elseBlock ... remainderBlock
17042 // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
17043 // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
17044 // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
17045 // remainderBlock will still be GC safe.
17046 unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
17047 BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
17048 fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
17050 BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true);
17051 BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true);
17053 // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
17054 // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
17055 if ((block->bbFlags & BBF_INTERNAL) == 0)
17057 condBlock->bbFlags &= ~BBF_INTERNAL;
17058 elseBlock->bbFlags &= ~BBF_INTERNAL;
17059 condBlock->bbFlags |= BBF_IMPORTED;
17060 elseBlock->bbFlags |= BBF_IMPORTED;
17063 remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
17065 condBlock->inheritWeight(block);
17067 fgAddRefPred(condBlock, block);
17068 fgAddRefPred(elseBlock, condBlock);
17069 fgAddRefPred(remainderBlock, elseBlock);
17071 BasicBlock* thenBlock = nullptr;
17072 if (hasTrueExpr && hasFalseExpr)
17077 // S0 -->-- ~C -->-- T F -->-- S1
17082 gtReverseCond(condExpr);
17083 condBlock->bbJumpDest = elseBlock;
17085 thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
17086 thenBlock->bbJumpDest = remainderBlock;
17087 if ((block->bbFlags & BBF_INTERNAL) == 0)
17089 thenBlock->bbFlags &= ~BBF_INTERNAL;
17090 thenBlock->bbFlags |= BBF_IMPORTED;
17093 elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
17095 fgAddRefPred(thenBlock, condBlock);
17096 fgAddRefPred(remainderBlock, thenBlock);
17098 thenBlock->inheritWeightPercentage(condBlock, 50);
17099 elseBlock->inheritWeightPercentage(condBlock, 50);
17101 else if (hasTrueExpr)
17104 // S0 -->-- ~C -->-- T -->-- S1
17106 // +-->-------------+
17109 gtReverseCond(condExpr);
17110 condBlock->bbJumpDest = remainderBlock;
17111 fgAddRefPred(remainderBlock, condBlock);
17112 // Since we have no false expr, use the one we'd already created.
17113 thenBlock = elseBlock;
17114 elseBlock = nullptr;
17116 thenBlock->inheritWeightPercentage(condBlock, 50);
17118 else if (hasFalseExpr)
17121 // S0 -->-- C -->-- F -->-- S1
17123 // +-->------------+
17126 condBlock->bbJumpDest = remainderBlock;
17127 fgAddRefPred(remainderBlock, condBlock);
17129 elseBlock->inheritWeightPercentage(condBlock, 50);
17132 GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
17133 GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
17134 fgInsertStmtAtEnd(condBlock, jmpStmt);
17136 // Remove the original qmark statement.
17137 fgRemoveStmt(block, stmt);
17139 // Since we have top level qmarks, we either have a dst for it, in which case
17140 // we need to create tmps for true and falseExprs, else just don't bother
17141 // assigning.
17142 unsigned lclNum = BAD_VAR_NUM;
17143 if (dst != nullptr)
17145 assert(dst->gtOper == GT_LCL_VAR);
17146 lclNum = dst->gtLclVar.gtLclNum;
17150 assert(qmark->TypeGet() == TYP_VOID);
17155 if (dst != nullptr)
17157 trueExpr = gtNewTempAssign(lclNum, trueExpr);
17159 GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
17160 fgInsertStmtAtEnd(thenBlock, trueStmt);
17163 // Assign the falseExpr into the dst or tmp, insert in elseBlock
17166 if (dst != nullptr)
17168 falseExpr = gtNewTempAssign(lclNum, falseExpr);
17170 GenTreePtr falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx);
17171 fgInsertStmtAtEnd(elseBlock, falseStmt);
17177 printf("\nExpanding top-level qmark in BB%02u (after)\n", block->bbNum);
17178 fgDispBasicBlocks(block, remainderBlock, true);
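// [Editor's illustrative note, not original source] A source-level view of the
// "has both arms" case above, written as C++ for concreteness:
#if 0
static int Before(bool c, int t, int f)
{
    return c ? t : f;
}

static int After(bool c, int t, int f)
{
    int tmp;           // the temp created for 'dst'
    if (!c) goto ELSE; // condBlock: the reversed condition jumps to elseBlock
    tmp = t;           // thenBlock (BBJ_ALWAYS to remainderBlock)
    goto REMAINDER;
ELSE:
    tmp = f;           // elseBlock (falls through)
REMAINDER:
    return tmp;        // remainderBlock
}
#endif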
17183 /*****************************************************************************
17185 * Expand GT_QMARK nodes from the flow graph into basic blocks.
17189 void Compiler::fgExpandQmarkNodes()
17193 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
17195 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
17197 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
17199 fgPreExpandQmarkChecks(expr);
17201 fgExpandQmarkStmt(block, stmt);
17205 fgPostExpandQmarkChecks();
17208 compQmarkRationalized = true;
17212 /*****************************************************************************
17214 * Make sure we don't have any more GT_QMARK nodes.
17217 void Compiler::fgPostExpandQmarkChecks()
17219 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
17221 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
17223 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
17224 fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
17230 /*****************************************************************************
17232 * Transform all basic blocks for codegen.
17235 void Compiler::fgMorph()
17237 noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here.
17239 fgOutgoingArgTemps = nullptr;
17244 printf("*************** In fgMorph()\n");
17248 fgDispBasicBlocks(true);
17252 // Insert call to class constructor as the first basic block if
17253 // we were asked to do so.
17254 if (info.compCompHnd->initClass(nullptr /* field */, info.compMethodHnd /* method */,
17255 impTokenLookupContextHandle /* context */) &
17256 CORINFO_INITCLASS_USE_HELPER)
17258 fgEnsureFirstBBisScratch();
17259 fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass());
17263 if (opts.compGcChecks)
17265 for (unsigned i = 0; i < info.compArgsCount; i++)
17267 if (lvaTable[i].TypeGet() == TYP_REF)
17269 // confirm that the argument is a GC pointer (for debugging (GC stress))
17270 GenTreePtr op = gtNewLclvNode(i, TYP_REF);
17271 GenTreeArgList* args = gtNewArgList(op);
17272 op = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, 0, args);
17274 fgEnsureFirstBBisScratch();
17275 fgInsertStmtAtEnd(fgFirstBB, op);
17280 if (opts.compStackCheckOnRet)
17282 lvaReturnEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnEspCheck"));
17283 lvaTable[lvaReturnEspCheck].lvType = TYP_INT;
17286 if (opts.compStackCheckOnCall)
17288 lvaCallEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("CallEspCheck"));
17289 lvaTable[lvaCallEspCheck].lvType = TYP_INT;
17293 /* Filter out unimported BBs */
17295 fgRemoveEmptyBlocks();
17298 /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
17299 fgDebugCheckBBlist(false, false);
17302 EndPhase(PHASE_MORPH_INIT);
17307 JITDUMP("trees after inlining\n");
17308 DBEXEC(VERBOSE, fgDispBasicBlocks(true));
17311 RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.
17313 EndPhase(PHASE_MORPH_INLINE);
17315 /* Add any internal blocks/trees we may need */
17320 fgMultipleNots = false;
17324 /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
17325 fgDebugCheckBBlist(false, false);
17328 fgRemoveEmptyTry();
17330 EndPhase(PHASE_EMPTY_TRY);
17332 fgRemoveEmptyFinally();
17334 EndPhase(PHASE_EMPTY_FINALLY);
17336 fgMergeFinallyChains();
17338 EndPhase(PHASE_MERGE_FINALLY_CHAINS);
17342 EndPhase(PHASE_CLONE_FINALLY);
17344 fgUpdateFinallyTargetFlags();
17346 /* For x64 and ARM64 we need to mark irregular parameters */
17347 fgMarkImplicitByRefArgs();
17349 /* Promote struct locals if necessary */
17350 fgPromoteStructs();
17352 /* Now it is time to figure out which locals are address-taken. */
17353 fgMarkAddressExposedLocals();
17355 EndPhase(PHASE_STR_ADRLCL);
17357 /* Apply the type update to implicit byref parameters; also choose (based on address-exposed
17358 analysis) which implicit byref promotions to keep (requires copy to initialize) or discard. */
17359 fgRetypeImplicitByRefArgs();
17362 /* Now that address-taken locals and implicit byrefs are marked, we can safely apply stress. */
17364 fgStress64RsltMul();
17367 EndPhase(PHASE_MORPH_IMPBYREF);
17369 /* Morph the trees in all the blocks of the method */
17373 /* Fix any LclVar annotations on discarded struct promotion temps for implicit by-ref args */
17374 fgMarkDemotedImplicitByRefArgs();
17376 EndPhase(PHASE_MORPH_GLOBAL);
17379 JITDUMP("trees after fgMorphBlocks\n");
17380 DBEXEC(VERBOSE, fgDispBasicBlocks(true));
17383 /* Decide the kind of code we want to generate */
17387 fgExpandQmarkNodes();
17390 compCurBB = nullptr;
17394 /*****************************************************************************
17396 * Promoting struct locals
17398 void Compiler::fgPromoteStructs()
17403 printf("*************** In fgPromoteStructs()\n");
17407 if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
17412 if (fgNoStructPromotion)
17418 // The code in this #if has been useful in debugging struct promotion issues, by
17419 // enabling selective enablement of the struct promotion optimization according to
17420 // method hash.
17422 unsigned methHash = info.compMethodHash();
17423 char* lostr = getenv("structpromohashlo");
17424 unsigned methHashLo = 0;
17427 sscanf_s(lostr, "%x", &methHashLo);
17429 char* histr = getenv("structpromohashhi");
17430 unsigned methHashHi = UINT32_MAX;
17433 sscanf_s(histr, "%x", &methHashHi);
17435 if (methHash < methHashLo || methHash > methHashHi)
17441 printf("Promoting structs for method %s, hash = 0x%x.\n",
17442 info.compFullName, info.compMethodHash());
17443 printf(""); // in our logic this causes a flush
17448 if (info.compIsVarArgs)
17453 if (getNeedsGSSecurityCookie())
17461 printf("\nlvaTable before fgPromoteStructs\n");
17466 // The lvaTable might grow as we grab temps. Make a local copy here.
17467 unsigned startLvaCount = lvaCount;
17470 // Loop through the original lvaTable. Looking for struct locals to be promoted.
17472 lvaStructPromotionInfo structPromotionInfo;
17473 bool tooManyLocals = false;
17475 for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
17477 // Whether this var got promoted
17478 bool promotedVar = false;
17479 LclVarDsc* varDsc = &lvaTable[lclNum];
17481 // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
17482 // its fields. Instead, we will attempt to enregister the entire struct.
17483 if (varDsc->lvIsSIMDType() && varDsc->lvIsUsedInSIMDIntrinsic())
17485 varDsc->lvRegStruct = true;
17487 // Don't promote if we have reached the tracking limit.
17488 else if (lvaHaveManyLocals())
17490 // Print the message first time when we detected this condition
17491 if (!tooManyLocals)
17493 JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
17495 tooManyLocals = true;
17497 else if (varTypeIsStruct(varDsc))
17499 bool shouldPromote;
17501 lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
17502 if (structPromotionInfo.canPromote)
17504 shouldPromote = lvaShouldPromoteStructVar(lclNum, &structPromotionInfo);
17508 shouldPromote = false;
17512 // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
17513 // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
17514 static int structPromoVarNum = 0;
17515 structPromoVarNum++;
17516 if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
17521 // Promote this struct local var.
17522 lvaPromoteStructVar(lclNum, &structPromotionInfo);
17523 promotedVar = true;
17525 #ifdef _TARGET_ARM_
17526 if (structPromotionInfo.requiresScratchVar)
17528 // Ensure that the scratch variable is allocated, in case we
17529 // pass a promoted struct as an argument.
17530 if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
17532 lvaPromotedStructAssemblyScratchVar =
17533 lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
17534 lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
17537 #endif // _TARGET_ARM_
17541 if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed)
17543 // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
17544 // we will treat it as a reg struct.
17545 varDsc->lvRegStruct = true;
17552 printf("\nlvaTable after fgPromoteStructs\n");
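// [Editor's illustrative note, not original source] Assuming a local of a
// simple two-field struct, e.g.
//     struct Point { int x; int y; }
//     Point p;
// promotion replaces the struct's fields with independent field locals, so a
// statement sequence like "p.x = 3; p.y = 4;" is tracked as two scalar locals
// (conceptually pX = 3; pY = 4;), each of which can then be enregistered and
// optimized on its own.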
17558 Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre)
17560 noway_assert(tree->OperGet() == GT_FIELD);
17562 GenTreePtr objRef = tree->gtField.gtFldObj;
17563 GenTreePtr obj = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? objRef->gtOp.gtOp1 : nullptr;
17564 noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)));
17566 /* Is this an instance data member? */
17568 if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))
17570 unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
17571 LclVarDsc* varDsc = &lvaTable[lclNum];
17573 if (varTypeIsStruct(obj))
17575 if (varDsc->lvPromoted)
17578 unsigned fldOffset = tree->gtField.gtFldOffset;
17579 unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
17580 noway_assert(fieldLclIndex != BAD_VAR_NUM);
17582 if (lvaIsImplicitByRefLocal(lclNum))
17584 // Keep track of the number of appearances of each promoted implicit
17585 // byref (here during struct promotion, which happens during address-exposed
17586 // analysis); fgMakeOutgoingStructArgCopy checks the ref counts for implicit
17587 // byref params when deciding if it's legal to elide certain copies of them.
17588 // Normally fgMarkAddrTakenLocalsPreCB (which calls this method) flags the
17589 // lclVars, but here we're about to return SKIP_SUBTREES and rob it of the
17590 // chance, so have to check now.
17591 JITDUMP(
17592 "Incrementing ref count from %d to %d for V%02d in fgMorphStructField for promoted struct\n",
17593 varDsc->lvRefCnt, varDsc->lvRefCnt + 1, lclNum);
17594 varDsc->lvRefCnt++;
17597 tree->SetOper(GT_LCL_VAR);
17598 tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
17599 tree->gtType = lvaTable[fieldLclIndex].TypeGet();
17600 tree->gtFlags &= GTF_NODE_MASK;
17601 tree->gtFlags &= ~GTF_GLOB_REF;
17603 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17604 if (parent->gtOper == GT_ASG)
17606 if (parent->gtOp.gtOp1 == tree)
17608 tree->gtFlags |= GTF_VAR_DEF;
17609 tree->gtFlags |= GTF_DONT_CSE;
17612 // Promotion of a struct containing struct fields, where such a field
17613 // is a struct with a single pointer-sized scalar type field: in
17614 // this case struct promotion uses the type of the underlying
17615 // scalar field as the type of the struct field instead of recursively
17616 // promoting. This can lead to a case where we have a block-asg
17617 // with its RHS replaced with a scalar type. Mark the RHS value as
17618 // DONT_CSE so that assertion prop will not do const propagation.
17619 // The reason this is required is that if the RHS of a block-asg is a
17620 // constant, then it is incorrectly interpreted as an init-block.
17622 // TODO - This can also be avoided if we implement recursive struct
17623 // promotion.
17624 if (varTypeIsStruct(parent) && parent->gtOp.gtOp2 == tree && !varTypeIsStruct(tree))
17626 tree->gtFlags |= GTF_DONT_CSE;
17632 printf("Replacing the field in promoted struct with a local var:\n");
17633 fgWalkPre->printModified = true;
17636 return WALK_SKIP_SUBTREES;
17642 // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
17643 // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
17644 // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
17645 // there is one extremely rare case where that won't be true. An enum type is a special value type
17646 // that contains exactly one element of a primitive integer type (that, for CLS programs, is named
17647 // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
17648 // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
17649 // ldfld. For example:
17651 // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
17653 // .field public specialname rtspecialname int16 value__
17654 // .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
17656 // .method public hidebysig static void Main() cil managed
17658 // .locals init (valuetype mynamespace.e_t V_0)
17661 // ldflda int16 mynamespace.e_t::value__
17665 // Normally, compilers will not generate the ldflda, since it is superfluous.
17667 // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
17668 // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
17669 // mismatch like this, don't do this morphing. The local var may end up getting marked as
17670 // address taken, and the appropriate SHORT load will be done from memory in that case.
17672 if (tree->TypeGet() == obj->TypeGet())
17674 if (lvaIsImplicitByRefLocal(lclNum))
17676 // Keep track of the number of appearances of each promoted implicit
17677 // byref (here during struct promotion, which happens during address-exposed
17678 // analysis); fgMakeOutgoingStructArgCopy checks the ref counts for implicit
17679 // byref params when deciding if it's legal to elide certain copies of them.
17680 // Normally fgMarkAddrTakenLocalsPreCB (which calls this method) flags the
17681 // lclVars, but here we're about to return SKIP_SUBTREES and rob it of the
17682 // chance, so have to check now.
17683 JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMorphStructField for normed struct\n",
17684 varDsc->lvRefCnt, varDsc->lvRefCnt + 1, lclNum);
17685 varDsc->lvRefCnt++;
17688 tree->ChangeOper(GT_LCL_VAR);
17689 tree->gtLclVarCommon.SetLclNum(lclNum);
17690 tree->gtFlags &= GTF_NODE_MASK;
17692 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17693 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
17695 tree->gtFlags |= GTF_VAR_DEF;
17696 tree->gtFlags |= GTF_DONT_CSE;
17701 printf("Replacing the field in normed struct with the local var:\n");
17702 fgWalkPre->printModified = true;
17705 return WALK_SKIP_SUBTREES;
17710 return WALK_CONTINUE;
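// [Editor's illustrative note, not original source] For a promoted local, the
// rewrite above turns a field access tree such as
//
//     GT_FIELD y (GT_ADDR (GT_LCL_VAR V01 Point))
//
// into a direct reference to the promoted field local:
//
//     GT_LCL_VAR V03 (the "V01.y" field local)
//
// so later phases see an ordinary scalar local instead of a memory access.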
17713 Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTreePtr tree, fgWalkData* fgWalkPre)
17715 noway_assert(tree->OperGet() == GT_LCL_FLD);
17717 unsigned lclNum = tree->gtLclFld.gtLclNum;
17718 LclVarDsc* varDsc = &lvaTable[lclNum];
17720 if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted))
17723 unsigned fldOffset = tree->gtLclFld.gtLclOffs;
17724 unsigned fieldLclIndex = 0;
17725 LclVarDsc* fldVarDsc = nullptr;
17727 if (fldOffset != BAD_VAR_NUM)
17729 fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
17730 noway_assert(fieldLclIndex != BAD_VAR_NUM);
17731 fldVarDsc = &lvaTable[fieldLclIndex];
17734 if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType)
17735 #ifdef _TARGET_X86_
17736 && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType)
17740 // There is an existing sub-field we can use
17741 tree->gtLclFld.SetLclNum(fieldLclIndex);
17743 // We need to keep the types 'compatible'. If we can switch back to a GT_LCL_VAR, we do so below.
17744 CLANG_FORMAT_COMMENT_ANCHOR;
17746 #ifdef _TARGET_ARM_
17747 assert(varTypeIsIntegralOrI(tree->TypeGet()) || varTypeIsFloating(tree->TypeGet()));
17749 assert(varTypeIsIntegralOrI(tree->TypeGet()));
17751 if (varTypeCanReg(fldVarDsc->TypeGet()))
17753 // If the type is integer-ish, then we can use it as-is
17754 tree->ChangeOper(GT_LCL_VAR);
17755 assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex);
17756 tree->gtType = fldVarDsc->TypeGet();
17760 printf("Replacing the GT_LCL_FLD in promoted struct with a local var:\n");
17761 fgWalkPre->printModified = true;
17766 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17767 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
17769 tree->gtFlags |= GTF_VAR_DEF;
17770 tree->gtFlags |= GTF_DONT_CSE;
17775 // There is no existing field that has all the parts that we need
17776 // So we must ensure that the struct lives in memory.
17777 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
17780 // We can't convert this guy to a float because he really does have his
17781 // address taken.
17782 varDsc->lvKeepType = 1;
17786 return WALK_SKIP_SUBTREES;
17789 return WALK_CONTINUE;
17792 //------------------------------------------------------------------------
17793 // fgMarkImplicitByRefArgs: Identify any by-value struct parameters which are "implicit by-reference";
17794 // i.e. which the ABI requires to be passed by making a copy in the caller and
17795 // passing its address to the callee. Mark their `LclVarDsc`s such that
17796 // `lvaIsImplicitByRefLocal` will return true for them.
17798 void Compiler::fgMarkImplicitByRefArgs()
17800 #if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
17804 printf("\n*************** In fgMarkImplicitByRefs()\n");
17808 for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
17810 LclVarDsc* varDsc = &lvaTable[lclNum];
17812 if (varDsc->lvIsParam && varTypeIsStruct(varDsc))
17816 if (varDsc->lvSize() > REGSIZE_BYTES)
17818 size = varDsc->lvSize();
17822 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
17823 size = info.compCompHnd->getClassSize(typeHnd);
17826 #if defined(_TARGET_AMD64_)
17827 if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
17828 #elif defined(_TARGET_ARM64_)
17829 if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc))
17832 // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local
17833 // So I am now using it to indicate that this is one of the weird implicit
17834 // byref locals.
17835 // The address taken cleanup will look for references to locals marked like
17836 // this, and transform them appropriately.
17837 varDsc->lvIsTemp = 1;
17839 // Clear the ref count field; fgMarkAddressTakenLocals will increment it per
17840 // appearance of implicit-by-ref param so that call arg morphing can do an
17841 // optimization for single-use implicit-by-ref params whose single use is as
17842 // an outgoing call argument.
17843 varDsc->lvRefCnt = 0;
17848 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
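// [Editor's illustrative note, not original source] On Windows x64 a by-value
// struct argument travels in a register only when its size is 1, 2, 4, or 8
// bytes (a power of two no larger than REGSIZE_BYTES), which is exactly the
// "size > REGSIZE_BYTES || (size & (size - 1)) != 0" test above; anything else
// is copied by the caller and passed by address. For example:
#if 0
#include <cstdint>
struct S8  { int32_t a; int32_t b; };            // 8 bytes: passed by value in a register
struct S12 { int32_t a; int32_t b; int32_t c; }; // 12 bytes: implicit byref -- the caller
                                                 // copies it and passes the copy's address
#endif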
17851 //------------------------------------------------------------------------
17852 // fgRetypeImplicitByRefArgs: Update the types on implicit byref parameters' `LclVarDsc`s (from
17853 // struct to pointer). Also choose (based on address-exposed analysis)
17854 // which struct promotions of implicit byrefs to keep or discard.
17855 // For those which are kept, insert the appropriate initialization code.
17856 // For those which are to be discarded, annotate the promoted field locals
17857 // so that fgMorphImplicitByRefArgs will know to rewrite their appearances
17858 // using indirections off the pointer parameters.
17860 void Compiler::fgRetypeImplicitByRefArgs()
17862 #if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
17866 printf("\n*************** In fgRetypeImplicitByRefArgs()\n");
17870 for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
17872 LclVarDsc* varDsc = &lvaTable[lclNum];
17874 if (lvaIsImplicitByRefLocal(lclNum))
17878 if (varDsc->lvSize() > REGSIZE_BYTES)
17880 size = varDsc->lvSize();
17884 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
17885 size = info.compCompHnd->getClassSize(typeHnd);
17888 if (varDsc->lvPromoted)
17890 // This implicit-by-ref was promoted; create a new temp to represent the
17891 // promoted struct before rewriting this parameter as a pointer.
17892 unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Promoted implicit byref"));
17893 lvaSetStruct(newLclNum, lvaGetStruct(lclNum), true);
17894 // Update varDsc since lvaGrabTemp might have re-allocated the var dsc array.
17895 varDsc = &lvaTable[lclNum];
17897 // Copy the struct promotion annotations to the new temp.
17898 LclVarDsc* newVarDsc = &lvaTable[newLclNum];
17899 newVarDsc->lvPromoted = true;
17900 newVarDsc->lvFieldLclStart = varDsc->lvFieldLclStart;
17901 newVarDsc->lvFieldCnt = varDsc->lvFieldCnt;
17902 newVarDsc->lvContainsHoles = varDsc->lvContainsHoles;
17903 newVarDsc->lvCustomLayout = varDsc->lvCustomLayout;
17905 newVarDsc->lvKeepType = true;
17908 // Propagate address-taken-ness and do-not-enregister-ness.
17909 newVarDsc->lvAddrExposed = varDsc->lvAddrExposed;
17910 newVarDsc->lvDoNotEnregister = varDsc->lvDoNotEnregister;
17912 newVarDsc->lvLclBlockOpAddr = varDsc->lvLclBlockOpAddr;
17913 newVarDsc->lvLclFieldExpr = varDsc->lvLclFieldExpr;
17914 newVarDsc->lvVMNeedsStackAddr = varDsc->lvVMNeedsStackAddr;
17915 newVarDsc->lvLiveInOutOfHndlr = varDsc->lvLiveInOutOfHndlr;
17916 newVarDsc->lvLiveAcrossUCall = varDsc->lvLiveAcrossUCall;
17919 // If the promotion is dependent, the promoted temp would just be committed
17920 // to memory anyway, so we'll rewrite its appearances to be indirections
17921 // through the pointer parameter, the same as we'd do for this
17922 // parameter if it weren't promoted at all (otherwise the initialization
17923 // of the new temp would just be a needless memcpy at method entry).
17924 bool undoPromotion = (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ||
17925 (varDsc->lvRefCnt <= varDsc->lvFieldCnt);
17927 if (!undoPromotion)
17929 // Insert IR that initializes the temp from the parameter.
17930 // LHS is a simple reference to the temp.
17931 fgEnsureFirstBBisScratch();
17932 GenTreePtr lhs = gtNewLclvNode(newLclNum, varDsc->lvType);
17933 // RHS is an indirection (using GT_OBJ) off the parameter.
17934 GenTreePtr addr = gtNewLclvNode(lclNum, TYP_BYREF);
17935 GenTreePtr rhs = gtNewBlockVal(addr, (unsigned)size);
17936 GenTreePtr assign = gtNewAssignNode(lhs, rhs);
17937 fgInsertStmtAtBeg(fgFirstBB, assign);
17940 // Update the locals corresponding to the promoted fields.
17941 unsigned fieldLclStart = varDsc->lvFieldLclStart;
17942 unsigned fieldCount = varDsc->lvFieldCnt;
17943 unsigned fieldLclStop = fieldLclStart + fieldCount;
17945 for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
17947 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
17951 // Leave lvParentLcl pointing to the parameter so that fgMorphImplicitByRefArgs
17952 // will know to rewrite appearances of this local.
17953 assert(fieldVarDsc->lvParentLcl == lclNum);
17957 // Set the new parent.
17958 fieldVarDsc->lvParentLcl = newLclNum;
17959 // Clear the ref count field; it is used to communicate the number of references
17960 // to the implicit byref parameter when morphing calls that pass the implicit byref
17961 // out as an outgoing argument value, but that doesn't pertain to this field local
17962 // which is now a field of a non-arg local.
17963 fieldVarDsc->lvRefCnt = 0;
17966 fieldVarDsc->lvIsParam = false;
17967 // The fields shouldn't inherit any register preferences from
17968 // the parameter which is really a pointer to the struct.
17969 fieldVarDsc->lvIsRegArg = false;
17970 fieldVarDsc->lvIsMultiRegArg = false;
17971 fieldVarDsc->lvSetIsHfaRegArg(false);
17972 fieldVarDsc->lvArgReg = REG_NA;
17973 #if FEATURE_MULTIREG_ARGS
17974 fieldVarDsc->lvOtherArgReg = REG_NA;
17976 fieldVarDsc->lvPrefReg = 0;
17979 // Hijack lvFieldLclStart to record the new temp number.
17980 // It will get fixed up in fgMarkDemotedImplicitByRefArgs.
17981 varDsc->lvFieldLclStart = newLclNum;
17982 // Go ahead and clear lvFieldCnt -- either we're promoting
17983 // a replacement temp or we're not promoting this arg, and
17984 // in either case the parameter is now a pointer that doesn't
17985 // have these fields.
17986 varDsc->lvFieldCnt = 0;
17988 // Hijack lvPromoted to communicate to fgMorphImplicitByRefArgs
17989 // whether references to the struct should be rewritten as
17990 // indirections off the pointer (not promoted) or references
17991 // to the new struct local (promoted).
17992 varDsc->lvPromoted = !undoPromotion;
17996 // The "undo promotion" path above clears lvPromoted for args that struct
17997 // promotion wanted to promote but that aren't considered profitable to
17998 // rewrite. It hijacks lvFieldLclStart to communicate to
17999 // fgMarkDemotedImplicitByRefArgs that it needs to clean up annotations left
18000 // on such args for fgMorphImplicitByRefArgs to consult in the interim.
18001 // Here we have an arg that was simply never promoted, so make sure it doesn't
18002 // have nonzero lvFieldLclStart, since that would confuse fgMorphImplicitByRefArgs
18003 // and fgMarkDemotedImplicitByRefArgs.
18004 assert(varDsc->lvFieldLclStart == 0);
18007 // Since the parameter in this position is really a pointer, its type is TYP_BYREF.
18008 varDsc->lvType = TYP_BYREF;
18010 // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF
18011 // make sure that the following flag is not set, as it would force SSA to
18012 // exclude tracking/enregistering these LclVars. (see fgExcludeFromSsa)
18014 varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it.
18017 // This should not be converted to a double in stress mode,
18018 // because it is really a pointer
18019 varDsc->lvKeepType = 1;
18021 // The struct parameter may have had its address taken, but the pointer parameter
18022 // cannot -- any uses of the struct parameter's address are uses of the pointer
18023 // parameter's value, and there's no way for the MSIL to reference the pointer
18024 // parameter's address. So clear the address-taken bit for the parameter.
18025 varDsc->lvAddrExposed = 0;
18026 varDsc->lvDoNotEnregister = 0;
18030 printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum);
18036 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
18039 //------------------------------------------------------------------------
18040 // fgMarkDemotedImplicitByRefArgs: Clear annotations for any implicit byrefs that struct promotion
18041 // asked to promote. Appearances of these have now been rewritten
18042 // (by fgMorphImplicitByRefArgs) using indirections from the pointer
18043 // parameter or references to the promotion temp, as appropriate.
18045 void Compiler::fgMarkDemotedImplicitByRefArgs()
18047 #if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
18049 for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
18051 LclVarDsc* varDsc = &lvaTable[lclNum];
18053 if (lvaIsImplicitByRefLocal(lclNum))
18055 if (varDsc->lvPromoted)
18057 // The parameter is simply a pointer now, so clear lvPromoted. It was left set
18058 // by fgRetypeImplicitByRefArgs to communicate to fgMorphImplicitByRefArgs that
18059 // appearances of this arg needed to be rewritten to a new promoted struct local.
18060 varDsc->lvPromoted = false;
18062 // Clear the lvFieldLclStart value that was set by fgRetypeImplicitByRefArgs
18063 // to tell fgMorphImplicitByRefArgs which local is the new promoted struct one.
18064 varDsc->lvFieldLclStart = 0;
18066 else if (varDsc->lvFieldLclStart != 0)
18068 // We created new temps to represent a promoted struct corresponding to this
18069 // parameter, but decided not to go through with the promotion and have
18070 // rewritten all uses as indirections off the pointer parameter.
18071 // We stashed the pointer to the new struct temp in lvFieldLclStart; make
18072 // note of that and clear the annotation.
18073 unsigned structLclNum = varDsc->lvFieldLclStart;
18074 varDsc->lvFieldLclStart = 0;
18076 // Clear the arg's ref count; this was set during address-taken analysis so that
18077 // call morphing could identify single-use implicit byrefs; we're done with
18078 // that, and want it to be in its default state of zero when we go to set
18079 // real ref counts for all variables.
18080 varDsc->lvRefCnt = 0;
18082 // The temp struct is now unused; set flags appropriately so that we
18083 // won't allocate space for it on the stack.
18084 LclVarDsc* structVarDsc = &lvaTable[structLclNum];
18085 structVarDsc->lvRefCnt = 0;
18086 structVarDsc->lvAddrExposed = false;
18088 structVarDsc->lvUnusedStruct = true;
18091 unsigned fieldLclStart = structVarDsc->lvFieldLclStart;
18092 unsigned fieldCount = structVarDsc->lvFieldCnt;
18093 unsigned fieldLclStop = fieldLclStart + fieldCount;
18095 for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
18097 // Fix the pointer to the parent local.
18098 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
18099 assert(fieldVarDsc->lvParentLcl == lclNum);
18100 fieldVarDsc->lvParentLcl = structLclNum;
18102 // The field local is now unused; set flags appropriately so that
18103 // we won't allocate stack space for it.
18104 fieldVarDsc->lvRefCnt = 0;
18105 fieldVarDsc->lvAddrExposed = false;
18111 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
18114 /*****************************************************************************
18116 * Morph irregular parameters:
18117 * for x64 and ARM64 this means turning them into byrefs and adding extra indirections.
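 *
 * As a hypothetical illustration: given a C# method
 *     static int Sum(BigStruct s) { return s.a + s.b; }
 * x64 and ARM64 pass `s` as a pointer, so an appearance of the struct value
 * (LCL_VAR<struct> V00) is rewritten here to an indirection off the pointer
 * (OBJ(LCL_VAR<byref> V00)), while `&s` becomes just the byref value itself
 * (LCL_VAR<byref> V00).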
18119 bool Compiler::fgMorphImplicitByRefArgs(GenTreePtr tree)
18121 #if (!defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) && !defined(_TARGET_ARM64_)
18125 #else // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
18127 bool changed = false;
18129 // Implicit byref morphing needs to know if the reference to the parameter is a
18130 // child of GT_ADDR or not, so this method looks one level down and does the
18131 // rewrite whenever a child is a reference to an implicit byref parameter.
18132 if (tree->gtOper == GT_ADDR)
18134 if (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
18136 GenTreePtr morphedTree = fgMorphImplicitByRefArgs(tree, true);
18137 changed = (morphedTree != nullptr);
18138 assert(!changed || (morphedTree == tree));
18143 for (GenTreePtr* pTree : tree->UseEdges())
18145 GenTreePtr childTree = *pTree;
18146 if (childTree->gtOper == GT_LCL_VAR)
18148 GenTreePtr newChildTree = fgMorphImplicitByRefArgs(childTree, false);
18149 if (newChildTree != nullptr)
18152 *pTree = newChildTree;
18159 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
18162 GenTreePtr Compiler::fgMorphImplicitByRefArgs(GenTreePtr tree, bool isAddr)
18164 assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)));
18165 assert(isAddr == (tree->gtOper == GT_ADDR));
18167 GenTreePtr lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree;
18168 unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
18169 LclVarDsc* lclVarDsc = &lvaTable[lclNum];
18171 CORINFO_FIELD_HANDLE fieldHnd;
18172 unsigned fieldOffset = 0;
18173 var_types fieldRefType = TYP_UNKNOWN;
18175 if (lvaIsImplicitByRefLocal(lclNum))
18177 // The SIMD transformation to coalesce contiguous references to SIMD vector fields will
18178 // re-invoke the traversal to mark address-taken locals.
18179 // So, we may encounter a tree that has already been transformed to TYP_BYREF.
18180 // If we do, leave it as-is.
18181 if (!varTypeIsStruct(lclVarTree))
18183 assert(lclVarTree->TypeGet() == TYP_BYREF);
18187 else if (lclVarDsc->lvPromoted)
18189 // fgRetypeImplicitByRefArgs created a new promoted struct local to represent this
18190 // arg. Rewrite this to refer to the new local.
18191 assert(lclVarDsc->lvFieldLclStart != 0);
18192 lclVarTree->AsLclVarCommon()->SetLclNum(lclVarDsc->lvFieldLclStart);
18196 fieldHnd = nullptr;
18198 else if (lclVarDsc->lvIsStructField && lvaIsImplicitByRefLocal(lclVarDsc->lvParentLcl))
18200 // This was a field reference to an implicit-by-reference struct parameter that was
18201 // dependently promoted; update it to a field reference off the pointer.
18202 // Grab the field handle from the struct field lclVar.
18203 fieldHnd = lclVarDsc->lvFieldHnd;
18204 fieldOffset = lclVarDsc->lvFldOffset;
18205 assert(fieldHnd != nullptr);
18206 // Update lclNum/lclVarDsc to refer to the parameter
18207 lclNum = lclVarDsc->lvParentLcl;
18208 lclVarDsc = &lvaTable[lclNum];
18209 fieldRefType = lclVarTree->TypeGet();
18213 // We only need to transform the 'marked' implicit by ref parameters
18217 // This is no longer a def of the lclVar, even if it WAS a def of the struct.
18218 lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK);
18222 if (fieldHnd == nullptr)
18224 // change &X into just plain X
18225 tree->CopyFrom(lclVarTree, this);
18226 tree->gtType = TYP_BYREF;
18230 // change &(X.f) [i.e. GT_ADDR of local for promoted arg field]
18231 // into &(X, f) [i.e. GT_ADDR of GT_FIELD off ptr param]
18232 lclVarTree->gtLclVarCommon.SetLclNum(lclNum);
18233 lclVarTree->gtType = TYP_BYREF;
18234 tree->gtOp.gtOp1 = gtNewFieldRef(fieldRefType, fieldHnd, lclVarTree, fieldOffset);
18240 printf("Replacing address of implicit by ref struct parameter with byref:\n");
18246 // Change X into OBJ(X) or FIELD(X, f)
18247 var_types structType = tree->gtType;
18248 tree->gtType = TYP_BYREF;
18252 tree->gtLclVarCommon.SetLclNum(lclNum);
18253 tree = gtNewFieldRef(fieldRefType, fieldHnd, tree, fieldOffset);
18257 tree = gtNewObjNode(lclVarDsc->lvVerTypeInfo.GetClassHandle(), tree);
18260 if (structType == TYP_STRUCT)
18262 gtSetObjGcInfo(tree->AsObj());
18265 // TODO-CQ: If the VM ever stops violating the ABI by passing heap references
18266 // we could remove TGTANYWHERE
18267 tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE);
18272 printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n");
18287 // An "AddrExposedContext" expresses the calling context in which an address expression occurs.
18288 enum AddrExposedContext
18290 AXC_None, // None of the below seen yet.
18291 AXC_Ind, // The address being computed is to be dereferenced.
18292 AXC_Addr, // We're computing a raw address (not dereferenced, at least not immediately).
18293 AXC_IndWide, // A block operation dereferenced an address that covers more bytes than the
18294 // addressed storage location contains -- if the address addresses a field of a struct
18295 // local, we need to consider the entire local address taken (not just the field).
18296 AXC_AddrWide, // The address being computed will be dereferenced by a block operation that operates
18297 // on more bytes than the width of the storage location addressed. If this is a
18298 // field of a promoted struct local, declare the entire struct local address-taken.
18299 AXC_IndAdd, // A GT_ADD is the immediate parent, and it was evaluated in an IND context.
18300 // If one arg is a constant int, evaluate the other in an IND context. Otherwise, none.
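// A small worked illustration (a sketch): for an indirect store whose address is
//     IND( ADD( ADDR(LCL_VAR V00), CNS_INT <field offset> ) )
// the IND pushes AXC_Ind; the ADD, seen in AXC_Ind, pushes AXC_IndAdd; the ADDR,
// being the non-constant operand of that ADD, is then evaluated in an Ind context
// and pushes AXC_None, so V00 is not marked address-exposed here. A bare
// ADDR(LCL_VAR) reached in AXC_None would instead push AXC_Addr, and the local
// would be marked address-exposed when visited.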
18303 typedef ArrayStack<AddrExposedContext> AXCStack;
18305 // We use pre- and post-order callbacks to simulate passing an argument through the recursion, via a stack.
18306 Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPostCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
18308 AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
18309 (void)axcStack->Pop();
18310 return WALK_CONTINUE;
18313 Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
18315 GenTreePtr tree = *pTree;
18316 Compiler* comp = fgWalkPre->compiler;
18317 AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
18318 AddrExposedContext axc = axcStack->Top();
18320 // In some situations, we have to figure out what the effective context is in which to
18321 // evaluate the current tree, depending on which argument position it is in its parent.
18328 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
18329 assert(parent->OperGet() == GT_ADD);
18330 // Is one of the args a constant representing a field offset,
18331 // and is this the other? If so, Ind context.
18332 if (parent->gtOp.gtOp1->IsCnsIntOrI() && parent->gtOp.gtOp2 == tree)
18336 else if (parent->gtOp.gtOp2->IsCnsIntOrI() && parent->gtOp.gtOp1 == tree)
18351 // Now recurse properly for the tree.
18352 switch (tree->gtOper)
18355 if (axc != AXC_Addr)
18357 axcStack->Push(AXC_Ind);
18361 axcStack->Push(AXC_None);
18363 return WALK_CONTINUE;
18367 if (axc == AXC_Addr)
18369 axcStack->Push(AXC_None);
18371 else if (tree->TypeGet() == TYP_STRUCT)
18373 // The block operation will dereference its argument(s) -- usually. If the size of the initblk
18374 // or copyblk exceeds the size of a storage location whose address is used as one of the
18375 // arguments, then we have to consider that storage location (indeed, its underlying containing
18376 // location) to be address taken. So get the width of the initblk or copyblk.
18378 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
18379 GenTreeBlk* blk = tree->AsBlk();
18380 unsigned width = blk->gtBlkSize;
18381 noway_assert(width != 0);
18383 GenTree* addr = blk->Addr();
18384 if (addr->OperGet() == GT_ADDR)
18386 if (parent->gtOper == GT_ASG)
18388 if ((tree == parent->gtOp.gtOp1) &&
18389 ((width == 0) || !comp->fgFitsInOrNotLoc(addr->gtGetOp1(), width)))
18396 assert(parent->gtOper == GT_CALL);
18399 axcStack->Push(axc);
18403 // This is like a regular GT_IND.
18404 axcStack->Push(AXC_Ind);
18406 return WALK_CONTINUE;
18409 // Assume maximal width.
18410 axcStack->Push(AXC_IndWide);
18411 return WALK_CONTINUE;
18414 case GT_FIELD_LIST:
18415 axcStack->Push(AXC_None);
18416 return WALK_CONTINUE;
18419 // Taking the address of an array element never takes the address of a local.
18420 axcStack->Push(AXC_None);
18421 return WALK_CONTINUE;
18424 #ifdef FEATURE_SIMD
18425 if (tree->gtOp.gtOp1->OperGet() == GT_SIMD)
18427 axcStack->Push(AXC_None);
18430 #endif // FEATURE_SIMD
18431 if (axc == AXC_Ind)
18433 axcStack->Push(AXC_None);
18435 else if (axc == AXC_IndWide)
18437 axcStack->Push(AXC_AddrWide);
18441 assert(axc == AXC_None);
18442 axcStack->Push(AXC_Addr);
18444 return WALK_CONTINUE;
18447 // First, handle a couple of special cases: field of promoted struct local, field
18448 // of "normed" struct.
18449 if (comp->fgMorphStructField(tree, fgWalkPre) == WALK_SKIP_SUBTREES)
18451 // It (may have) replaced the field with a local var or local field. If we're in an addr context,
18452 // label it addr-taken.
18453 if (tree->OperIsLocal() && (axc == AXC_Addr || axc == AXC_AddrWide))
18455 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
18456 comp->lvaSetVarAddrExposed(lclNum);
18457 if (axc == AXC_AddrWide)
18459 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
18460 if (varDsc->lvIsStructField)
18462 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
18466 // Push something to keep the PostCB, which will pop it, happy.
18467 axcStack->Push(AXC_None);
18468 return WALK_SKIP_SUBTREES;
18472 // GT_FIELD is an implicit deref.
18473 if (axc == AXC_Addr)
18475 axcStack->Push(AXC_None);
18477 else if (axc == AXC_AddrWide)
18479 axcStack->Push(AXC_IndWide);
18483 axcStack->Push(AXC_Ind);
18485 return WALK_CONTINUE;
18490 assert(axc != AXC_Addr);
18491 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
18492 if (comp->lvaIsImplicitByRefLocal(lclNum))
18494 // Keep track of the number of appearances of each promoted implicit
18495 // byref (here during address-exposed analysis); fgMakeOutgoingStructArgCopy
18496 // checks the ref counts for implicit byref params when deciding if it's legal
18497 // to elide certain copies of them.
18498 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
18499 JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMarkAddrTakenLocalsPreCB\n", varDsc->lvRefCnt,
18500 varDsc->lvRefCnt + 1, lclNum);
18502 varDsc->lvRefCnt++;
18504 // This recognizes certain forms, and does all the work. In that case, returns WALK_SKIP_SUBTREES,
18505 // else WALK_CONTINUE. We do the same here.
18506 fgWalkResult res = comp->fgMorphLocalField(tree, fgWalkPre);
18507 if (res == WALK_SKIP_SUBTREES && tree->OperGet() == GT_LCL_VAR && (axc == AXC_Addr || axc == AXC_AddrWide))
18509 comp->lvaSetVarAddrExposed(lclNum);
18510 if (axc == AXC_AddrWide)
18512 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
18513 if (varDsc->lvIsStructField)
18515 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
18519 // Must push something; if res is WALK_SKIP_SUBTREES, doesn't matter
18520 // what, but something to be popped by the post callback. If we're going
18521 // to analyze children, the LCL_FLD creates an Ind context, so use that.
18522 axcStack->Push(AXC_Ind);
18528 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
18529 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
18531 if (comp->lvaIsImplicitByRefLocal(lclNum))
18533 // Keep track of the number of appearances of each promoted implicit
18534 // byref (here during address-exposed analysis); fgMakeOutgoingStructArgCopy
18535 // checks the ref counts for implicit byref params when deciding if it's legal
18536 // to elide certain copies of them.
18537 JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMarkAddrTakenLocalsPreCB\n", varDsc->lvRefCnt,
18538 varDsc->lvRefCnt + 1, lclNum);
18540 varDsc->lvRefCnt++;
18543 if (axc == AXC_Addr || axc == AXC_AddrWide)
18545 comp->lvaSetVarAddrExposed(lclNum);
18546 if (axc == AXC_AddrWide)
18548 if (varDsc->lvIsStructField)
18550 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
18554 // We may need to Quirk the storage size for this LCL_VAR:
18555 // some PInvoke signatures incorrectly specify a ByRef to an INT32
18556 // when they actually write a SIZE_T or INT64.
18557 if (axc == AXC_Addr)
18559 comp->gtCheckQuirkAddrExposedLclVar(tree, fgWalkPre->parentStack);
18562 // Push something to keep the PostCB, which will pop it, happy.
18563 axcStack->Push(AXC_None);
18564 // The tree is a leaf.
18565 return WALK_SKIP_SUBTREES;
18569 assert(axc != AXC_Addr);
18570 // See below about treating pointer operations as wider indirection.
18571 if (tree->gtOp.gtOp1->gtType == TYP_BYREF || tree->gtOp.gtOp2->gtType == TYP_BYREF)
18573 axcStack->Push(AXC_IndWide);
18575 else if (axc == AXC_Ind)
18577 // Let the children know that the parent was a GT_ADD, to be evaluated in an IND context.
18578 // If it's an add of a constant and an address, and the constant represents a field,
18579 // then we'll evaluate the address argument in an Ind context; otherwise, the None context.
18580 axcStack->Push(AXC_IndAdd);
18584 axcStack->Push(axc);
18586 return WALK_CONTINUE;
18588 // !!! Treat Pointer Operations as Wider Indirection
18590 // If we are performing pointer operations, make sure we treat that as equivalent to a wider
18591 // indirection. This is because the pointers could be pointing to the address of struct fields
18592 // and could be used to perform operations on the whole struct or passed to another method.
18594 // When visiting a node in this pre-order walk, we do not know if we would in the future
18595 // encounter a GT_ADDR of a GT_FIELD below.
18597 // Note: GT_ADDR of a GT_FIELD is always a TYP_BYREF.
18598 // So let us be conservative and treat TYP_BYREF operations as AXC_IndWide and propagate a
18599 // wider indirection context down the expr tree.
18601 // For example, in unsafe code:
18603 // IL_000e 12 00 ldloca.s 0x0
18604 // IL_0010 7c 02 00 00 04 ldflda 0x4000002
18605 // IL_0015 12 00 ldloca.s 0x0
18606 // IL_0017 7c 01 00 00 04 ldflda 0x4000001
18609 // When visiting the GT_SUB node, if the type of either of GT_SUB's operands is BYREF, then
18610 // consider GT_SUB to be the equivalent of an AXC_IndWide.
18612 // Similarly for pointer comparisons and pointer escaping as integers through conversions, treat
18613 // them as AXC_IndWide.
18637 if ((tree->gtOp.gtOp1->gtType == TYP_BYREF) ||
18638 (tree->OperIsBinary() && (tree->gtOp.gtOp2->gtType == TYP_BYREF)))
18640 axcStack->Push(AXC_IndWide);
18641 return WALK_CONTINUE;
18646 // To be safe/conservative: pass Addr through, but not Ind -- otherwise, revert to "None". We must
18647 // handle the "Ind" propagation explicitly above.
18648 if (axc == AXC_Addr || axc == AXC_AddrWide)
18650 axcStack->Push(axc);
18654 axcStack->Push(AXC_None);
18656 return WALK_CONTINUE;
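// fgFitsInOrNotLoc:
// Returns true if an access of `width` bytes fits within the storage location
// denoted by `tree` -- e.g., a 4-byte block operation on the address of a TYP_INT
// local fits, while an 8-byte one does not, in which case the caller above must
// treat the enclosing local as address-exposed (the AXC_IndWide/AXC_AddrWide cases).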
18660 bool Compiler::fgFitsInOrNotLoc(GenTreePtr tree, unsigned width)
18662 if (tree->TypeGet() != TYP_STRUCT)
18664 return width <= genTypeSize(tree->TypeGet());
18666 else if (tree->OperGet() == GT_LCL_VAR)
18668 assert(tree->TypeGet() == TYP_STRUCT);
18669 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
18670 return width <= lvaTable[lclNum].lvExactSize;
18672 else if (tree->OperGet() == GT_FIELD)
18674 CORINFO_CLASS_HANDLE fldClass = info.compCompHnd->getFieldClass(tree->gtField.gtFldHnd);
18675 return width <= info.compCompHnd->getClassSize(fldClass);
18677 else if (tree->OperGet() == GT_INDEX)
18679 return width <= tree->gtIndex.gtIndElemSize;
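// fgAddFieldSeqForZeroOffset:
// Annotate the address `op1`, known to point to a field at offset zero, with the
// given field sequence so that value numbering can recover the field access.
// Depending on the shape of `op1` (a sketch): the annotation is appended to a
// LCL_FLD's field sequence, to a constant operand's gtFieldSeq, or, failing
// those, recorded in the compiler's zero-offset field map.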
18687 void Compiler::fgAddFieldSeqForZeroOffset(GenTreePtr op1, FieldSeqNode* fieldSeq)
18689 assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_REF);
18691 switch (op1->OperGet())
18694 if (op1->gtOp.gtOp1->OperGet() == GT_LCL_FLD)
18696 GenTreeLclFld* lclFld = op1->gtOp.gtOp1->AsLclFld();
18697 lclFld->gtFieldSeq = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq);
18702 if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT)
18704 FieldSeqNode* op1Fs = op1->gtOp.gtOp1->gtIntCon.gtFieldSeq;
18705 if (op1Fs != nullptr)
18707 op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
18708 op1->gtOp.gtOp1->gtIntCon.gtFieldSeq = op1Fs;
18711 else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT)
18713 FieldSeqNode* op2Fs = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
18714 if (op2Fs != nullptr)
18716 op2Fs = GetFieldSeqStore()->Append(op2Fs, fieldSeq);
18717 op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op2Fs;
18724 FieldSeqNode* op1Fs = op1->gtIntCon.gtFieldSeq;
18725 if (op1Fs != nullptr)
18727 op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
18728 op1->gtIntCon.gtFieldSeq = op1Fs;
18734 // Record in the general zero-offset map.
18735 GetZeroOffsetFieldMap()->Set(op1, fieldSeq);
18740 /*****************************************************************************
18742 * Mark address-taken locals.
18745 void Compiler::fgMarkAddressExposedLocals()
18750 printf("\n*************** In fgMarkAddressExposedLocals()\n");
18754 BasicBlock* block = fgFirstBB;
18755 noway_assert(block);
18759 /* Make the current basic block address available globally */
18765 for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
18767 // Call Compiler::fgMarkAddrTakenLocalsCB on each node
18768 AXCStack stk(this);
18769 stk.Push(AXC_None); // We start in neither an addr or ind context.
18770 fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
18773 block = block->bbNext;
18778 // fgNodesMayInterfere:
18779 // return true if moving nodes relative to each other can change the result of a computation
18782 // write: a node which writes; read: a node which reads
18785 bool Compiler::fgNodesMayInterfere(GenTree* write, GenTree* read)
18787 LclVarDsc* srcVar = nullptr;
18789 bool readIsIndir = read->OperIsIndir() || read->OperIsImplicitIndir();
18790 bool writeIsIndir = write->OperIsIndir() || write->OperIsImplicitIndir();
18792 if (read->OperIsLocal())
18794 srcVar = &lvaTable[read->gtLclVarCommon.gtLclNum];
18799 if (srcVar && srcVar->lvAddrExposed)
18803 else if (readIsIndir)
18809 else if (write->OperIsLocal())
18811 LclVarDsc* dstVar = &lvaTable[write->gtLclVarCommon.gtLclNum];
18814 return dstVar->lvAddrExposed;
18816 else if (read->OperIsLocal())
18818 if (read->gtLclVarCommon.gtLclNum == write->gtLclVarCommon.gtLclNum)
18835 /** This predicate decides whether we will fold a tree with the structure:
18836 * x = x <op> y, where x could be any arbitrary expression, into x <op>= y.
18839 * This modification is only performed when the target architecture supports
18840 * complex addressing modes. In the case of ARM for example, this transformation
18841 * yields no benefit.
18843 * In case this function decides we can proceed to fold into an assignment operator,
18844 * we need to inspect whether the operator is commutative to tell fgMorph whether we need to
18845 * reverse the tree: if we saw x = y <op> x, we want to fold that into
18846 * x <op>= y, which is only valid when the operator is commutative.
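 *
 * For illustration (a sketch): x = x + y folds to x += y directly, while
 * x = y + x folds to x += y with *bReverse set to true -- legal only because
 * '+' is commutative; x = y - x must not be folded this way.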
18848 bool Compiler::fgShouldCreateAssignOp(GenTreePtr tree, bool* bReverse)
18850 #if CPU_LOAD_STORE_ARCH
18851 /* In the case of a load/store architecture, there's no gain by doing any of this, we bail. */
18853 #elif !defined(LEGACY_BACKEND)
18855 #else // defined(LEGACY_BACKEND)
18857 GenTreePtr op1 = tree->gtOp.gtOp1;
18858 GenTreePtr op2 = tree->gtGetOp2();
18859 genTreeOps cmop = op2->OperGet();
18861 /* Is the destination identical to the first RHS sub-operand? */
18862 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
18865 Do not transform the following tree
18867 [0024CFA4] ----------- const int 1
18868 [0024CFDC] ----G------ | int
18869 [0024CF5C] ----------- lclVar ubyte V01 tmp0
18870 [0024D05C] -A--G------ = ubyte
18871 [0024D014] D------N--- lclVar ubyte V01 tmp0
18875 [0024CFA4] ----------- const int 1
18876 [0024D05C] -A--G------ |= ubyte
18877 [0024D014] U------N--- lclVar ubyte V01 tmp0
18879 , when V01 is a struct field local.
18882 if (op1->gtOper == GT_LCL_VAR && varTypeIsSmall(op1->TypeGet()) && op1->TypeGet() != op2->gtOp.gtOp2->TypeGet())
18884 unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
18885 LclVarDsc* varDsc = lvaTable + lclNum;
18887 if (varDsc->lvIsStructField)
18896 else if (GenTree::OperIsCommutative(cmop))
18898 /* For commutative ops only, check for "a = x <op> a" */
18900 /* Should we be doing this at all? */
18901 if ((opts.compFlags & CLFLG_TREETRANS) == 0)
18906 /* Can we swap the operands to cmop ... */
18907 if ((op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) && (op2->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT))
18909 // Both sides must have side effects to prevent swap
18913 /* Is the destination identical to the second RHS sub-operand? */
18914 if (GenTree::Compare(op1, op2->gtOp.gtOp2))
18921 #endif // defined(LEGACY_BACKEND)
18924 #ifdef FEATURE_SIMD
18926 //-----------------------------------------------------------------------------------
18927 // fgMorphCombineSIMDFieldAssignments:
18928 // If the RHS of the input stmt is a read of the SIMD vector's X field, then this function
18929 // will keep reading the next few stmts, based on the vector size (2, 3, or 4).
18930 // If the next stmts' LHSs and RHSs are each located contiguously,
18931 // then we replace those statements with a single copyblk.
18934 // block - BasicBlock*. block which stmt belongs to
18935 // stmt - GenTreeStmt*. the stmt node we want to check
18938 // if this function successfully optimized the stmts, then return true; otherwise, return false.
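//
// For illustration (a sketch; field names assume a 4-float vector such as Vector4):
//     target.x = src.x;  target.y = src.y;  target.z = src.z;  target.w = src.w;
// is recognized here and replaced with a single copyblk of simdSize (16) bytes
// from src to target.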
18941 bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr stmt)
18944 noway_assert(stmt->gtOper == GT_STMT);
18945 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
18946 assert(tree->OperGet() == GT_ASG);
18948 GenTreePtr originalLHS = tree->gtOp.gtOp1;
18949 GenTreePtr prevLHS = tree->gtOp.gtOp1;
18950 GenTreePtr prevRHS = tree->gtOp.gtOp2;
18951 unsigned index = 0;
18952 var_types baseType = TYP_UNKNOWN;
18953 unsigned simdSize = 0;
18954 GenTreePtr simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true);
18956 if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT)
18958 // if the RHS is not from a SIMD vector field X, then there is no need to check further.
18962 var_types simdType = getSIMDTypeForSize(simdSize);
18963 int assignmentsCount = simdSize / genTypeSize(baseType) - 1;
18964 int remainingAssignments = assignmentsCount;
18965 GenTreePtr curStmt = stmt->gtNext;
18966 GenTreePtr lastStmt = stmt;
18968 while (curStmt != nullptr && remainingAssignments > 0)
18970 GenTreePtr exp = curStmt->gtStmt.gtStmtExpr;
18971 if (exp->OperGet() != GT_ASG)
18975 GenTreePtr curLHS = exp->gtGetOp1();
18976 GenTreePtr curRHS = exp->gtGetOp2();
18978 if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
18983 remainingAssignments--;
18987 lastStmt = curStmt;
18988 curStmt = curStmt->gtNext;
18991 if (remainingAssignments > 0)
18993 // if the number of remaining assignments is greater than zero, then the
18994 // assignments are not assigning to contiguous memory
18995 // locations from the same vector.
19001 printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
19002 printf("From BB%02u, stmt", block->bbNum);
19004 printf(" to stmt");
19005 printTreeID(lastStmt);
19010 for (int i = 0; i < assignmentsCount; i++)
19012 fgRemoveStmt(block, stmt->gtNext);
19015 GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
19016 if (simdStructNode->OperIsLocal())
19018 setLclRelatedToSIMDIntrinsic(simdStructNode);
19020 GenTree* copyBlkAddr = copyBlkDst;
19021 if (copyBlkAddr->gtOper == GT_LEA)
19023 copyBlkAddr = copyBlkAddr->AsAddrMode()->Base();
19025 GenTreeLclVarCommon* localDst = nullptr;
19026 if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr))
19028 setLclRelatedToSIMDIntrinsic(localDst);
19031 GenTree* simdStructAddr;
19032 if (simdStructNode->TypeGet() == TYP_BYREF)
19034 assert(simdStructNode->OperIsLocal());
19035 assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum));
19036 simdStructNode = gtNewOperNode(GT_IND, simdType, simdStructNode);
19040 assert(varTypeIsSIMD(simdStructNode));
19046 printf("\nBB%02u stmt", block->bbNum);
19048 printf("(before)\n");
19053 // TODO-1stClassStructs: we should be able to simply use a GT_IND here.
19054 GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize);
19055 blkNode->gtType = simdType;
19056 tree = gtNewBlkOpNode(blkNode, simdStructNode, simdSize,
19057 false, // not volatile
19058 true); // copyBlock
19060 stmt->gtStmt.gtStmtExpr = tree;
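// The replacement statement now has roughly this shape (a sketch):
//     ASG(BLK<simdSize>(copyBlkDst), simdStructNode)
// i.e. one block copy of the whole vector instead of per-field assignments.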
19062 // Since we generated a new address node which didn't exist before,
19063 // we should expose this address manually here.
19064 AXCStack stk(this);
19065 stk.Push(AXC_None);
19066 fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
19071 printf("\nReplaced BB%02u stmt", block->bbNum);
19073 printf("(after)\n");
19080 #endif // FEATURE_SIMD