// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                               Morph                                       XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif
#include "allocacheck.h" // for alloca
// Convert the given node into a call to the specified helper passing
// the given argument list.
//
// Tries to fold constants and also adds an edge for overflow exception.
// Returns the morphed tree.
GenTreePtr Compiler::fgMorphCastIntoHelper(GenTreePtr tree, int helper, GenTreePtr oper)
{
    GenTreePtr result;

    /* If the operand is a constant, we'll try to fold it */
    if (oper->OperIsConst())
    {
        GenTreePtr oldTree = tree;

        tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

        if (tree != oldTree)
        {
            return fgMorphTree(tree);
        }
        else if (tree->OperKind() & GTK_CONST)
        {
            return fgMorphConst(tree);
        }
    }

    // assert that oper is unchanged and that it is still a GT_CAST node
    noway_assert(tree->gtCast.CastOp() == oper);
    noway_assert(tree->gtOper == GT_CAST);

    result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
    assert(result == tree);
    return result;
}
/*****************************************************************************
 *
 *  Convert the given node into a call to the specified helper passing
 *  the given argument list.
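 *
 *  For example, a GT_CAST node for a double -> ulong conversion is rewritten
 *  in place as CALL(CORINFO_HELP_DBL2ULNG, oper), preserving the original
 *  node's value number.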
 */

GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeArgList* args)
{
    // The helper call ought to be semantically equivalent to the original node, so preserve its VN.
    tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN);

    tree->gtFlags |= GTF_CALL;

    if (args)
        tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);

    tree->gtCall.gtCallType            = CT_HELPER;
    tree->gtCall.gtCallMethHnd         = eeFindHelper(helper);
    tree->gtCall.gtCallArgs            = args;
    tree->gtCall.gtCallObjp            = nullptr;
    tree->gtCall.gtCallLateArgs        = nullptr;
    tree->gtCall.fgArgInfo             = nullptr;
    tree->gtCall.gtRetClsHnd           = nullptr;
    tree->gtCall.gtCallMoreFlags       = 0;
    tree->gtCall.gtInlineCandidateInfo = nullptr;
    tree->gtCall.gtControlExpr         = nullptr;

#ifdef LEGACY_BACKEND
    tree->gtCall.gtCallRegUsedMask = RBM_NONE;
#endif // LEGACY_BACKEND

    // Helper calls are never candidates.
    tree->gtCall.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;

#ifdef FEATURE_READYTORUN_COMPILER
    tree->gtCall.gtEntryPoint.addr = nullptr;
#endif

#if (defined(_TARGET_X86_) || defined(_TARGET_ARM_)) && !defined(LEGACY_BACKEND)
    if (varTypeIsLong(tree))
    {
        GenTreeCall*    callNode    = tree->AsCall();
        ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
        retTypeDesc->Reset();
        retTypeDesc->InitializeLongReturnType(this);
        callNode->ClearOtherRegs();
    }
#endif // _TARGET_XXX_

    /* Perform the morphing */

    tree = fgMorphArgs(tree->AsCall());

    return tree;
}
/*****************************************************************************
 *
 *  Determine if a relop must be morphed to a qmark to manifest a boolean value.
 *  This is done when code generation can't create straight-line code to do it.
 */
bool Compiler::fgMorphRelopToQmark(GenTreePtr tree)
{
#ifndef LEGACY_BACKEND
    return false;
#else  // LEGACY_BACKEND
    return (genActualType(tree->TypeGet()) == TYP_LONG) || varTypeIsFloating(tree->TypeGet());
#endif // LEGACY_BACKEND
}
/*****************************************************************************
 *
 *  Morph a cast node (we perform some very simple transformations here).
 */

#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
GenTreePtr Compiler::fgMorphCast(GenTreePtr tree)
{
    noway_assert(tree->gtOper == GT_CAST);
    noway_assert(genTypeSize(TYP_I_IMPL) == sizeof(void*));

    /* The first sub-operand is the thing being cast */

    GenTreePtr oper = tree->gtCast.CastOp();

    if (fgGlobalMorph && (oper->gtOper == GT_ADDR))
    {
        // Make sure we've checked if 'oper' is an address of an implicit-byref parameter.
        // If it is, fgMorphImplicitByRefArgs will change its type, and we want the cast
        // morphing code to see that type.
        fgMorphImplicitByRefArgs(oper);
    }

    var_types srcType = genActualType(oper->TypeGet());
    unsigned  srcSize;

    var_types dstType = tree->CastToType();
    unsigned  dstSize = genTypeSize(dstType);
    // See if the cast has to be done in two steps.  R -> I
    if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
    {
        // Only x86 must go through TYP_DOUBLE to get to all
        // integral types; everybody else can get straight there,
        // except when using helpers.
        if (srcType == TYP_FLOAT
#if !FEATURE_STACK_FP_X87

#if defined(_TARGET_ARM64_)
            // Arm64: src = float, dst is overflow conversion.
            // This goes through helper and hence src needs to be converted to double.
            && tree->gtOverflow()
#elif defined(_TARGET_AMD64_)
            // Amd64: src = float, dst = uint64 or overflow conversion.
            // This goes through helper and hence src needs to be converted to double.
            && (tree->gtOverflow() || (dstType == TYP_ULONG))
#elif defined(_TARGET_ARM_)
            // Arm: src = float, dst = int64/uint64 or overflow conversion.
            && (tree->gtOverflow() || varTypeIsLong(dstType))
#endif

#endif // FEATURE_STACK_FP_X87
            )
        {
            oper = gtNewCastNode(TYP_DOUBLE, oper, TYP_DOUBLE);
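            // e.g. on AMD64 an overflow-checking cast from float to ulong first
            // becomes a cast from double to ulong; the double -> ulong part is
            // then expanded into a CORINFO_HELP_DBL2ULNG_OVF call below.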
        }

        // Do we need to do it in two steps: R -> I, then I -> smallType?
        CLANG_FORMAT_COMMENT_ANCHOR;
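        // e.g. a cast (byte)d is split here: the float -> int conversion
        // happens first, and the cast that remains is int -> byte.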
#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
        if (dstSize < genTypeSize(TYP_INT))
        {
            oper = gtNewCastNodeL(TYP_INT, oper, TYP_INT);
            oper->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
#else
        if (dstSize < sizeof(void*))
        {
            oper = gtNewCastNodeL(TYP_I_IMPL, oper, TYP_I_IMPL);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
        }
#endif
        else
        {
            /* Note that if we need to use a helper call then we cannot morph oper */
            if (!tree->gtOverflow())
            {
#ifdef _TARGET_ARM64_ // On ARM64 all non-overflow checking conversions can be optimized
                goto OPTIMIZECAST;
#else
                switch (dstType)
                {
                    case TYP_INT:
#ifdef _TARGET_X86_ // there is no rounding convert to integer instruction on ARM or x64 so skip this
#ifdef LEGACY_BACKEND
                        // the RyuJIT backend does not use the x87 FPU and therefore
                        // does not support folding the cast conv.i4(round.d(d))
                        if ((oper->gtOper == GT_INTRINSIC) &&
                            (oper->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round))
                        {
                            /* optimization: conv.i4(round.d(d)) -> round.i(d) */
                            oper->gtType = dstType;
                            return fgMorphTree(oper);
                        }
                        // if SSE2 is not enabled, we need the helper
                        else
#endif // LEGACY_BACKEND
                            if (!opts.compCanUseSSE2)
                        {
                            return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
                        }
#endif // _TARGET_X86_
                        goto OPTIMIZECAST;

                    case TYP_UINT:
#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
                        goto OPTIMIZECAST;
#else  // _TARGET_ARM_
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
#endif // _TARGET_ARM_

                    case TYP_LONG:
#ifdef _TARGET_AMD64_
                        // SSE2 has instructions to convert a float/double directly to a long
                        goto OPTIMIZECAST;
#else  // !_TARGET_AMD64_
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
#endif //_TARGET_AMD64_

                    case TYP_ULONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
                    default:
                        break;
                }
#endif // _TARGET_ARM64_
            }
            else
            {
                switch (dstType)
                {
                    case TYP_INT:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
                    case TYP_UINT:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
                    case TYP_LONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
                    case TYP_ULONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
                    default:
                        noway_assert(!"Unexpected dstType");
                }
            }
        }
    }
#ifndef _TARGET_64BIT_
    // The code generation phase (for x86 & ARM32) does not handle casts
    // directly from [u]long to anything other than [u]int. Insert an
    // intermediate cast to native int.
    else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
    {
        oper = gtNewCastNode(TYP_I_IMPL, oper, TYP_I_IMPL);
        oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
        tree->gtFlags &= ~GTF_UNSIGNED;
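        // e.g. on x86, (short)someLong becomes (short)(native int)someLong.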
    }
#endif //!_TARGET_64BIT_

#ifdef _TARGET_ARM_
    else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) &&
             !varTypeIsLong(oper->gtCast.CastOp()))
    {
        // optimization: conv.r4(conv.r8(?)) -> conv.r4(d)
        // except when the ultimate source is a long, because there is no long-to-float
        // helper, so it must be done in two steps.
        // This happens semi-frequently because there is no IL 'conv.r4.un'
        oper->gtType       = TYP_FLOAT;
        oper->CastToType() = TYP_FLOAT;
        return fgMorphTree(oper);
    }
    // converts long/ulong --> float/double casts into helper calls.
    else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
    {
        if (dstType == TYP_FLOAT)
        {
            // there is only a double helper, so we
            // - change the dsttype to double
            // - insert a cast from double to float
            // - recurse into the resulting tree
            tree->CastToType() = TYP_DOUBLE;
            tree->gtType       = TYP_DOUBLE;

            tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);

            return fgMorphTree(tree);
        }
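        // e.g. (float)someLong was just rewritten as (float)(double)someLong;
        // the remaining long -> double conversions below become calls to
        // CORINFO_HELP_ULNG2DBL or CORINFO_HELP_LNG2DBL.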
        if (tree->gtFlags & GTF_UNSIGNED)
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif //_TARGET_ARM_
#ifdef _TARGET_AMD64_
    // Do we have to do two step U4/8 -> R4/8 ?
    // Codegen supports the following conversions as one-step operations:
    //   a) Long -> R4/8
    //   b) U8 -> R8
    //
    // The following conversions are performed as two-step operations using the above:
    //   U4 -> R4/8 = U4 -> Long -> R4/8
    //   U8 -> R4   = U8 -> R8 -> R4
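    //
    // e.g. (double)someUInt morphs into (double)(long)someUInt, since the
    // widening U4 -> Long step is exact.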
    else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            if (dstType == TYP_FLOAT)
            {
                // Codegen can handle U8 -> R8 conversion.
                // U8 -> R4 = U8 -> R8 -> R4
                // - change the dsttype to double
                // - insert a cast from double to float
                // - recurse into the resulting tree
                tree->CastToType() = TYP_DOUBLE;
                tree->gtType       = TYP_DOUBLE;
                tree               = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
                return fgMorphTree(tree);
            }
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
    }
#endif // _TARGET_AMD64_
#ifdef _TARGET_X86_
    // Do we have to do two step U4/8 -> R4/8 ?
    else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
            tree->gtFlags &= ~GTF_UNSIGNED;
#ifndef LEGACY_BACKEND
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
#endif // !LEGACY_BACKEND
        }
    }
#ifndef LEGACY_BACKEND
    else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType))
    {
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif // !LEGACY_BACKEND
#endif //_TARGET_XARCH_
    else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
    {
        // We are casting away GC information. We would like to just
        // change the type to int; however this gives the emitter fits because
        // it believes the variable is a GC variable at the beginning of the
        // instruction group, but is not turned non-gc by the code generator.
        // We fix this by copying the GC pointer to a non-gc pointer temp.
        noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");

        // We generate an assignment to an int and then do the cast from an int. With this we avoid
        // the gc problem and we allow casts to bytes, longs, etc...
        unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
        oper->gtType    = TYP_I_IMPL;
        GenTreePtr asg  = gtNewTempAssign(lclNum, oper);
        oper->gtType    = srcType;

        // do the real cast
        GenTreePtr cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), dstType);

        // Generate the comma tree
        oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);
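        // The result has the shape COMMA(ASG(tempN, oper), CAST(dstType <- tempN)),
        // where tempN is the non-gc temp grabbed above.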
        return fgMorphTree(oper);
    }
    // Look for narrowing casts ([u]long -> [u]int) and try to push them
    // down into the operand before morphing it.
    //
    // It doesn't matter if this cast is from ulong or long (i.e. if
    // GTF_UNSIGNED is set) because the transformation is only applied to
    // overflow-insensitive narrowing casts, which always silently truncate.
    //
    // Note that casts from [u]long to small integer types are handled above.
    if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT)))
    {
        // As a special case, look for overflow-sensitive casts of an AND
        // expression, and see if the second operand is a small constant. Since
        // the result of an AND is bound by its smaller operand, it may be
        // possible to prove that the cast won't overflow, which will in turn
        // allow the cast's operand to be transformed.
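        //
        // e.g. for checked((uint)(someLong & 0xFF)), the AND bounds the value
        // below 2^{32}, so the overflow check can be removed.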
        if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
        {
            GenTreePtr andOp2 = oper->gtOp.gtOp2;

            // Special case to the special case: AND with a casted int.
            if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
            {
                // gtFoldExprConst will deal with whether the cast is signed or
                // unsigned, or overflow-sensitive.
                andOp2 = oper->gtOp.gtOp2 = gtFoldExprConst(andOp2);
            }

            // Look for a constant less than 2^{32} for a cast to uint, or less
            // than 2^{31} for a cast to int.
            int maxWidth = (dstType == TYP_UINT) ? 32 : 31;

            if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
            {
                // This cast can't overflow.
                tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
            }
        }
        // Only apply this transformation during global morph,
        // when neither the cast node nor the oper node may throw an exception
        // based on the upper 32 bits.
        //
        if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx())
        {
            // For these operations the lower 32 bits of the result only depend
            // upon the lower 32 bits of the operands.
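            //
            // e.g. (int)(longA + longB) can be computed as (int)longA + (int)longB.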
            if (oper->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_OR, GT_XOR, GT_NOT, GT_NEG, GT_LSH))
            {
                DEBUG_DESTROY_NODE(tree);

                // Insert narrowing casts for op1 and op2
                oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, dstType);
                if (oper->gtOp.gtOp2 != nullptr)
                {
                    oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, dstType);
                }

                // Clear the GT_MUL_64RSLT if it is set
                if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
                {
                    oper->gtFlags &= ~GTF_MUL_64RSLT;
                }

                // The operation now produces a 32-bit result.
                oper->gtType = TYP_INT;

                // Remorph the new tree as the casts that we added may be folded away.
                return fgMorphTree(oper);
            }
        }
    }
OPTIMIZECAST:
    noway_assert(tree->gtOper == GT_CAST);

    /* Morph the operand */
    tree->gtCast.CastOp() = oper = fgMorphTree(oper);

    /* Reset the call flag */
    tree->gtFlags &= ~GTF_CALL;

    /* Unless we have an overflow cast, reset the except flag */
    if (!tree->gtOverflow())
    {
        tree->gtFlags &= ~GTF_EXCEPT;
    }

    /* Just in case new side effects were introduced */
    tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);

    srcType = oper->TypeGet();

    /* If GTF_UNSIGNED is set then force srcType to an unsigned type */
    if (tree->gtFlags & GTF_UNSIGNED)
    {
        srcType = genUnsignedType(srcType);
    }

    srcSize = genTypeSize(srcType);
    if (!gtIsActiveCSE_Candidate(tree)) // tree cannot be a CSE candidate
    {
        /* See if we can discard the cast */
        if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
        {
            if (srcType == dstType)
            { // Certainly if they are identical it is pointless
                goto REMOVE_CAST;
            }

            if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
            {
                unsigned   varNum = oper->gtLclVarCommon.gtLclNum;
                LclVarDsc* varDsc = &lvaTable[varNum];
                if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
                {
                    goto REMOVE_CAST;
                }
            }

            bool unsignedSrc = varTypeIsUnsigned(srcType);
            bool unsignedDst = varTypeIsUnsigned(dstType);
            bool signsDiffer = (unsignedSrc != unsignedDst);

            // For same-sized casts with
            // the same signs or non-overflow casts we discard them as well
            if (srcSize == dstSize)
            {
                /* This should have been handled above */
                noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));

                if (!signsDiffer)
                {
                    goto REMOVE_CAST;
                }

                if (!tree->gtOverflow())
                {
                    /* For small type casts, when necessary we force
                       the src operand to the dstType and allow the
                       implied load from memory to perform the casting */
                    if (varTypeIsSmall(srcType))
                    {
                        switch (oper->gtOper)
                        {
                            case GT_IND:
                            case GT_CLS_VAR:
                            case GT_LCL_FLD:
                            case GT_ARR_ELEM:
                                oper->gtType = dstType;
                                goto REMOVE_CAST;
                            default:
                                break;
                        }
                    }
                }
            }
            else if (srcSize < dstSize) // widening cast
            {
                // Keep any long casts
                if (dstSize == sizeof(int))
                {
                    // Only keep signed to unsigned widening cast with overflow check
                    if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
                    {
                        goto REMOVE_CAST;
                    }
                }

                // Casts from signed->unsigned can never overflow while widening
                if (unsignedSrc || !unsignedDst)
                {
                    tree->gtFlags &= ~GTF_OVERFLOW;
                }
            }
            else // narrowing cast
            {
                // Try to narrow the operand of the cast and discard the cast
                // Note: Do not narrow a cast that is marked as a CSE
                // And do not narrow if the oper is marked as a CSE either
                //
                if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) &&
                    optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false))
                {
                    optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true);

                    /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
                    if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
                    {
                        oper = oper->gtCast.CastOp();
                    }
                    goto REMOVE_CAST;
                }
            }
        }
    }
    switch (oper->gtOper)
    {
        /* If the operand is a constant, we'll fold it */
        case GT_CNS_INT:
        case GT_CNS_LNG:
        case GT_CNS_DBL:
        case GT_CNS_STR:
        {
            GenTreePtr oldTree = tree;

            tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

            // Did we get a comma throw as a result of gtFoldExprConst?
            if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
            {
                noway_assert(fgIsCommaThrow(tree));
                tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
                fgMorphTreeDone(tree);
                return tree;
            }
            else if (tree->gtOper != GT_CAST)
            {
                return tree;
            }

            noway_assert(tree->gtCast.CastOp() == oper); // unchanged
        }
        break;
        case GT_CAST:
            /* Check for two consecutive casts into the same dstType */
            if (!tree->gtOverflow())
            {
                var_types dstType2 = oper->CastToType();
                if (dstType == dstType2)
                {
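                    // e.g. the outer cast in (byte)(byte)x is redundant and
                    // can be discarded here.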
                    goto REMOVE_CAST;
                }
            }
            break;

#ifdef LEGACY_BACKEND
            /* If op1 is a mod node, mark it with the GTF_MOD_INT_RESULT flag
               so that the code generator will know not to convert the result
               of the idiv to a regpair */
            if (dstType == TYP_INT)
            {
                tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
            }

            if (dstType == TYP_UINT)
            {
                tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
            }
#endif // LEGACY_BACKEND
        case GT_COMMA:
            // Check for cast of a GT_COMMA with a throw overflow
            // Bug 110829: Since this optimization will bash the types,
            // neither oper nor commaOp2 can be CSE candidates
            if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper cannot be a CSE candidate
            {
                GenTreePtr commaOp2 = oper->gtOp.gtOp2;

                if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 cannot be a CSE candidate
                {
                    // need type of oper to be same as tree
                    if (tree->gtType == TYP_LONG)
                    {
                        commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
                        commaOp2->gtIntConCommon.SetLngValue(0);
                        /* Change the types of oper and commaOp2 to TYP_LONG */
                        oper->gtType = commaOp2->gtType = TYP_LONG;
                    }
                    else if (varTypeIsFloating(tree->gtType))
                    {
                        commaOp2->ChangeOperConst(GT_CNS_DBL);
                        commaOp2->gtDblCon.gtDconVal = 0.0;
                        // Change the types of oper and commaOp2.
                        // X87 promotes everything to TYP_DOUBLE,
                        // but others are a little more precise.
                        const var_types newTyp
#if FEATURE_X87_DOUBLES
                            = TYP_DOUBLE;
#else  // FEATURE_X87_DOUBLES
                            = tree->gtType;
#endif // FEATURE_X87_DOUBLES
                        oper->gtType = commaOp2->gtType = newTyp;
                    }
                    else
                    {
                        commaOp2->ChangeOperConst(GT_CNS_INT);
                        commaOp2->gtIntCon.gtIconVal = 0;
                        /* Change the types of oper and commaOp2 to TYP_INT */
                        oper->gtType = commaOp2->gtType = TYP_INT;
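                        // e.g. a comma-throw under an (int) cast becomes
                        // COMMA(throw, 0) with both nodes typed as TYP_INT.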
                    }

                    if (vnStore != nullptr)
                    {
                        fgValueNumberTreeConst(commaOp2);
                    }

                    /* Return the GT_COMMA node as the new tree */
                    return oper;
                }
            }
            break;

        default:
            break;
    } /* end switch (oper->gtOper) */
    if (tree->gtOverflow())
    {
        fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
    }

    return tree;

REMOVE_CAST:
    /* Here we've eliminated the cast, so just return its operand */
    assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate

    DEBUG_DESTROY_NODE(tree);
    return oper;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif
/*****************************************************************************
 *
 *  Perform an unwrap operation on a Proxy object.
 */

GenTreePtr Compiler::fgUnwrapProxy(GenTreePtr objRef)
{
    assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef));

    CORINFO_EE_INFO* pInfo = eeGetEEInfo();
    GenTreePtr       addTree;

    // Perform the unwrap:
    //
    //   This requires two extra indirections.
    //   We mark these indirections as 'invariant' and
    //   the CSE logic will hoist them when appropriate.
    //
    //   Note that each dereference is a GC pointer

    addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL));

    objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
    objRef->gtFlags |= GTF_IND_INVARIANT;

    addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL));

    objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
    objRef->gtFlags |= GTF_IND_INVARIANT;

    // objRef now holds the 'real this' reference (i.e. the unwrapped proxy)
    return objRef;
}
/*****************************************************************************
 *
 *  Morph an argument list; compute the pointer argument count in the process.
 *
 *  NOTE: This function can be called from any place in the JIT to perform re-morphing
 *  due to graph-altering modifications such as copy / constant propagation.
 */

unsigned UpdateGT_LISTFlags(GenTreePtr tree)
{
    assert(tree->gtOper == GT_LIST);

    unsigned flags = 0;
    if (tree->gtOp.gtOp2)
    {
        flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2);
    }
    flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);

    tree->gtFlags &= ~GTF_ALL_EFFECT;
    tree->gtFlags |= flags;

    return tree->gtFlags;
}
#ifdef DEBUG
void fgArgTabEntry::Dump()
{
    printf("fgArgTabEntry[arg %u", argNum);
    if (regNum != REG_STK)
    {
        printf(", %s, regs=%u", getRegName(regNum), numRegs);
    }
    if (numSlots > 0)
    {
        printf(", numSlots=%u, slotNum=%u", numSlots, slotNum);
    }
    printf(", align=%u", alignment);
    if (lateArgInx != (unsigned)-1)
    {
        printf(", lateArgInx=%u", lateArgInx);
    }
    if (isTmp)
    {
        printf(", tmpNum=V%02u", tmpNum);
    }
    if (needPlace)
    {
        printf(", needPlace");
    }
    if (processed)
    {
        printf(", processed");
    }
    if (isBackFilled)
    {
        printf(", isBackFilled");
    }
    if (isNonStandard)
    {
        printf(", isNonStandard");
    }
    printf("]\n");
}
#endif
fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs)
{
    compiler    = comp;
    callTree    = call;
    argCount    = 0; // filled in arg count, starts at zero
    nextSlotNum = INIT_ARG_STACK_SLOT;
    stkLevel    = 0;
#if defined(UNIX_X86_ABI)
    alignmentDone = false;
    stkSizeBytes  = 0;
    padStkAlign   = 0;
#endif
#if FEATURE_FIXED_OUT_ARGS
    outArgSize = 0;
#endif

    argTableSize = numArgs; // the allocated table size

    hasRegArgs   = false;
    hasStackArgs = false;
    argsComplete = false;
    argsSorted   = false;

    if (argTableSize == 0)
    {
        argTable = nullptr;
    }
    else
    {
        argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
    }
}
/*****************************************************************************
 *
 *  fgArgInfo Copy Constructor
 *
 *  This method needs to act like a copy constructor for fgArgInfo.
 *  The newCall needs to have its fgArgInfo initialized such that
 *  we have newCall that is an exact copy of the oldCall.
 *  We have to take care since the argument information
 *  in the argTable contains pointers that must point to the
 *  new arguments and not the old arguments.
 */
fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall)
{
    fgArgInfoPtr oldArgInfo = oldCall->gtCall.fgArgInfo;

    compiler = oldArgInfo->compiler;
    callTree = newCall;

    argCount    = 0; // filled in arg count, starts at zero
    nextSlotNum = INIT_ARG_STACK_SLOT;
    stkLevel    = oldArgInfo->stkLevel;
#if defined(UNIX_X86_ABI)
    alignmentDone = oldArgInfo->alignmentDone;
    stkSizeBytes  = oldArgInfo->stkSizeBytes;
    padStkAlign   = oldArgInfo->padStkAlign;
#endif
#if FEATURE_FIXED_OUT_ARGS
    outArgSize = oldArgInfo->outArgSize;
#endif
    argTableSize = oldArgInfo->argTableSize;
    argsComplete = false;

    if (argTableSize > 0)
    {
        argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
        for (unsigned inx = 0; inx < argTableSize; inx++)
        {
            argTable[inx] = nullptr;
        }
    }

    assert(oldArgInfo->argsComplete);
    // We create local, artificial GenTreeArgLists that include the gtCallObjp, if that exists, as the first argument,
    // so we can iterate over these argument lists more uniformly.
    // Need to provide a temporary non-null first argument to these constructors: if we use them, we'll replace it.
    GenTreeArgList* newArgs;
    GenTreeArgList  newArgObjp(newCall, newCall->gtCallArgs);
    GenTreeArgList* oldArgs;
    GenTreeArgList  oldArgObjp(oldCall, oldCall->gtCallArgs);

    if (newCall->gtCallObjp == nullptr)
    {
        assert(oldCall->gtCallObjp == nullptr);
        newArgs = newCall->gtCallArgs;
        oldArgs = oldCall->gtCallArgs;
    }
    else
    {
        assert(oldCall->gtCallObjp != nullptr);
        newArgObjp.Current() = newCall->gtCallObjp;
        newArgs              = &newArgObjp;
        oldArgObjp.Current() = oldCall->gtCallObjp;
        oldArgs              = &oldArgObjp;
    }
    GenTreePtr        newCurr;
    GenTreePtr        oldCurr;
    GenTreeArgList*   newParent   = nullptr;
    GenTreeArgList*   oldParent   = nullptr;
    fgArgTabEntryPtr* oldArgTable = oldArgInfo->argTable;
    bool              scanRegArgs = false;

    while (newArgs)
    {
        /* Get hold of the next argument values for the oldCall and newCall */

        newCurr = newArgs->Current();
        oldCurr = oldArgs->Current();
        if (newArgs != &newArgObjp)
        {
            newParent = newArgs;
            oldParent = oldArgs;
        }
        else
        {
            assert(newParent == nullptr && oldParent == nullptr);
        }
        newArgs = newArgs->Rest();
        oldArgs = oldArgs->Rest();
        fgArgTabEntryPtr oldArgTabEntry = nullptr;
        fgArgTabEntryPtr newArgTabEntry = nullptr;

        for (unsigned inx = 0; inx < argTableSize; inx++)
        {
            oldArgTabEntry = oldArgTable[inx];

            if (oldArgTabEntry->parent == oldParent)
            {
                assert((oldParent == nullptr) == (newParent == nullptr));

                // We have found the matching "parent" field in oldArgTabEntry

                newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

                // First block copy all fields
                //
                *newArgTabEntry = *oldArgTabEntry;

                // Then update all GenTreePtr fields in the newArgTabEntry
                //
                newArgTabEntry->parent = newParent;

                // The node field is likely to have been updated
                // to point at a node in the gtCallLateArgs list
                //
                if (oldArgTabEntry->node == oldCurr)
                {
                    // node is not pointing into the gtCallLateArgs list
                    newArgTabEntry->node = newCurr;
                }
                else
                {
                    // node must be pointing into the gtCallLateArgs list
                    //
                    // We will fix this pointer up in the next loop
                    //
                    newArgTabEntry->node = nullptr; // For now we assign a NULL to this field

                    scanRegArgs = true;
                }

                // Now initialize the proper element in the argTable array
                //
                argTable[inx] = newArgTabEntry;
                break;
            }
        }

        // We should have found the matching oldArgTabEntry and created the newArgTabEntry
        //
        assert(newArgTabEntry != nullptr);
    }
    if (scanRegArgs)
    {
        newArgs = newCall->gtCallLateArgs;
        oldArgs = oldCall->gtCallLateArgs;

        while (newArgs)
        {
            /* Get hold of the next argument values for the oldCall and newCall */

            assert(newArgs->OperIsList());

            newCurr = newArgs->Current();
            newArgs = newArgs->Rest();

            assert(oldArgs->OperIsList());

            oldCurr = oldArgs->Current();
            oldArgs = oldArgs->Rest();
            fgArgTabEntryPtr oldArgTabEntry = nullptr;
            fgArgTabEntryPtr newArgTabEntry = nullptr;

            for (unsigned inx = 0; inx < argTableSize; inx++)
            {
                oldArgTabEntry = oldArgTable[inx];

                if (oldArgTabEntry->node == oldCurr)
                {
                    // We have found the matching "node" field in oldArgTabEntry

                    newArgTabEntry = argTable[inx];
                    assert(newArgTabEntry != nullptr);

                    // update the "node" GenTreePtr fields in the newArgTabEntry
                    //
                    assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field

                    newArgTabEntry->node = newCurr;
                    break;
                }
            }
        }
    }

    argCount     = oldArgInfo->argCount;
    nextSlotNum  = oldArgInfo->nextSlotNum;
    hasRegArgs   = oldArgInfo->hasRegArgs;
    hasStackArgs = oldArgInfo->hasStackArgs;
    argsComplete = true;
    argsSorted   = true;
}
void fgArgInfo::AddArg(fgArgTabEntryPtr curArgTabEntry)
{
    assert(argCount < argTableSize);
    argTable[argCount] = curArgTabEntry;
    argCount++;
}
fgArgTabEntryPtr fgArgInfo::AddRegArg(
    unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
{
    fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->parent        = parent;
    curArgTabEntry->regNum        = regNum;
    curArgTabEntry->slotNum       = 0;
    curArgTabEntry->numRegs       = numRegs;
    curArgTabEntry->numSlots      = 0;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = (unsigned)-1;
    curArgTabEntry->tmpNum        = (unsigned)-1;
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->isHfaRegArg   = false;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;

    hasRegArgs = true;
    AddArg(curArgTabEntry);
    return curArgTabEntry;
}
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned        argNum,
                                      GenTreePtr      node,
                                      GenTreePtr      parent,
                                      regNumber       regNum,
                                      unsigned        numRegs,
                                      unsigned        alignment,
                                      const bool      isStruct,
                                      const regNumber otherRegNum,
                                      const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
{
    fgArgTabEntryPtr curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
    assert(curArgTabEntry != nullptr);

    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
    // PlaceHolder node (in case of needed late argument, for example.)
    // This requires the use of an extra flag. At creation time the state is right,
    // and this assert enforces that.
    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
    curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
    curArgTabEntry->isStruct    = isStruct;    // is this a struct arg

    if (isStruct && structDescPtr != nullptr)
    {
        curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
    }

    return curArgTabEntry;
}
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned   argNum,
                                      GenTreePtr node,
                                      GenTreePtr parent,
                                      unsigned   numSlots,
                                      unsigned   alignment
                                          FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct))
{
    fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

    nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);

#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
    // PlaceHolder node (in case of needed late argument, for example.)
    // This requires the use of an extra flag. At creation time the state is right,
    // and this assert enforces that.
    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
    curArgTabEntry->isStruct = isStruct; // is this a struct arg
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->parent        = parent;
    curArgTabEntry->regNum        = REG_STK;
    curArgTabEntry->slotNum       = nextSlotNum;
    curArgTabEntry->numRegs       = 0;
    curArgTabEntry->numSlots      = numSlots;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = (unsigned)-1;
    curArgTabEntry->tmpNum        = (unsigned)-1;
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->isHfaRegArg   = false;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;

    hasStackArgs = true;
    AddArg(curArgTabEntry);

    nextSlotNum += numSlots;
    return curArgTabEntry;
}
void fgArgInfo::RemorphReset()
{
    nextSlotNum = INIT_ARG_STACK_SLOT;
}
fgArgTabEntry* fgArgInfo::RemorphRegArg(
    unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    unsigned         regArgInx      = 0;
    unsigned         inx;

    for (inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }

        bool       isRegArg;
        GenTreePtr argx;
        if (curArgTabEntry->parent != nullptr)
        {
            assert(curArgTabEntry->parent->OperIsList());
            argx     = curArgTabEntry->parent->Current();
            isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
        }
        else
        {
            argx     = curArgTabEntry->node;
            isRegArg = true;
        }

        if (isRegArg)
        {
            regArgInx++;
        }
    }
    // if this was a nonstandard arg the table is definitive
    if (curArgTabEntry->isNonStandard)
    {
        regNum = curArgTabEntry->regNum;
    }

    assert(curArgTabEntry->argNum == argNum);
    assert(curArgTabEntry->regNum == regNum);
    assert(curArgTabEntry->alignment == alignment);
    assert(curArgTabEntry->parent == parent);

    if (curArgTabEntry->node != node)
    {
        GenTreePtr argx     = nullptr;
        unsigned   regIndex = 0;

        /* process the register argument list */
        for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; (regIndex++, list = list->Rest()))
        {
            argx = list->Current();
            assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
            if (regIndex == regArgInx)
            {
                break;
            }
        }
        assert(regIndex == regArgInx);
        assert(regArgInx == curArgTabEntry->lateArgInx);

        if (curArgTabEntry->node != argx)
        {
            curArgTabEntry->node = argx;
        }
    }
    return curArgTabEntry;
}
void fgArgInfo::RemorphStkArg(
    unsigned argNum, GenTreePtr node, GenTreePtr parent, unsigned numSlots, unsigned alignment)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    bool             isRegArg       = false;
    unsigned         regArgInx      = 0;
    GenTreePtr       argx;
    unsigned         inx;

    for (inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];

        if (curArgTabEntry->parent != nullptr)
        {
            assert(curArgTabEntry->parent->OperIsList());
            argx     = curArgTabEntry->parent->Current();
            isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
        }
        else
        {
            argx     = curArgTabEntry->node;
            isRegArg = true;
        }

        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }

        if (isRegArg)
        {
            regArgInx++;
        }
    }

    nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);

    assert(curArgTabEntry->argNum == argNum);
    assert(curArgTabEntry->slotNum == nextSlotNum);
    assert(curArgTabEntry->numSlots == numSlots);
    assert(curArgTabEntry->alignment == alignment);
    assert(curArgTabEntry->parent == parent);
    assert(parent->OperIsList());

#if FEATURE_FIXED_OUT_ARGS
    if (curArgTabEntry->node != node)
    {
        if (isRegArg)
        {
            GenTreePtr argx     = nullptr;
            unsigned   regIndex = 0;

            /* process the register argument list */
            for (GenTreeArgList *list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), regIndex++)
            {
                argx = list->Current();
                assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
                if (regIndex == regArgInx)
                {
                    break;
                }
            }
            assert(regIndex == regArgInx);
            assert(regArgInx == curArgTabEntry->lateArgInx);

            if (curArgTabEntry->node != argx)
            {
                curArgTabEntry->node = argx;
            }
        }
        else
        {
            assert(parent->Current() == node);
            curArgTabEntry->node = node;
        }
    }
#else
    curArgTabEntry->node = node;
#endif

    nextSlotNum += numSlots;
}
void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    assert(argNum < argCount);
    for (unsigned inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }
    }

    assert(numRegs > 0);
    assert(numSlots > 0);

    curArgTabEntry->isSplit  = true;
    curArgTabEntry->numRegs  = numRegs;
    curArgTabEntry->numSlots = numSlots;

    nextSlotNum += numSlots;
}
void fgArgInfo::EvalToTmp(unsigned argNum, unsigned tmpNum, GenTreePtr newNode)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    assert(argNum < argCount);
    for (unsigned inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }
    }
    assert(curArgTabEntry->parent->Current() == newNode);

    curArgTabEntry->node   = newNode;
    curArgTabEntry->tmpNum = tmpNum;
    curArgTabEntry->isTmp  = true;
}
void fgArgInfo::ArgsComplete()
{
    bool hasStackArgs    = false;
    bool hasStructRegArg = false;

    for (unsigned curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
        assert(curArgTabEntry != nullptr);
        GenTreePtr argx = curArgTabEntry->node;

        if (curArgTabEntry->regNum == REG_STK)
        {
            hasStackArgs = true;
#if !FEATURE_FIXED_OUT_ARGS
            // On x86 we use push instructions to pass arguments:
            //   The non-register arguments are evaluated and pushed in order
            //   and they are never evaluated into temps
            //
            continue;
#endif
        }
        else // we have a register argument, next we look for a struct type.
        {
            if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
            {
                hasStructRegArg = true;
            }
        }
        /* If the argument tree contains an assignment (GTF_ASG) then the argument and
           every earlier argument (except constants) must be evaluated into temps
           since there may be other arguments that follow and they may use the value being assigned.

           EXAMPLE: ArgTab is "a, a=5, a"
                    -> when we see the second arg "a=5"
                       we know the first two arguments "a, a=5" have to be evaluated into temps

           For the case of an assignment, we only know that there exists some assignment somewhere
           in the tree.  We don't know what is being assigned so we are very conservative here
           and assume that any local variable could have been assigned.
         */

        if (argx->gtFlags & GTF_ASG)
        {
            // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
            // a tmp, then we need a temp in the late arg list.
            if ((argCount > 1) || argx->OperIsCopyBlkOp()
#ifdef FEATURE_FIXED_OUT_ARGS
                || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
                                         // that we only have late non-register args when that feature is on.
#endif // FEATURE_FIXED_OUT_ARGS
                )
            {
                curArgTabEntry->needTmp = true;
            }

            // For all previous arguments, unless they are a simple constant
            //  we require that they be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);

                assert(prevArgTabEntry->node);
                if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
                {
                    prevArgTabEntry->needTmp = true;
                }
            }
        }
#if FEATURE_FIXED_OUT_ARGS
        // Like calls, if this argument has a tree that will do an inline throw,
        // a call to a jit helper, then we need to treat it like a call (but only
        // if there are/were any stack args).
        // This means unnesting, sorting, etc.  Technically this is overly
        // conservative, but I want to avoid as much special-case debug-only code
        // as possible, so leveraging the GTF_CALL flag is the easiest.
        //
        if (!(argx->gtFlags & GTF_CALL) && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) &&
            compiler->opts.compDbgCode &&
            (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
        {
            for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
            {
                if (otherInx == curInx)
                {
                    continue;
                }

                if (argTable[otherInx]->regNum == REG_STK)
                {
                    argx->gtFlags |= GTF_CALL;
                    break;
                }
            }
        }
#endif // FEATURE_FIXED_OUT_ARGS
        /* If it contains a call (GTF_CALL) then itself and everything before the call
           with a GLOB_EFFECT must be evaluated into temps (this is because everything
           with SIDE_EFFECT has to be kept in the right order since we will move the
           call to the first position).

           For calls we don't have to be quite as conservative as we are with an assignment
           since the call won't be modifying any non-address-taken LclVars.
         */
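        // e.g. (hypothetical) for call(globalVar, f()) where f() assigns to
        // globalVar, the first argument must be evaluated into a temp before
        // f() is called.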
        if (argx->gtFlags & GTF_CALL)
        {
            if (argCount > 1) // If this is not the only argument
            {
                curArgTabEntry->needTmp = true;
            }
            else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
            {
                // Spill all arguments that are floating point calls
                curArgTabEntry->needTmp = true;
            }

            // All previous arguments may need to be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
                assert(prevArgTabEntry->node);

                // For all previous arguments, if they have any GTF_ALL_EFFECT
                //  we require that they be evaluated into a temp
                if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
                {
                    prevArgTabEntry->needTmp = true;
                }
#if FEATURE_FIXED_OUT_ARGS
                // Or, if they are stored into the FIXED_OUT_ARG area
                // we require that they be moved to the gtCallLateArgs
                // and replaced with a placeholder node
                else if (prevArgTabEntry->regNum == REG_STK)
                {
                    prevArgTabEntry->needPlace = true;
                }
#endif
            }
        }
#ifndef LEGACY_BACKEND
#if FEATURE_MULTIREG_ARGS
        // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST
        // with multiple indirections, so here we consider spilling it into a tmp LclVar.
        //
        // Note that Arm32 is a LEGACY_BACKEND and it defines FEATURE_MULTIREG_ARGS,
        // so we skip this for ARM32 until it is ported to use RyuJIT backend
        //
        bool isMultiRegArg = (curArgTabEntry->numRegs > 1);

        if ((argx->TypeGet() == TYP_STRUCT) && (curArgTabEntry->needTmp == false))
        {
            if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
            {
                // Spill multireg struct arguments that have Assignments or Calls embedded in them
                curArgTabEntry->needTmp = true;
            }
            else
            {
                // We call gtPrepareCost to measure the cost of evaluating this tree
                compiler->gtPrepareCost(argx);

                if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX)))
                {
                    // Spill multireg struct arguments that are expensive to evaluate twice
                    curArgTabEntry->needTmp = true;
                }
                else if (argx->OperGet() == GT_OBJ)
                {
                    GenTreeObj*          argObj     = argx->AsObj();
                    CORINFO_CLASS_HANDLE objClass   = argObj->gtClass;
                    unsigned             structSize = compiler->info.compCompHnd->getClassSize(objClass);
                    switch (structSize)
                    {
                        case 3:
                        case 5:
                        case 6:
                        case 7:
                            // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
                            //
                            if (argObj->gtObj.gtOp1->IsVarAddr() == false) // Is the source not a LclVar?
                            {
                                // If we don't have a LclVar we need to read exactly 3,5,6 or 7 bytes
                                // For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
                                //
                                curArgTabEntry->needTmp = true;
                            }
                            break;

                        case 11:
                        case 13:
                        case 14:
                        case 15:
                            // Spill any GT_OBJ multireg structs that are difficult to extract
                            //
                            // When we have a GT_OBJ of a struct with the above sizes we would need
                            // to use 3 or 4 load instructions to load the exact size of this struct.
                            // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
                            // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
                            // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
                            // the argument.
                            //
                            curArgTabEntry->needTmp = true;
                            break;

                        default:
                            break;
                    }
                }
            }
        }
#endif // FEATURE_MULTIREG_ARGS
#endif // LEGACY_BACKEND
    }
    // We only care because we can't spill structs and qmarks involve a lot of spilling, but
    // if we don't have qmarks, then it doesn't matter.
    // So check for Qmark's globally once here, instead of inside the loop.
    //
    const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);

#if FEATURE_FIXED_OUT_ARGS

    // For Arm/x64 we only care because we can't reorder a register
    // argument that uses GT_LCLHEAP.  This is an optimization to
    // save a check inside the below loop.
    //
    const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);

#else

    const bool hasStackArgsWeCareAbout = hasStackArgs;

#endif // FEATURE_FIXED_OUT_ARGS
    // If we have any stack args we have to force the evaluation
    // of any arguments passed in registers that might throw an exception
    //
    // Technically we are only required to handle the following two cases:
    //     a GT_IND with GTF_IND_RNGCHK (only on x86) or
    //     a GT_LCLHEAP node that allocates stuff on the stack
    //
    if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
    {
        for (unsigned curInx = 0; curInx < argCount; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
            assert(curArgTabEntry != nullptr);
            GenTreePtr argx = curArgTabEntry->node;

            // Examine the register args that are currently not marked needTmp
            //
            if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
            {
                if (hasStackArgsWeCareAbout)
                {
#if !FEATURE_FIXED_OUT_ARGS
                    // On x86 we previously recorded a stack depth of zero when
                    // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag.
                    // Thus we cannot reorder the argument after any stack based argument.
                    // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
                    //  check for it explicitly.)
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        curArgTabEntry->needTmp = true;
                        continue;
                    }
#else
                    // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        assert(compiler->compLocallocUsed);

                        // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
                        //
                        if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
                        {
                            curArgTabEntry->needTmp = true;
                            continue;
                        }
                    }
#endif
                }
                if (hasStructRegArgWeCareAbout)
                {
                    // Returns true if a GT_QMARK node is encountered in the argx tree
                    //
                    if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
                    {
                        curArgTabEntry->needTmp = true;
                    }
                }
            }
        }
    }

    argsComplete = true;
}
void fgArgInfo::SortArgs()
{
    assert(argsComplete == true);

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("\nSorting the arguments:\n");
    }
#endif

    /* Shuffle the arguments around before we build the gtCallLateArgs list.
       The idea is to move all "simple" arguments like constants and local vars
       to the end of the table, and move the complex arguments towards the beginning
       of the table. This will help prevent registers from being spilled by
       allowing us to evaluate the more complex arguments before the simpler arguments.
       The argTable ends up looking like:
           +------------------------------------+  <--- argTable[argCount - 1]
           |          constants                 |
           +------------------------------------+
           |    local var / local field         |
           +------------------------------------+
           | remaining arguments sorted by cost |
           +------------------------------------+
           | temps (argTable[].needTmp = true)  |
           +------------------------------------+
           |  args with calls (GTF_CALL)        |
           +------------------------------------+  <--- argTable[0]
     */
    /* Set the beginning and end for the new argument table */
    unsigned curInx;
    int      regCount      = 0;
    unsigned begTab        = 0;
    unsigned endTab        = argCount - 1;
    unsigned argsRemaining = argCount;

    // First take care of arguments that are constants.
    // [We use a backward iterator pattern]
    //
    curInx = argCount;
    do
    {
        curInx--;

        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
        {
            regCount++;
        }

        // Skip any already processed args
        //
        if (!curArgTabEntry->processed)
        {
            GenTreePtr argx = curArgTabEntry->node;

            // put constants at the end of the table
            //
            if (argx->gtOper == GT_CNS_INT)
            {
                noway_assert(curInx <= endTab);

                curArgTabEntry->processed = true;

                // place curArgTabEntry at the endTab position by performing a swap
                //
                if (curInx != endTab)
                {
                    argTable[curInx] = argTable[endTab];
                    argTable[endTab] = curArgTabEntry;
                }

                endTab--;
                argsRemaining--;
            }
        }
    } while (curInx > 0);
    if (argsRemaining > 0)
    {
        // Next take care of arguments that are calls.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                // put calls at the beginning of the table
                //
                if (argx->gtFlags & GTF_CALL)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }
    if (argsRemaining > 0)
    {
        // Next take care of arguments that are temps.
        // These temps come before the arguments that are
        // ordinary local vars or local fields
        // since this will give them a better chance to become
        // enregistered into their actual argument register.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                if (curArgTabEntry->needTmp)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }
    if (argsRemaining > 0)
    {
        // Next take care of local var and local field arguments.
        // These are moved towards the end of the argument evaluation.
        // [We use a backward iterator pattern]
        //
        curInx = endTab + 1;
        do
        {
            curInx--;

            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
                {
                    noway_assert(curInx <= endTab);

                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the endTab position by performing a swap
                    //
                    if (curInx != endTab)
                    {
                        argTable[curInx] = argTable[endTab];
                        argTable[endTab] = curArgTabEntry;
                    }

                    endTab--;
                    argsRemaining--;
                }
            }
        } while (curInx > begTab);
    }
    // Finally, take care of all the remaining arguments.
    // Note that we fill in one arg at a time using a while loop.
    bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
    while (argsRemaining > 0)
    {
        /* Find the most expensive arg remaining and evaluate it next */

        fgArgTabEntryPtr expensiveArgTabEntry = nullptr;
        unsigned         expensiveArg         = UINT_MAX;
        unsigned         expensiveArgCost     = 0;

        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                // We should have already handled these kinds of args
                assert(argx->gtOper != GT_LCL_VAR);
                assert(argx->gtOper != GT_LCL_FLD);
                assert(argx->gtOper != GT_CNS_INT);

                // This arg should either have no persistent side effects or be the last one in our table
                // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));

                if (argsRemaining == 1)
                {
                    // This is the last arg to place
                    expensiveArg         = curInx;
                    expensiveArgTabEntry = curArgTabEntry;
                    assert(begTab == endTab);
                    break;
                }
                else
                {
                    if (!costsPrepared)
                    {
                        /* We call gtPrepareCost to measure the cost of evaluating this tree */
                        compiler->gtPrepareCost(argx);
                    }

                    if (argx->gtCostEx > expensiveArgCost)
                    {
                        // Remember this arg as the most expensive one that we have yet seen
                        expensiveArgCost     = argx->gtCostEx;
                        expensiveArg         = curInx;
                        expensiveArgTabEntry = curArgTabEntry;
                    }
                }
            }
        }

        noway_assert(expensiveArg != UINT_MAX);

        // put the most expensive arg towards the beginning of the table
        //
        expensiveArgTabEntry->processed = true;

        // place expensiveArgTabEntry at the begTab position by performing a swap
        //
        if (expensiveArg != begTab)
        {
            argTable[expensiveArg] = argTable[begTab];
            argTable[begTab]       = expensiveArgTabEntry;
        }

        begTab++;
        argsRemaining--;

        costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop
    }

    // The table should now be completely filled and thus begTab should now be adjacent to endTab
    // and argsRemaining should be zero
    assert(begTab == (endTab + 1));
    assert(argsRemaining == 0);
#if !FEATURE_FIXED_OUT_ARGS
    // Finally build the regArgList
    //
    callTree->gtCall.regArgList      = NULL;
    callTree->gtCall.regArgListCount = regCount;

    unsigned regInx = 0;
    for (curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
        {
            // Encode the argument register in the register mask
            //
            callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
            regInx++;
        }
    }
#endif // !FEATURE_FIXED_OUT_ARGS

    argsSorted = true;
}
//------------------------------------------------------------------------------
// fgMakeTmpArgNode : This function creates a tmp var only if needed.
//                    We need this to be done in order to enforce ordering
//                    of the evaluation of arguments.
//
// Arguments:
//    tmpVarNum  - the var num which we clone into the newly created temp var.
//
// Return Value:
//    the newly created temp var tree.
GenTreePtr Compiler::fgMakeTmpArgNode(
    unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters))
{
    LclVarDsc* varDsc = &lvaTable[tmpVarNum];
    assert(varDsc->lvIsTemp);
    var_types type = varDsc->TypeGet();

    // Create a copy of the temp to go into the late argument list
    GenTreePtr arg      = gtNewLclvNode(tmpVarNum, type);
    GenTreePtr addrNode = nullptr;

    if (varTypeIsStruct(type))
    {

#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING

        arg->gtFlags |= GTF_DONT_CSE;

#else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING
        // Can this type be passed in a single register?
        // If so, the following call will return the corresponding primitive type.
        // Otherwise, it will return TYP_UNKNOWN and we will pass it by reference.
        bool                 passedInRegisters = false;
        structPassingKind    kind;
        CORINFO_CLASS_HANDLE clsHnd         = varDsc->lvVerTypeInfo.GetClassHandle();
        var_types            structBaseType = getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd);

        if (structBaseType != TYP_UNKNOWN)
        {
            passedInRegisters = true;
            type              = structBaseType;
        }
#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
        // If it is passed in registers, don't get the address of the var. Make it a
        // field instead. It will be loaded in registers with putarg_reg tree in lower.
        if (passedInRegisters)
        {
            arg->ChangeOper(GT_LCL_FLD);
            arg->gtType = type;
        }
        else
        {
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
            // TODO-Cleanup: Fix this - we should never have an address that is TYP_STRUCT.
            var_types addrType = type;
#else
            var_types addrType = TYP_BYREF;
#endif
            arg      = gtNewOperNode(GT_ADDR, addrType, arg);
            addrNode = arg;
#if FEATURE_MULTIREG_ARGS
#ifdef _TARGET_ARM64_
            assert(varTypeIsStruct(type));
            if (lvaIsMultiregStruct(varDsc))
            {
                // ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD);
                // as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works.
                // We will create a GT_OBJ for the argument below.
                // This will be passed by value in two registers.
                assert(addrNode != nullptr);

                // Create an Obj of the temp to use it as a call argument.
                arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);

                // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here;
                // this is only to preserve former behavior (though some CSE'ing of struct
                // values can be pessimizing, so enabling this may require some additional tuning).
                arg->gtFlags |= GTF_DONT_CSE;
            }
#endif // _TARGET_ARM64_
#endif // FEATURE_MULTIREG_ARGS
        }
#else // not (_TARGET_AMD64_ or _TARGET_ARM64_)

        // On other targets, we pass the struct by value
        assert(varTypeIsStruct(type));

        addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);

        // Get a new Obj node temp to use it as a call argument.
        // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object.
        arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode);

#endif // not (_TARGET_AMD64_ or _TARGET_ARM64_)

    } // (varTypeIsStruct(type))
    if (addrNode != nullptr)
    {
        assert(addrNode->gtOper == GT_ADDR);

        // This will prevent this LclVar from being optimized away
        lvaSetVarAddrExposed(tmpVarNum);

        // the child of a GT_ADDR is required to have this flag set
        addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE;
    }

    return arg;
}
void fgArgInfo::EvalArgsToTemps()
{
    assert(argsSorted == true);

    unsigned regArgInx = 0;
    // Now go through the argument table and perform the necessary evaluation into temps
    GenTreeArgList* tmpRegArgNext = nullptr;
    for (unsigned curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        GenTreePtr argx     = curArgTabEntry->node;
        GenTreePtr setupArg = nullptr;
        GenTreePtr defArg;

#if !FEATURE_FIXED_OUT_ARGS
        // Only ever set for FEATURE_FIXED_OUT_ARGS
        assert(curArgTabEntry->needPlace == false);

        // On x86 and other archs that use push instructions to pass arguments:
        //   Only the register arguments need to be replaced with placeholder nodes.
        //   Stacked arguments are evaluated and pushed (or stored into the stack) in order.
        //
        if (curArgTabEntry->regNum == REG_STK)
            continue;
#endif

        if (curArgTabEntry->needTmp)
        {
            unsigned tmpVarNum;

            if (curArgTabEntry->isTmp == true)
            {
2169 // Create a copy of the temp to go into the late argument list
2170 tmpVarNum = curArgTabEntry->tmpNum;
2171 defArg = compiler->fgMakeTmpArgNode(tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
2172 argTable[curInx]->structDesc.passedInRegisters));
2174 // mark the original node as a late argument
2175 argx->gtFlags |= GTF_LATE_ARG;
2179 // Create a temp assignment for the argument
2180 // Put the temp in the gtCallLateArgs list
2181 CLANG_FORMAT_COMMENT_ANCHOR;
2184 if (compiler->verbose)
2186 printf("Argument with 'side effect'...\n");
2187 compiler->gtDispTree(argx);
2191 #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2192 noway_assert(argx->gtType != TYP_STRUCT);
2195 tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
2196 if (argx->gtOper == GT_MKREFANY)
2198 // For GT_MKREFANY, typically the actual struct copying does
2199 // not have any side-effects and can be delayed. So instead
2200 // of using a temp for the whole struct, we can just use a temp
2201 // for the operand that has a side-effect
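// For illustration (hypothetical trees): given GT_MKREFANY(op1, op2) where only
// op1 has side effects (op2 is clean), we emit "tmp = op1" into the early list
// and rewrite the node as GT_MKREFANY(tmp, op2), which is then safe to defer.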
2203 if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
2205 operand = argx->gtOp.gtOp1;
2207 // In the early argument evaluation, place an assignment to the temp
2208 // from the source operand of the mkrefany
2209 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2211 // Replace the operand for the mkrefany with the new temp.
2212 argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2214 else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
2216 operand = argx->gtOp.gtOp2;
2218 // In the early argument evaluation, place an assignment to the temp
2219 // from the source operand of the mkrefany
2220 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2222 // Replace the operand for the mkrefany with the new temp.
2223 argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2227 if (setupArg != nullptr)
2229 // Now keep the mkrefany for the late argument list
2232 // Clear the side-effect flags because now both op1 and op2 have no side-effects
2233 defArg->gtFlags &= ~GTF_ALL_EFFECT;
2237 setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);
2239 LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;
2241 #ifndef LEGACY_BACKEND
2242 if (compiler->fgOrder == Compiler::FGOrderLinear)
2244 // We'll reference this temporary variable just once
2245 // when we perform the function call after
2246 // setting up this argument.
2247 varDsc->lvRefCnt = 1;
2249 #endif // !LEGACY_BACKEND
2251 var_types lclVarType = genActualType(argx->gtType);
2252 var_types scalarType = TYP_UNKNOWN;
2254 if (setupArg->OperIsCopyBlkOp())
2256 setupArg = compiler->fgMorphCopyBlock(setupArg);
2257 #ifdef _TARGET_ARM64_
2258 // This scalar LclVar widening step is only performed for ARM64
2260 CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum);
2261 unsigned structSize = varDsc->lvExactSize;
2263 scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd);
2264 #endif // _TARGET_ARM64_
2267 // scalarType can be set to a wider type for ARM64: (3 bytes => 4) or (5, 6, 7 bytes => 8)
2268 if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
2270 // Create a GT_LCL_FLD using the wider type to go to the late argument list
2271 defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0);
2275 // Create a copy of the temp to go to the late argument list
2276 defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType);
2279 curArgTabEntry->isTmp = true;
2280 curArgTabEntry->tmpNum = tmpVarNum;
2283 // Previously we might have thought the local was promoted, and thus the 'COPYBLK'
2284 // might have left holes in the used registers (see
2285 // fgAddSkippedRegsInPromotedStructArg).
2286 // Too bad we're not that smart for these intermediate temps...
2287 if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
2289 regNumber argReg = curArgTabEntry->regNum;
2290 regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
2291 for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
2293 argReg = genRegArgNext(argReg);
2294 allUsedRegs |= genRegMask(argReg);
2296 #ifdef LEGACY_BACKEND
2297 callTree->gtCall.gtCallRegUsedMask |= allUsedRegs;
2298 #endif // LEGACY_BACKEND
2300 #endif // _TARGET_ARM_
2303 /* mark the assignment as a late argument */
2304 setupArg->gtFlags |= GTF_LATE_ARG;
2307 if (compiler->verbose)
2309 printf("\n Evaluate to a temp:\n");
2310 compiler->gtDispTree(setupArg);
2315 else // curArgTabEntry->needTmp == false
2318 // Only register args are replaced with placeholder nodes
2319 // and the stack-based arguments are evaluated and pushed in order.
2321 // On Arm/x64 - When needTmp is false and needPlace is false,
2322 // the non-register arguments are evaluated and stored in order.
2323 // When needPlace is true we have a nested call that comes after
2324 // this argument so we have to replace it in the gtCallArgs list
2325 // (the initial argument evaluation list) with a placeholder.
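// For illustration (a hypothetical call): in CALL(a1, CALL2(...)) on a
// FEATURE_FIXED_OUT_ARGS target, a1's store into the fixed out-arg area must be
// deferred until CALL2 has returned (CALL2 would overwrite that area with its own
// arguments). If a1 is side-effect free it is not evaluated to a temp; it simply
// moves to gtCallLateArgs and a placeholder node takes its slot in gtCallArgs.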
2327 if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
2332 /* No temp needed - move the whole node to the gtCallLateArgs list */
2334 /* The argument is deferred and put in the late argument list */
2338 // Create a placeholder node to put in its place in gtCallLateArgs.
2340 // For a struct type we also need to record the class handle of the arg.
2341 CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
2343 #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2345 // All structs are either passed (and retyped) as integral types, OR they
2346 // are passed by reference.
2347 noway_assert(argx->gtType != TYP_STRUCT);
2349 #else // !defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2351 if (varTypeIsStruct(defArg))
2353 // Need a temp to walk any GT_COMMA nodes when searching for the clsHnd
2354 GenTreePtr defArgTmp = defArg;
2356 // The GT_OBJ may be a child of a GT_COMMA.
2357 while (defArgTmp->gtOper == GT_COMMA)
2359 defArgTmp = defArgTmp->gtOp.gtOp2;
2361 assert(varTypeIsStruct(defArgTmp));
2363 // We handle two opcodes: GT_MKREFANY and GT_OBJ.
2364 if (defArgTmp->gtOper == GT_MKREFANY)
2366 clsHnd = compiler->impGetRefAnyClass();
2368 else if (defArgTmp->gtOper == GT_OBJ)
2370 clsHnd = defArgTmp->AsObj()->gtClass;
2374 BADCODE("Unhandled struct argument tree in fgMorphArgs");
2378 #endif // !(defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
2380 setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);
2382 /* mark the placeholder node as a late argument */
2383 setupArg->gtFlags |= GTF_LATE_ARG;
2386 if (compiler->verbose)
2388 if (curArgTabEntry->regNum == REG_STK)
2390 printf("Deferred stack argument :\n");
2394 printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
2397 compiler->gtDispTree(argx);
2398 printf("Replaced with placeholder node:\n");
2399 compiler->gtDispTree(setupArg);
2404 if (setupArg != nullptr)
2406 if (curArgTabEntry->parent)
2408 GenTreePtr parent = curArgTabEntry->parent;
2409 /* a normal argument from the list */
2410 noway_assert(parent->OperIsList());
2411 noway_assert(parent->gtOp.gtOp1 == argx);
2413 parent->gtOp.gtOp1 = setupArg;
2417 /* must be the gtCallObjp */
2418 noway_assert(callTree->gtCall.gtCallObjp == argx);
2420 callTree->gtCall.gtCallObjp = setupArg;
2424 /* deferred arg goes into the late argument list */
2426 if (tmpRegArgNext == nullptr)
2428 tmpRegArgNext = compiler->gtNewArgList(defArg);
2429 callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
2433 noway_assert(tmpRegArgNext->OperIsList());
2434 noway_assert(tmpRegArgNext->Current());
2435 tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
2436 tmpRegArgNext = tmpRegArgNext->Rest();
2439 curArgTabEntry->node = defArg;
2440 curArgTabEntry->lateArgInx = regArgInx++;
2444 if (compiler->verbose)
2446 printf("\nShuffled argument table: ");
2447 for (unsigned curInx = 0; curInx < argCount; curInx++)
2449 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
2451 if (curArgTabEntry->regNum != REG_STK)
2453 printf("%s ", getRegName(curArgTabEntry->regNum));
2461 // Get the late arg for arg at position argIndex.
2462 // argIndex - 0-based position to get late arg for.
2463 // Caller must ensure this position has a late arg.
2464 GenTreePtr fgArgInfo::GetLateArg(unsigned argIndex)
2466 for (unsigned j = 0; j < this->ArgCount(); j++)
2468 if (this->ArgTable()[j]->argNum == argIndex)
2470 return this->ArgTable()[j]->node;
2473 // Caller must ensure late arg exists.
2477 void fgArgInfo::RecordStkLevel(unsigned stkLvl)
2479 assert(!IsUninitialized(stkLvl));
2480 this->stkLevel = stkLvl;
2483 unsigned fgArgInfo::RetrieveStkLevel()
2485 assert(!IsUninitialized(stkLevel));
2489 // Return a conservative estimate of the stack size in bytes.
2490 // It will be used only on the intercepted-for-host code path to copy the arguments.
2491 int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
2495 for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
2501 if (numArgs > MAX_REG_ARG)
2503 numStkArgs = numArgs - MAX_REG_ARG;
2510 return numStkArgs * REGSIZE_BYTES;
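// Worked example (MAX_REG_ARG == 4 is just an assumption for illustration):
// a call with 7 args would give numStkArgs = 7 - 4 = 3, for an estimate of
// 3 * REGSIZE_BYTES bytes of outgoing stack.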
2513 //------------------------------------------------------------------------------
2514 // fgMakeMultiUse : If the node is a local, clone it and increase the ref count;
2515 // otherwise, insert a comma-form temp
2518 // pOp - a pointer to the child node we will be replacing with the comma expression that
2519 // evaluates *pOp to a temp and returns the result
2522 // A fresh GT_LCL_VAR node referencing the temp which has not been used
2525 // The result tree MUST be added to the tree structure since the ref counts are
2526 // already incremented.
2528 GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
2530 GenTree* tree = *pOp;
2531 if (tree->IsLocal())
2533 auto result = gtClone(tree);
2534 if (lvaLocalVarRefCounted)
2536 lvaTable[tree->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2542 GenTree* result = fgInsertCommaFormTemp(pOp);
2544 // At this point, *pOp is GT_COMMA(GT_ASG(V01, *pOp), V01) and result = V01
2545 // Therefore, the ref count has to be incremented 3 times for *pOp and result, if result will
2546 // be added to the tree by the caller.
2547 if (lvaLocalVarRefCounted)
2549 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2550 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2551 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
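// For illustration only (a hypothetical caller): to use the same operand twice,
//     GenTree* second = fgMakeMultiUse(&node->gtOp.gtOp1);
// leaves node->gtOp.gtOp1 as the original local (or a comma-form temp of it) and
// returns a fresh GT_LCL_VAR for the second appearance; the caller must link
// 'second' into the tree, since its ref count has already been charged.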
2558 //------------------------------------------------------------------------------
2559 // fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
2560 // and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl)
2563 // ppTree - a pointer to the child node we will be replacing with the comma expression that
2564 // evaluates ppTree to a temp and returns the result
2566 // structType - value type handle if the temp created is of TYP_STRUCT.
2569 // A fresh GT_LCL_VAR node referencing the temp which has not been used
2572 GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
2574 GenTree* subTree = *ppTree;
2576 unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));
2578 if (varTypeIsStruct(subTree))
2580 assert(structType != nullptr);
2581 lvaSetStruct(lclNum, structType, false);
2584 // If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
2585 // The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() for
2586 // setting the type of the lcl var nodes created.
2587 GenTree* asg = gtNewTempAssign(lclNum, subTree);
2589 GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
2591 GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load);
2595 return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
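// For illustration only (hypothetical names): after
//     GenTree* use2 = fgInsertCommaFormTemp(&tree);
// 'tree' has become GT_COMMA(GT_ASG(tmpN, <old tree>), tmpN) and 'use2' is a
// separate, as-yet-unlinked GT_LCL_VAR of tmpN.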
2598 //------------------------------------------------------------------------
2599 // fgMorphArgs: Walk and transform (morph) the arguments of a call
2602 // callNode - the call for which we are doing the argument morphing
2605 // Like most morph methods, this method returns the morphed node,
2606 // though in this case there are currently no scenarios where the
2607 // node itself is re-created.
2610 // This method is even less idempotent than most morph methods.
2611 // That is, it makes changes that should not be redone. It uses the existence
2612 // of gtCallLateArgs (the late arguments list) to determine if it has
2613 // already done that work.
2615 // The first time it is called (i.e. during global morphing), this method
2616 // computes the "late arguments". This is when it determines which arguments
2617 // need to be evaluated to temps prior to the main argument setup, and which
2618 // can be directly evaluated into the argument location. It also creates a
2619 // second argument list (gtCallLateArgs) that does the final placement of the
2620 // arguments, e.g. into registers or onto the stack.
2622 // The "non-late arguments", aka the gtCallArgs, are doing the in-order
2623 // evaluation of the arguments that might have side-effects, such as embedded
2624 // assignments, calls or possible throws. In these cases, it and earlier
2625 // arguments must be evaluated to temps.
2627 // On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
2628 // if we have any nested calls, we need to defer the copying of the argument
2629 // into the fixed argument area until after the call. If the argument did not
2630 // otherwise need to be computed into a temp, it is moved to gtCallLateArgs and
2631 // replaced in the "early" arg list (gtCallArgs) with a placeholder node.
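// For illustration (a hypothetical call on a target passing both args in
// registers): in CALL(f(), 42) the first argument has side effects, so the early
// list (gtCallArgs) becomes (tmp = f(), PLACEHOLDER) while the late list
// (gtCallLateArgs) becomes (GT_LCL_VAR tmp, 42); the late list is what performs
// the final placement into argument registers.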
2634 #pragma warning(push)
2635 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2637 GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
2642 unsigned flagsSummary = 0;
2643 unsigned genPtrArgCntSav = fgPtrArgCntCur;
2645 unsigned argIndex = 0;
2647 unsigned intArgRegNum = 0;
2648 unsigned fltArgRegNum = 0;
2651 regMaskTP argSkippedRegMask = RBM_NONE;
2652 regMaskTP fltArgSkippedRegMask = RBM_NONE;
2653 #endif // _TARGET_ARM_
2655 #if defined(_TARGET_X86_)
2656 unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated
2658 const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number
2661 unsigned argSlots = 0;
2662 unsigned nonRegPassedStructSlots = 0;
2663 bool reMorphing = call->AreArgsComplete();
2664 bool callHasRetBuffArg = call->HasRetBufArg();
2666 #ifndef _TARGET_X86_ // i.e. _TARGET_AMD64_ or _TARGET_ARM_
2667 bool callIsVararg = call->IsVarargs();
2670 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2671 // If fgMakeOutgoingStructArgCopy is called and copies are generated, hasStackArgCopy is set
2672 // to make sure to call EvalArgsToTemps. fgMakeOutgoingStructArgCopy just marks the argument
2673 // as needing a temp variable, and EvalArgsToTemps actually creates the temp variable node.
2674 bool hasStackArgCopy = false;
2677 #ifndef LEGACY_BACKEND
2678 // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
2679 // following the normal calling convention or in the normal argument registers. We either mark existing
2680 // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
2681 // non-standard arguments into the argument list, below.
2682 class NonStandardArgs
2684 struct NonStandardArg
2686 regNumber reg; // The register to be assigned to this non-standard argument.
2687 GenTree* node; // The tree node representing this non-standard argument.
2688 // Note that this must be updated if the tree node changes due to morphing!
2691 ArrayStack<NonStandardArg> args;
2694 NonStandardArgs(Compiler* compiler) : args(compiler, 3) // We will have at most 3 non-standard arguments
2698 //-----------------------------------------------------------------------------
2699 // Add: add a non-standard argument to the table of non-standard arguments
2702 // node - a GenTree node that has a non-standard argument.
2703 // reg - the register to assign to this node.
2708 void Add(GenTree* node, regNumber reg)
2710 NonStandardArg nsa = {reg, node};
2714 //-----------------------------------------------------------------------------
2715 // Find: Look for a GenTree* in the set of non-standard args.
2718 // node - a GenTree node to look for
2721 // The index of the non-standard argument (a non-negative, unique, stable number).
2722 // If the node is not a non-standard argument, return -1.
2724 int Find(GenTree* node)
2726 for (int i = 0; i < args.Height(); i++)
2728 if (node == args.Index(i).node)
2736 //-----------------------------------------------------------------------------
2737 // FindReg: Look for a GenTree node in the non-standard arguments set. If found,
2738 // set the register to use for the node.
2741 // node - a GenTree node to look for
2742 // pReg - an OUT argument. *pReg is set to the non-standard register to use if
2743 // 'node' is found in the non-standard argument set.
2746 // 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the
2748 // 'false' otherwise (in this case, *pReg is unmodified).
2750 bool FindReg(GenTree* node, regNumber* pReg)
2752 for (int i = 0; i < args.Height(); i++)
2754 NonStandardArg& nsa = args.IndexRef(i);
2755 if (node == nsa.node)
2764 //-----------------------------------------------------------------------------
2765 // Replace: Replace the non-standard argument node at a given index. This is done when
2766 // the original node was replaced via morphing, but we need to continue to assign a
2767 // particular non-standard arg to it.
2770 // index - the index of the non-standard arg. It must exist.
2771 // node - the new GenTree node.
2776 void Replace(int index, GenTree* node)
2778 args.IndexRef(index).node = node;
2781 } nonStandardArgs(this);
2782 #endif // !LEGACY_BACKEND
2784 // Count of args. On first morph, this is counted before we've filled in the arg table.
2785 // On remorph, we grab it from the arg table.
2786 unsigned numArgs = 0;
2788 // Process the late arguments (which were determined by a previous caller).
2789 // Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
2790 // may need to refer to it.
2793 // We need to reMorph the gtCallLateArgs early, since that is what triggers
2794 // the expression folding, and we need to have the final folded gtCallLateArgs
2795 // available when we call RemorphRegArg, so that we correctly update the fgArgInfo
2796 // with the folded tree that represents the final optimized argument nodes.
2798 // However if a range-check needs to be generated for any of these late
2799 // arguments we also need to "know" what the stack depth will be when we generate
2800 // code to branch to the throw range check failure block as that is part of the
2801 // GC information contract for that block.
2803 // Since the late arguments are evaluated last we have pushed all of the
2804 // other arguments on the stack before we evaluate these late arguments,
2805 // so we record the stack depth on the first morph call when reMorphing
2806 // was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel).
2808 if (call->gtCallLateArgs != nullptr)
2810 unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
2811 fgPtrArgCntCur += callStkLevel;
2812 call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
2813 flagsSummary |= call->gtCallLateArgs->gtFlags;
2814 fgPtrArgCntCur -= callStkLevel;
2816 assert(call->fgArgInfo != nullptr);
2817 call->fgArgInfo->RemorphReset();
2819 numArgs = call->fgArgInfo->ArgCount();
2823 // First we need to count the args
2824 if (call->gtCallObjp)
2828 for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
2833 // Insert or mark non-standard args. These are either outside the normal calling convention, or
2834 // arguments registers that don't follow the normal progression of argument registers in the calling
2835 // convention (such as for the ARM64 fixed return buffer argument x8).
2837 // *********** NOTE *************
2838 // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
2839 // in the implementation of fast tail call.
2840 // *********** END NOTE *********
2841 CLANG_FORMAT_COMMENT_ANCHOR;
2843 #if !defined(LEGACY_BACKEND)
2844 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
2845 // The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helper has a custom calling convention.
2846 // Set the argument registers correctly here.
2847 if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
2849 GenTreeArgList* args = call->gtCallArgs;
2850 GenTree* arg1 = args->Current();
2851 assert(arg1 != nullptr);
2852 nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
2854 #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
2855 #if defined(_TARGET_X86_)
2856 // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
2857 // hi part to be in EDX. This sets the argument registers up correctly.
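// For example (illustrative): a 64-bit shift "x << n" that reaches here as a
// CORINFO_HELP_LLSH call gets the low half of x pinned to REG_LNGARG_LO (EAX)
// and the high half to REG_LNGARG_HI (EDX) by the code below.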
2858 else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
2859 call->IsHelperCall(this, CORINFO_HELP_LRSZ))
2861 GenTreeArgList* args = call->gtCallArgs;
2862 GenTree* arg1 = args->Current();
2863 assert(arg1 != nullptr);
2864 nonStandardArgs.Add(arg1, REG_LNGARG_LO);
2866 args = args->Rest();
2867 GenTree* arg2 = args->Current();
2868 assert(arg2 != nullptr);
2869 nonStandardArgs.Add(arg2, REG_LNGARG_HI);
2871 #else // !defined(_TARGET_X86_)
2872 // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
2873 // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
2874 // convention for x86/SSE.
2876 // If we have a Fixed Return Buffer argument register then we set up a non-standard argument for it
2878 if (hasFixedRetBuffReg() && call->HasRetBufArg())
2880 args = call->gtCallArgs;
2881 assert(args != nullptr);
2882 assert(args->OperIsList());
2884 argx = call->gtCallArgs->Current();
2886 // We don't increment numArgs here, since we already counted this argument above.
2888 nonStandardArgs.Add(argx, theFixedRetBuffReg());
2891 // We are allowed to have a Fixed Return Buffer argument combined
2892 // with any of the remaining non-standard arguments
2894 if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
2896 assert(!call->gtCallCookie);
2897 // Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
2898 // It will be used only on the intercepted-for-host code path to copy the arguments.
2900 GenTree* cns = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
2901 call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
2904 nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM);
2906 else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT) && !call->IsTailCallViaHelper())
2908 // indirect VSD stubs need the base of the indirection cell to be
2909 // passed in addition. At this point that is the value in gtCallAddr.
2910 // The actual call target will be derived from gtCallAddr in call lowering.
2913 // If it is a VSD call getting dispatched via tail call helper,
2914 // fgMorphTailCall() would materialize the stub addr as an additional
2915 // parameter added to the original arg list, and hence there is no need to
2916 // add it as a non-standard arg.
2918 GenTree* arg = call->gtCallAddr;
2919 if (arg->OperIsLocal())
2921 arg = gtClone(arg, true);
2925 call->gtCallAddr = fgInsertCommaFormTemp(&arg);
2926 call->gtFlags |= GTF_ASG;
2928 noway_assert(arg != nullptr);
2930 // And push the stub address onto the list of arguments
2931 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2934 nonStandardArgs.Add(arg, REG_VIRTUAL_STUB_PARAM);
2937 #endif // defined(_TARGET_X86_)
2938 if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr))
2940 assert(!call->IsUnmanaged());
2942 GenTree* arg = call->gtCallCookie;
2943 noway_assert(arg != nullptr);
2944 call->gtCallCookie = nullptr;
2946 #if defined(_TARGET_X86_)
2947 // x86 passes the cookie on the stack as the final argument to the call.
2948 GenTreeArgList** insertionPoint = &call->gtCallArgs;
2949 for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
2952 *insertionPoint = gtNewListNode(arg, nullptr);
2953 #else // !defined(_TARGET_X86_)
2954 // All other architectures pass the cookie in a register.
2955 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2956 #endif // defined(_TARGET_X86_)
2958 nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
2961 // put destination into R10/EAX
2962 arg = gtClone(call->gtCallAddr, true);
2963 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2966 nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM);
2968 // finally change this call to a helper call
2969 call->gtCallType = CT_HELPER;
2970 call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
2972 #endif // !defined(LEGACY_BACKEND)
2974 // Allocate the fgArgInfo for the call node.
2976 call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
2979 if (varTypeIsStruct(call))
2981 fgFixupStructReturn(call);
2984 /* First we morph the argument subtrees ('this' pointer, arguments, etc.).
2985 * During the first call to fgMorphArgs we also record the
2986 * information about late arguments we have in 'fgArgInfo'.
2987 * This information is used later to construct the gtCallLateArgs */
2989 /* Process the 'this' argument value, if present */
2991 argx = call->gtCallObjp;
2995 argx = fgMorphTree(argx);
2996 call->gtCallObjp = argx;
2997 flagsSummary |= argx->gtFlags;
2999 assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT);
3001 assert(argIndex == 0);
3003 /* We must fill in or update the argInfo table */
3007 /* this is a register argument - possibly update it in the table */
3008 call->fgArgInfo->RemorphRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
3012 assert(varTypeIsGC(call->gtCallObjp->gtType) || (call->gtCallObjp->gtType == TYP_I_IMPL));
3014 /* this is a register argument - put it in the table */
3015 call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1
3016 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3018 false, REG_STK, nullptr
3019 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3022 // this can't be a struct.
3023 assert(argx->gtType != TYP_STRUCT);
3025 /* Increment the argument register count and argument index */
3026 if (!varTypeIsFloating(argx->gtType) || opts.compUseSoftFP)
3029 #ifdef WINDOWS_AMD64_ABI
3030 // Whenever we pass an integer register argument
3031 // we skip the corresponding floating point register argument
3033 #endif // WINDOWS_AMD64_ABI
3037 noway_assert(!"the 'this' pointer can not be a floating point type");
3044 // Compute the maximum number of arguments that can be passed in registers.
3045 // For X86 we handle the varargs and unmanaged calling conventions
3047 if (call->gtFlags & GTF_CALL_POP_ARGS)
3049 noway_assert(intArgRegNum < MAX_REG_ARG);
3050 // No more register arguments for varargs (CALL_POP_ARGS)
3051 maxRegArgs = intArgRegNum;
3053 // Add in the ret buff arg
3054 if (callHasRetBuffArg)
3058 if (call->IsUnmanaged())
3060 noway_assert(intArgRegNum == 0);
3062 if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
3064 noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
3065 call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
3066 call->gtCallArgs->gtOp.gtOp1->gtOper ==
3067 GT_NOP); // the arg was already morphed to a register (fgMorph called twice)
3075 // Add in the ret buff arg
3076 if (callHasRetBuffArg)
3079 #endif // _TARGET_X86_
3081 /* Morph the user arguments */
3082 CLANG_FORMAT_COMMENT_ANCHOR;
3084 #if defined(_TARGET_ARM_)
3086 // The ARM ABI has a concept of back-filling of floating-point argument registers, according
3087 // to the "Procedure Call Standard for the ARM Architecture" document, especially
3088 // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
3089 // appear in a lower-numbered register than floating point argument N. That is, argument
3090 // register allocation is not strictly increasing. To support this, we need to keep track of unused
3091 // floating-point argument registers that we can back-fill. We only support 4-byte float and
3092 // 8-byte double types, and one to four element HFAs composed of these types. With this, we will
3093 // only back-fill single registers, since there is no way with these types to create
3094 // an alignment hole greater than one register. However, there can be up to 3 back-fill slots
3095 // available (with 16 FP argument registers). Consider this code:
3097 // struct HFA { float x, y, z; }; // a three element HFA
3098 // void bar(float a1, // passed in f0
3099 // double a2, // passed in f2/f3; skip f1 for alignment
3100 // HFA a3, // passed in f4/f5/f6
3101 // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
3102 // HFA a5, // passed in f10/f11/f12
3103 // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill slots
3105 // float a7, // passed in f1 (back-filled)
3106 // float a8, // passed in f7 (back-filled)
3107 // float a9, // passed in f13 (back-filled)
3108 // float a10) // passed on the stack in [OutArg+0]
3110 // Note that if we ever support FP types with larger alignment requirements, then there could
3111 // be more than single register back-fills.
3113 // Once we assign a floating-point argument to the stack, they all must be on the stack.
3114 // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
3115 // continues only so long as no VFP CPRC has been allocated to a slot on the stack."
3116 // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
3117 // and prevent any additional floating-point arguments from going in registers.
3119 bool anyFloatStackArgs = false;
3121 #endif // _TARGET_ARM_
3123 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3124 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3125 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3127 bool hasStructArgument = false; // @TODO-ARM64-UNIX: Remove this bool during a future refactoring
3128 bool hasMultiregStructArgs = false;
3129 for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
3131 GenTreePtr* parentArgx = &args->gtOp.gtOp1;
3133 #if FEATURE_MULTIREG_ARGS
3134 if (!hasStructArgument)
3136 hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1);
3138 #endif // FEATURE_MULTIREG_ARGS
3140 #ifndef LEGACY_BACKEND
3141 // Record the index of any nonStandard arg that we may be processing here, as we are
3142 // about to call fgMorphTree on it and fgMorphTree may replace it with a new tree.
3143 GenTreePtr orig_argx = *parentArgx;
3144 int nonStandard_index = nonStandardArgs.Find(orig_argx);
3145 #endif // !LEGACY_BACKEND
3147 argx = fgMorphTree(*parentArgx);
3149 flagsSummary |= argx->gtFlags;
3151 assert(args->OperIsList());
3152 assert(argx == args->Current());
3154 #ifndef LEGACY_BACKEND
3155 if ((nonStandard_index != -1) && (argx != orig_argx))
3157 // We need to update the node field for this nonStandard arg here
3158 // as it was changed by the call to fgMorphTree
3159 nonStandardArgs.Replace(nonStandard_index, argx);
3161 #endif // !LEGACY_BACKEND
3163 /* Change the node to TYP_I_IMPL so we don't report GC info
3164 * NOTE: We deferred this from the importer because of the inliner */
3166 if (argx->IsVarAddr())
3168 argx->gtType = TYP_I_IMPL;
3171 bool passUsingFloatRegs;
3172 unsigned argAlign = 1;
3173 // Setup any HFA information about 'argx'
3174 var_types hfaType = GetHfaType(argx);
3175 bool isHfaArg = varTypeIsFloating(hfaType);
3176 unsigned hfaSlots = 0;
3180 hfaSlots = GetHfaCount(argx);
3182 // If we have an HFA struct, it's possible we transition from a method that originally
3183 // only had integer types to now start having FP types. We have to communicate this
3184 // through this flag since LSRA later on will use this flag to determine whether
3185 // or not to track the FP register set.
3187 compFloatingPointUsed = true;
3191 CORINFO_CLASS_HANDLE copyBlkClass = nullptr;
3192 bool isRegArg = false;
3193 bool isNonStandard = false;
3194 regNumber nonStdRegNum = REG_NA;
3196 fgArgTabEntryPtr argEntry = nullptr;
3200 argEntry = gtArgEntryByArgNum(call, argIndex);
3205 bool passUsingIntRegs;
3208 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3209 passUsingIntRegs = isValidIntArgReg(argEntry->regNum);
3213 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP;
3214 passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
3217 GenTreePtr curArg = argx;
3218 // If late args have already been computed, use the node in the argument table.
3219 if (argEntry != NULL && argEntry->isTmp)
3221 curArg = argEntry->node;
3226 argAlign = argEntry->alignment;
3230 // We don't use the "size" return value from InferOpSizeAlign().
3231 codeGen->InferOpSizeAlign(curArg, &argAlign);
3233 argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
3234 argAlign /= TARGET_POINTER_SIZE;
3239 if (passUsingFloatRegs)
3241 if (fltArgRegNum % 2 == 1)
3243 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3247 else if (passUsingIntRegs)
3249 if (intArgRegNum % 2 == 1)
3251 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3256 if (argSlots % 2 == 1)
3262 #elif defined(_TARGET_ARM64_)
3266 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3270 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
3273 #elif defined(_TARGET_AMD64_)
3276 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3280 passUsingFloatRegs = varTypeIsFloating(argx);
3282 #elif defined(_TARGET_X86_)
3284 passUsingFloatRegs = false;
3287 #error Unsupported or unset target architecture
3290 bool isBackFilled = false;
3291 unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
3292 var_types structBaseType = TYP_STRUCT;
3293 unsigned structSize = 0;
3295 bool isStructArg = varTypeIsStruct(argx);
3299 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3300 // Get the struct description for the already completed struct argument.
3301 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, argx);
3302 assert(fgEntryPtr != nullptr);
3304 // As described in a few other places, this can happen when the argx was morphed
3305 // into an arg setup node - COPYBLK. The COPYBLK always has a type of void.
3306 // In such cases the fgArgTabEntry keeps track of whether the original node (before morphing)
3307 // was a struct, and of the struct classification.
3308 isStructArg = fgEntryPtr->isStruct;
3312 structDesc.CopyFrom(fgEntryPtr->structDesc);
3314 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3316 assert(argEntry != nullptr);
3317 if (argEntry->IsBackFilled())
3320 size = argEntry->numRegs;
3321 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(argEntry->regNum);
3323 isBackFilled = true;
3325 else if (argEntry->regNum == REG_STK)
3328 assert(argEntry->numRegs == 0);
3329 size = argEntry->numSlots;
3334 assert(argEntry->numRegs > 0);
3335 size = argEntry->numRegs + argEntry->numSlots;
3338 // This size has now been computed
3344 // Figure out the size of the argument. This is either in number of registers, or number of
3345 // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and the stack.
3348 if (argx->IsArgPlaceHolderNode() || (!isStructArg))
3350 #if defined(_TARGET_AMD64_)
3351 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3354 size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3358 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3359 TARGET_POINTER_SIZE)) /
3360 TARGET_POINTER_SIZE;
3361 eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
3364 hasMultiregStructArgs = true;
3367 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3368 size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3369 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3370 #elif defined(_TARGET_ARM64_)
3375 size = GetHfaCount(argx);
3376 // HFA structs are passed by value in multiple registers
3377 hasMultiregStructArgs = true;
3381 // Structs are either passed in 1 or 2 (64-bit) slots
3382 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3383 TARGET_POINTER_SIZE)) /
3384 TARGET_POINTER_SIZE;
3388 // Structs that are the size of 2 pointers are passed by value in multiple registers
3389 hasMultiregStructArgs = true;
3393 size = 1; // Structs that are larger than 2 pointers (except for HFAs) are passed by
3394 // reference (to a copy)
3397 // Note that there are some additional rules for multireg structs.
3398 // (i.e., they cannot be split between registers and the stack)
3402 size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot'
3404 #elif defined(_TARGET_ARM_)
3407 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3408 TARGET_POINTER_SIZE)) /
3409 TARGET_POINTER_SIZE;
3412 hasMultiregStructArgs = true;
3414 else if (size > 1 && size <= 4)
3416 hasMultiregStructArgs = true;
3422 // long/double type argument(s) will be changed to GT_FIELD_LIST in the Lowering phase
3423 size = genTypeStSz(argx->gtType);
3425 #elif defined(_TARGET_X86_)
3426 size = genTypeStSz(argx->gtType);
3428 #error Unsupported or unset target architecture
3429 #endif // _TARGET_XXX_
3434 size = GetHfaCount(argx);
3435 hasMultiregStructArgs = true;
3437 #endif // _TARGET_ARM_
3440 // We handle two opcodes: GT_MKREFANY and GT_OBJ
3441 if (argx->gtOper == GT_MKREFANY)
3443 if (varTypeIsStruct(argx))
3447 #ifdef _TARGET_AMD64_
3448 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3449 if (varTypeIsStruct(argx))
3451 size = info.compCompHnd->getClassSize(impGetRefAnyClass());
3452 unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE);
3453 size = roundupSize / TARGET_POINTER_SIZE;
3454 eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc);
3457 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3465 else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be a child of a GT_COMMA
3467 GenTreePtr argObj = argx;
3468 GenTreePtr* parentOfArgObj = parentArgx;
3470 assert(args->OperIsList());
3471 assert(argx == args->Current());
3473 /* The GT_OBJ may be a child of a GT_COMMA */
3474 while (argObj->gtOper == GT_COMMA)
3476 parentOfArgObj = &argObj->gtOp.gtOp2;
3477 argObj = argObj->gtOp.gtOp2;
3480 // TODO-1stClassStructs: An OBJ node should not be required for lclVars.
3481 if (argObj->gtOper != GT_OBJ)
3483 BADCODE("illegal argument tree in fgMorphArgs");
3486 CORINFO_CLASS_HANDLE objClass = argObj->gtObj.gtClass;
3487 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3488 eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
3489 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3491 unsigned originalSize = info.compCompHnd->getClassSize(objClass);
3492 originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
3493 unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
3495 structSize = originalSize;
3497 structPassingKind howToPassStruct;
3498 structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, originalSize);
3500 #ifdef _TARGET_ARM64_
3501 if ((howToPassStruct == SPK_PrimitiveType) && // Passed in a single register
3502 !isPow2(originalSize)) // size is 3,5,6 or 7 bytes
3504 if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
3506 // For ARM64, when passing structs that are 3, 5, 6, or 7 bytes in size,
3507 // we can read 4 or 8 bytes from the LclVar to pass this arg.
3508 originalSize = genTypeSize(structBaseType);
3511 #endif // _TARGET_ARM64_
3513 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3514 // On System V OS-es a struct is never passed by reference.
3515 // It is either passed by value on the stack or in registers.
3516 bool passStructInRegisters = false;
3517 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3518 bool passStructByRef = false;
3519 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3521 // The following if-then-else needs to be carefully refactored.
3522 // Basically the else portion wants to turn a struct load (a GT_OBJ)
3523 // into a GT_IND of the appropriate size.
3524 // It can do this with struct sizes that are 1, 2, 4, or 8 bytes.
3525 // It can't do this when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined (Why?)
3526 // TODO-Cleanup: Remove the #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING below.
3527 // It also can't do this if we have an HFA arg,
3528 // unless we have a 1-elem HFA in which case we want to do the optimization.
3529 CLANG_FORMAT_COMMENT_ANCHOR;
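// For illustration (hypothetical trees): a 4-byte struct argument
//     GT_OBJ(struct<4>, addr)
// can be re-expressed as GT_IND(TYP_INT, addr); if addr is GT_ADDR(lclVar), the
// indirection is then folded away entirely below.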
3531 #ifndef _TARGET_X86_
3532 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3533 // Check for struct argument with size 1, 2, 4 or 8 bytes
3534 // As we can optimize these by turning them into a GT_IND of the correct type
3536 // Check for cases that we cannot optimize:
3538 if ((originalSize > TARGET_POINTER_SIZE) || // it is a struct larger than a pointer
3539 !isPow2(originalSize) || // it is not a power of two (1, 2, 4 or 8)
3540 (isHfaArg && (hfaSlots != 1))) // it is an HFA struct with more than one element
3541 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3543 // Normalize 'size' to the number of pointer sized items
3544 // 'size' is the number of register slots that we will use to pass the argument
3545 size = roundupSize / TARGET_POINTER_SIZE;
3546 #if defined(_TARGET_AMD64_)
3547 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3548 size = 1; // This must be copied to a temp and passed by address
3549 passStructByRef = true;
3550 copyBlkClass = objClass;
3551 #else // FEATURE_UNIX_AMD64_STRUCT_PASSING
3552 if (!structDesc.passedInRegisters)
3554 GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
3555 bool needCpyBlk = false;
3556 if (lclVar != nullptr)
3558 // If the struct is promoted to registers, it has to be materialized
3559 // on the stack. We may want to support promoted structs in
3560 // the codegen of putarg_stk instead of creating a copy here.
3561 LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3562 needCpyBlk = varDsc->lvPromoted;
3566 // If simd16 comes from Vector<T>, eeGetSystemVAmd64PassStructInRegisterDescriptor
3567 // sets structDesc.passedInRegisters to be false.
3569 // GT_ADDR(GT_SIMD) is not a rationalized IR form and is not handled
3570 // by rationalizer. For now we will let the SIMD struct arg be copied to
3571 // a local. As part of cpblk rewrite, rationalizer will handle GT_ADDR(GT_SIMD)
3574 // | \--* addr byref
3575 // | | /--* lclVar simd16 V05 loc4
3576 // | \--* simd simd16 int -
3577 // | \--* lclVar simd16 V08 tmp1
3579 // TODO-Amd64-Unix: The rationalizer can be updated to handle this pattern,
3580 // so that we don't need to generate a copy here.
3581 GenTree* addr = argObj->gtOp.gtOp1;
3582 if (addr->OperGet() == GT_ADDR)
3584 GenTree* addrChild = addr->gtOp.gtOp1;
3585 if (addrChild->OperGet() == GT_SIMD)
3591 passStructInRegisters = false;
3594 copyBlkClass = objClass;
3598 copyBlkClass = NO_CLASS_HANDLE;
3603 // The objClass is used to materialize the struct on the stack.
3604 // For SystemV, the code below generates copies for struct arguments classified
3605 // as register arguments.
3606 // TODO-Amd64-Unix: We don't always need copies for this case. Struct arguments
3607 // can be passed in registers or can be copied directly to the outgoing area.
3608 passStructInRegisters = true;
3609 copyBlkClass = objClass;
3612 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3613 #elif defined(_TARGET_ARM64_)
3614 if ((size > 2) && !isHfaArg)
3616 size = 1; // This must be copied to a temp and passed by address
3617 passStructByRef = true;
3618 copyBlkClass = objClass;
3623 // If we're passing a promoted struct local var,
3624 // we may need to skip some registers due to alignment; record those.
3625 GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
3628 LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3629 if (varDsc->lvPromoted)
3631 assert(argObj->OperGet() == GT_OBJ);
3632 if (lvaGetPromotionType(varDsc) == PROMOTION_TYPE_INDEPENDENT)
3634 fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3638 #endif // _TARGET_ARM_
3640 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3641 // TODO-Amd64-Unix: Since the else part below is disabled for UNIX_AMD64, copies are always
3642 // generated for structs of size 1, 2, 4, or 8 bytes.
3643 else // We have a struct argument with size 1, 2, 4 or 8 bytes
3645 // change our GT_OBJ into a GT_IND of the correct type.
3646 // We've already ensured above that size is a power of 2, and less than or equal to the pointer size.
3649 assert(howToPassStruct == SPK_PrimitiveType);
3651 // ToDo: remove this block as getArgTypeForStruct properly handles turning one-element HFAs into their primitive type
3655 // If we reach here with an HFA arg it has to be a one element HFA
3656 assert(hfaSlots == 1);
3657 structBaseType = hfaType; // change the indirection type to a floating point type
3660 noway_assert(structBaseType != TYP_UNKNOWN);
3662 argObj->ChangeOper(GT_IND);
3664 // Now see if we can fold *(&X) into X
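// For illustration: GT_IND(TYP_INT, GT_ADDR(GT_LCL_VAR V02)) folds to just
// GT_LCL_VAR V02, dropping both the GT_ADDR and the GT_IND (the retyping of
// the local, if needed, is handled just below).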
3665 if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
3667 GenTreePtr temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
3669 // Keep the DONT_CSE flag in sync
3670 // (as the addr always marks it for its op1)
3671 temp->gtFlags &= ~GTF_DONT_CSE;
3672 temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
3673 DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
3674 DEBUG_DESTROY_NODE(argObj); // GT_IND
3677 *parentOfArgObj = temp;
3679 // If the OBJ had been the top level node, we've now changed argx.
3680 if (parentOfArgObj == parentArgx)
3685 if (argObj->gtOper == GT_LCL_VAR)
3687 unsigned lclNum = argObj->gtLclVarCommon.gtLclNum;
3688 LclVarDsc* varDsc = &lvaTable[lclNum];
3690 if (varDsc->lvPromoted)
3692 if (varDsc->lvFieldCnt == 1)
3694 // get the first and only promoted field
3695 LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
3696 if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
3698 // we will use the first and only promoted field
3699 argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);
3701 if (varTypeCanReg(fieldVarDsc->TypeGet()) &&
3702 (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
3704 // Just use the existing field's type
3705 argObj->gtType = fieldVarDsc->TypeGet();
3709 // Can't use the existing field's type, so use GT_LCL_FLD to swizzle
3711 argObj->ChangeOper(GT_LCL_FLD);
3712 argObj->gtType = structBaseType;
3714 assert(varTypeCanReg(argObj->TypeGet()));
3715 assert(copyBlkClass == NO_CLASS_HANDLE);
3719 // use GT_LCL_FLD to swizzle the single field struct to a new type
3720 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
3721 argObj->ChangeOper(GT_LCL_FLD);
3722 argObj->gtType = structBaseType;
3727 // The struct fits into a single register, but it has been promoted into its
3728 // constituent fields, and so we have to re-assemble it
3729 copyBlkClass = objClass;
3731 // Alignment constraints may cause us not to use (to "skip") some argument
3732 // registers. Add those, if any, to the skipped (int) arg reg mask.
3733 fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3734 #endif // _TARGET_ARM_
3737 else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
3739 // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
3740 argObj->ChangeOper(GT_LCL_FLD);
3741 argObj->gtType = structBaseType;
3746 // Not a GT_LCL_VAR, so we can just change the type on the node
3747 argObj->gtType = structBaseType;
3749 assert(varTypeCanReg(argObj->TypeGet()) ||
3750 ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsIntegral(structBaseType)));
3754 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3756 #endif // not _TARGET_X86_
3757 // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
3758 if ((structBaseType == TYP_STRUCT) &&
3759 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3760 !passStructInRegisters
3761 #else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3763 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3766 if (isHfaArg && passUsingFloatRegs)
3768 size = GetHfaCount(argx); // GetHfaCount returns number of elements in the HFA
3772 // If the valuetype size is not a multiple of sizeof(void*),
3773 // we must copyblk to a temp before doing the obj to avoid
3774 // the obj reading memory past the end of the valuetype
3775 CLANG_FORMAT_COMMENT_ANCHOR;
3777 if (roundupSize > originalSize)
3779 copyBlkClass = objClass;
3781 // There are a few special cases where we can omit using a CopyBlk
3782 // where we normally would need to use one.
3784 if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
3786 copyBlkClass = NO_CLASS_HANDLE;
3790 size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
3795 #ifdef _TARGET_64BIT_
3798 hasMultiregStructArgs = true;
3800 #elif defined(_TARGET_ARM_)
3801 // TODO-Arm: Need to handle the case
3802 // where structs passed by value can be split between registers and stack.
3803 if (size > 1 && size <= 4)
3805 hasMultiregStructArgs = true;
3807 #ifndef LEGACY_BACKEND
3808 else if (size > 4 && passUsingIntRegs)
3810 NYI_ARM("Struct can be split between registers and stack");
3812 #endif // !LEGACY_BACKEND
3813 #endif // _TARGET_ARM_
3816 // The 'size' value must have been set by now. (The original value of zero is an invalid value.)
3820 // Figure out if the argument will be passed in a register.
3823 if (isRegParamType(genActualType(argx->TypeGet()))
3824 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3825 && (!isStructArg || structDesc.passedInRegisters)
3830 if (passUsingFloatRegs)
3832 // First, see if it can be back-filled
3833 if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
3834 (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot?
3835 (size == 1)) // The size to back-fill is one float register
3837 // Back-fill the register.
3838 isBackFilled = true;
3839 regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
3840 fltArgSkippedRegMask &=
3841 ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
3842 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
3843 assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG);
3846 // Does the entire float, double, or HFA fit in the FP arg registers?
3847 // Check if the last register needed is still in the argument register range.
3848 isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG;
3852 anyFloatStackArgs = true;
3857 isRegArg = intArgRegNum < MAX_REG_ARG;
3859 #elif defined(_TARGET_ARM64_)
3860 if (passUsingFloatRegs)
3862 // Check if the last register needed is still in the fp argument register range.
3863 isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG;
3865 // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers?
3866 if (isHfaArg && !isRegArg)
3868 // recompute the 'size' so that it represents the number of stack slots rather than the number of registers
3871 unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
3872 size = roundupSize / TARGET_POINTER_SIZE;
3874 // We also must update fltArgRegNum so that we no longer try to
3875 // allocate any new floating point registers for args
3876 // This prevents us from backfilling a subsequent arg into d7
3878 fltArgRegNum = MAX_FLOAT_REG_ARG;
3883 // Check if the last register needed is still in the int argument register range.
3884 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
3886 // Did we run out of registers when we had a 16-byte struct (size == 2)?
3887 // (i.e., we only have one register remaining but we needed two registers to pass this arg)
3888 // This prevents us from backfilling a subsequent arg into x7
3890 if (!isRegArg && (size > 1))
3892 // We also must update intArgRegNum so that we no longer try to
3893 // allocate any new general purpose registers for args
3895 intArgRegNum = maxRegArgs;
3898 #else // not _TARGET_ARM_ or _TARGET_ARM64_
3900 #if defined(UNIX_AMD64_ABI)
3902 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3903 // Here a struct can be passed in registers following the classifications of its members and size.
3904 // Now make sure there are actually enough registers to do so.
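// For illustration (a hypothetical layout): struct { long l; double d; }
// classifies as one INTEGER and one SSE eightbyte, so it is a register
// candidate only if one general-purpose and one floating-point argument
// register are both still available.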
3907 unsigned int structFloatRegs = 0;
3908 unsigned int structIntRegs = 0;
3909 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
3911 if (structDesc.IsIntegralSlot(i))
3915 else if (structDesc.IsSseSlot(i))
3921 isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) &&
3922 ((intArgRegNum + structIntRegs) <= MAX_REG_ARG);
3925 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3927 if (passUsingFloatRegs)
3929 isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
3933 isRegArg = intArgRegNum < MAX_REG_ARG;
3936 #else // !defined(UNIX_AMD64_ABI)
3937 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
3938 #endif // !defined(UNIX_AMD64_ABI)
3939 #endif // _TARGET_ARM_
3946 #ifndef LEGACY_BACKEND
3947 // If there are nonstandard args (outside the calling convention) they were inserted above
3948 // and noted in a table so we can recognize them here and build their argInfo.
3950 // They should not affect the placement of any other args or stack space required.
3951 // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
3952 isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum);
3953 if (isNonStandard && (nonStdRegNum == REG_STK))
3957 #if defined(_TARGET_X86_)
3958 else if (call->IsTailCallViaHelper())
3960 // We have already (before calling fgMorphArgs()) appended the 4 special args
3961 // required by the x86 tailcall helper. These args are required to go on the
3962 // stack. Force them to the stack here.
3963 assert(numArgs >= 4);
3964 if (argIndex >= numArgs - 4)
3969 #endif // defined(_TARGET_X86_)
3970 #endif // !LEGACY_BACKEND
3971 } // end !reMorphing
3974 // Now we know if the argument goes in registers or not and how big it is,
3975 // whether we just computed it here or looked it up on a re-morph call.
3977 CLANG_FORMAT_COMMENT_ANCHOR;
3980 // If we ever allocate a floating point argument to the stack, then all
3981 // subsequent HFA/float/double arguments go on the stack.
3982 if (!isRegArg && passUsingFloatRegs)
3984 for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum)
3986 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3990 // If we think we're going to split a struct between integer registers and the stack, check to
3991 // see if we've already assigned a floating-point arg to the stack.
3992 if (isRegArg && // We decided above to use a register for the argument
3993 !passUsingFloatRegs && // We're using integer registers
3994 (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack
3995 anyFloatStackArgs) // We've already used the stack for a floating-point argument
3997 isRegArg = false; // Change our mind; don't pass this struct partially in registers
3999 // Skip the rest of the integer argument registers
4000 for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum)
4002 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
4006 #endif // _TARGET_ARM_
4010 regNumber nextRegNum = REG_STK;
4011 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4012 regNumber nextOtherRegNum = REG_STK;
4013 unsigned int structFloatRegs = 0;
4014 unsigned int structIntRegs = 0;
4016 if (isStructArg && structDesc.passedInRegisters)
4018 // It is a struct passed in registers. Assign the next available register.
4019 assert((structDesc.eightByteCount <= 2) && "Too many eightbytes.");
4020 regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum};
4021 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
4023 if (structDesc.IsIntegralSlot(i))
4025 *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs);
4028 else if (structDesc.IsSseSlot(i))
4030 *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs);
4036 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4038 // fill in or update the argInfo table
4039 nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum)
4040 : genMapIntRegArgNumToRegNum(intArgRegNum);
4043 #ifdef _TARGET_AMD64_
4044 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
4049 fgArgTabEntryPtr newArgEntry;
4052 // This is a register argument - possibly update it in the table
4053 newArgEntry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
4059 nextRegNum = nonStdRegNum;
4062 // This is a register argument - put it in the table
4063 newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign
4064 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4066 isStructArg, nextOtherRegNum, &structDesc
4067 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4070 newArgEntry->SetIsHfaRegArg(passUsingFloatRegs &&
isHfaArg); // Note that on Arm32 an HFA is passed in integer registers for varargs
4072 newArgEntry->SetIsBackFilled(isBackFilled);
4073 newArgEntry->isNonStandard = isNonStandard;
4076 if (newArgEntry->isNonStandard)
4081 // Set up the next intArgRegNum and fltArgRegNum values.
4084 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4087 intArgRegNum += structIntRegs;
4088 fltArgRegNum += structFloatRegs;
4091 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4093 if (passUsingFloatRegs)
4095 fltArgRegNum += size;
4097 #ifdef WINDOWS_AMD64_ABI
4098 // Whenever we pass an integer register argument
4099 // we skip the corresponding floating point register argument
4100 intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
4101 #endif // WINDOWS_AMD64_ABI
4103 if (fltArgRegNum > MAX_FLOAT_REG_ARG)
4105 #ifndef LEGACY_BACKEND
4106 NYI_ARM("Struct split between float registers and stack");
4107 #endif // !LEGACY_BACKEND
4108 // This indicates a partial enregistration of a struct type
4109 assert(varTypeIsStruct(argx));
4110 unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
4111 assert((unsigned char)numRegsPartial == numRegsPartial);
4112 call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
4113 fltArgRegNum = MAX_FLOAT_REG_ARG;
4115 #endif // _TARGET_ARM_
4119 if (hasFixedRetBuffReg() && (nextRegNum == theFixedRetBuffReg()))
4121 // we are setting up the fixed return buffer register argument
4122 // so don't increment intArgRegNum
4127 // Increment intArgRegNum by 'size' registers
4128 intArgRegNum += size;
4131 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
4132 fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
4133 #endif // _TARGET_AMD64_
4135 if (intArgRegNum > MAX_REG_ARG)
4137 #ifndef LEGACY_BACKEND
4138 NYI_ARM("Struct split between integer registers and stack");
4139 #endif // !LEGACY_BACKEND
4140 // This indicates a partial enregistration of a struct type
4141 assert((isStructArg) || argx->OperIsCopyBlkOp() ||
4142 (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
4143 unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
4144 assert((unsigned char)numRegsPartial == numRegsPartial);
4145 call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
4146 intArgRegNum = MAX_REG_ARG;
4147 fgPtrArgCntCur += size - numRegsPartial;
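// For example (illustrative): on ARM with r0,r1 already used (intArgRegNum == 2),
// a 16-byte struct has size == 4 slots; after intArgRegNum += size we get 6, so
// numRegsPartial == 4 - (6 - 4) == 2: two slots are passed in r2,r3 and the
// remaining two slots go to the stack (fgPtrArgCntCur += 2).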
4149 #endif // _TARGET_ARM_
4154 else // We have an argument that is not passed in a register
4156 fgPtrArgCntCur += size;
4158 // If the register arguments have not been determined then we must fill in the argInfo
4162 // This is a stack argument - possibly update it in the table
4163 call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
4167 // This is a stack argument - put it in the table
4168 call->fgArgInfo->AddStkArg(argIndex, argx, args, size,
4169 argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg));
4173 if (copyBlkClass != NO_CLASS_HANDLE)
4175 noway_assert(!reMorphing);
4176 fgMakeOutgoingStructArgCopy(call, args, argIndex,
4177 copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
4179 // This can cause a GTF_EXCEPT flag to be set.
4180 // TODO-CQ: Fix the cases where this happens. We shouldn't be adding any new flags.
4181 // This currently occurs in the case where we are re-morphing the args on x86/RyuJIT, and
4182 // there are no register arguments. Then reMorphing is never true, so we keep re-copying
4183 // any struct arguments.
// i.e. assert(((call->gtFlags & GTF_EXCEPT) != 0) || ((args->Current()->gtFlags & GTF_EXCEPT) == 0));
4185 flagsSummary |= (args->Current()->gtFlags & GTF_EXCEPT);
4187 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4188 hasStackArgCopy = true;
4192 #ifndef LEGACY_BACKEND
4193 if (argx->gtOper == GT_MKREFANY)
4195 // 'Lower' the MKREFANY tree and insert it.
4196 noway_assert(!reMorphing);
4198 #ifndef _TARGET_64BIT_
4200 // Build the mkrefany as a GT_FIELD_LIST
4201 GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST)
4202 GenTreeFieldList(argx->gtOp.gtOp1, offsetof(CORINFO_RefAny, dataPtr), TYP_BYREF, nullptr);
4203 (void)new (this, GT_FIELD_LIST)
4204 GenTreeFieldList(argx->gtOp.gtOp2, offsetof(CORINFO_RefAny, type), TYP_I_IMPL, fieldList);
4205 fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
4206 fp->node = fieldList;
4207 args->gtOp.gtOp1 = fieldList;
4209 #else // _TARGET_64BIT_
// Here we don't need an unsafe value cls check, since the addr of the temp is used only in the mkrefany
4213 unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
4214 lvaSetStruct(tmp, impGetRefAnyClass(), false);
4216 // Build the mkrefany as a comma node:
4217 // (tmp.ptr=argx),(tmp.type=handle)
4218 GenTreeLclFld* destPtrSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, dataPtr));
4219 GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, type));
4220 destPtrSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField());
4221 destPtrSlot->gtFlags |= GTF_VAR_DEF;
4222 destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
4223 destTypeSlot->gtFlags |= GTF_VAR_DEF;
4225 GenTreePtr asgPtrSlot = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1);
4226 GenTreePtr asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2);
4227 GenTreePtr asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot);
4229 // Change the expression to "(tmp=val)"
4230 args->gtOp.gtOp1 = asg;
4232 // EvalArgsToTemps will cause tmp to actually get loaded as the argument
4233 call->fgArgInfo->EvalToTmp(argIndex, tmp, asg);
4234 lvaSetVarAddrExposed(tmp);
4235 #endif // _TARGET_64BIT_
4237 #endif // !LEGACY_BACKEND
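// For example (illustrative): a __makeref(x) argument is a CORINFO_RefAny with a
// 'dataPtr' and a 'type' field. On 32-bit targets the two pointer-sized fields are
// passed directly as GT_FIELD_LIST(dataPtr, GT_FIELD_LIST(type, nullptr));
// on 64-bit targets both fields are stored into a struct temp via the COMMA of the
// two assignments built above, and the temp itself is passed.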
4239 #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
4242 GenTree* lclNode = fgIsIndirOfAddrOfLocal(argx);
4243 if ((lclNode != nullptr) &&
4244 (lvaGetPromotionType(lclNode->AsLclVarCommon()->gtLclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT))
4246 // Make a GT_FIELD_LIST of the field lclVars.
4247 GenTreeLclVarCommon* lcl = lclNode->AsLclVarCommon();
4248 LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]);
4249 GenTreeFieldList* fieldList = nullptr;
4250 for (unsigned fieldLclNum = varDsc->lvFieldLclStart;
4251 fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum)
4253 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
4254 if (fieldList == nullptr)
4256 lcl->SetLclNum(fieldLclNum);
4257 lcl->ChangeOper(GT_LCL_VAR);
4258 lcl->gtType = fieldVarDsc->lvType;
4259 fieldList = new (this, GT_FIELD_LIST)
4260 GenTreeFieldList(lcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, nullptr);
4261 fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
4262 fp->node = fieldList;
4263 args->gtOp.gtOp1 = fieldList;
4267 GenTree* fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType);
4268 fieldList = new (this, GT_FIELD_LIST)
4269 GenTreeFieldList(fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, fieldList);
4274 #endif // _TARGET_X86_ && !LEGACY_BACKEND
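// For example (illustrative, hypothetical local numbers): given
//     struct Pair { int a; int b; };
// promoted independently into V03 ('a', offset 0) and V04 ('b', offset 4), the
// argument becomes GT_FIELD_LIST(V03 @0, GT_FIELD_LIST(V04 @4, nullptr)).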
4276 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4277 if (isStructArg && !isRegArg)
4279 nonRegPassedStructSlots += size;
4282 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4286 } // end foreach argument loop
4290 call->fgArgInfo->ArgsComplete();
4292 #ifdef LEGACY_BACKEND
4293 call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
4294 #if defined(_TARGET_ARM_)
4295 call->gtCallRegUsedMask &= ~argSkippedRegMask;
4297 if (fltArgRegNum > 0)
4299 #if defined(_TARGET_ARM_)
4300 call->gtCallRegUsedMask |= genFltAllRegArgMask(fltArgRegNum) & ~fltArgSkippedRegMask;
4303 #endif // LEGACY_BACKEND
4306 if (call->gtCallArgs)
4308 UpdateGT_LISTFlags(call->gtCallArgs);
4311 /* Process the function address, if indirect call */
4313 if (call->gtCallType == CT_INDIRECT)
4315 call->gtCallAddr = fgMorphTree(call->gtCallAddr);
4318 call->fgArgInfo->RecordStkLevel(fgPtrArgCntCur);
4320 if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr))
4325 /* Remember the maximum value we ever see */
4327 if (fgPtrArgCntMax < fgPtrArgCntCur)
4329 JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
4330 fgPtrArgCntMax = fgPtrArgCntCur;
4333 assert(fgPtrArgCntCur >= genPtrArgCntSav);
4334 call->fgArgInfo->SetStkSizeBytes((fgPtrArgCntCur - genPtrArgCntSav) * TARGET_POINTER_SIZE);
4336 /* The call will pop all the arguments we pushed */
4338 fgPtrArgCntCur = genPtrArgCntSav;
4340 #if FEATURE_FIXED_OUT_ARGS
4342 // Record the outgoing argument size. If the call is a fast tail
// call, it will set up its arguments in the incoming arg area instead
// of the outgoing arg area, so we don't need to track the
4345 // outgoing arg size.
4346 if (!call->IsFastTailCall())
4348 unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
4350 #if defined(UNIX_AMD64_ABI)
4351 opts.compNeedToAlignFrame = true; // this is currently required for the UNIX ABI to work correctly
// ToDo: Remove this re-calculation of preallocatedArgCount and use the value assigned above.
4355 // First slots go in registers only, no stack needed.
4356 // TODO-Amd64-Unix-CQ This calculation is only accurate for integer arguments,
4357 // and ignores floating point args (it is overly conservative in that case).
4358 preallocatedArgCount = nonRegPassedStructSlots;
4359 if (argSlots > MAX_REG_ARG)
4361 preallocatedArgCount += argSlots - MAX_REG_ARG;
4363 #endif // UNIX_AMD64_ABI
4365 const unsigned outgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
4366 call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL));
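// Illustrative arithmetic: if GetNextSlotNum() reports 6 slots, the outgoing area
// is 6 * REGSIZE_BYTES == 48 bytes on a 64-bit target; a call with few or no stack
// args still reserves at least MIN_ARG_AREA_FOR_CALL (on Windows x64 this covers
// the 32-byte home area for the four register arguments).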
4371 printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, outgoingArgSpaceSize=%d\n", argSlots,
4372 preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), outgoingArgSpaceSize);
4376 #endif // FEATURE_FIXED_OUT_ARGS
4378 /* Update the 'side effect' flags value for the call */
4380 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4382 // If the register arguments have already been determined
4383 // or we have no register arguments then we don't need to
4384 // call SortArgs() and EvalArgsToTemps()
// For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
// all cases of fgMakeOutgoingStructArgCopy() being called; hasStackArgCopy
// is added to make sure EvalArgsToTemps() is called in those cases.
4389 if (!reMorphing && (call->fgArgInfo->HasRegArgs()
4390 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4392 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4395 // This is the first time that we morph this call AND it has register arguments.
4396 // Follow into the code below and do the 'defer or eval to temp' analysis.
4398 call->fgArgInfo->SortArgs();
4400 call->fgArgInfo->EvalArgsToTemps();
4402 // We may have updated the arguments
4403 if (call->gtCallArgs)
4405 UpdateGT_LISTFlags(call->gtCallArgs);
4409 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4411 // Rewrite the struct args to be passed by value on stack or in registers.
4412 fgMorphSystemVStructArgs(call, hasStructArgument);
4414 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4416 #ifndef LEGACY_BACKEND
4417 // In the future we can migrate UNIX_AMD64 to use this
4418 // method instead of fgMorphSystemVStructArgs
4420 // We only build GT_FIELD_LISTs for MultiReg structs for the RyuJIT backend
4421 if (hasMultiregStructArgs)
4423 fgMorphMultiregStructArgs(call);
4425 #endif // LEGACY_BACKEND
4427 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4432 fgArgInfoPtr argInfo = call->fgArgInfo;
4433 for (unsigned curInx = 0; curInx < argInfo->ArgCount(); curInx++)
4435 fgArgTabEntryPtr curArgEntry = argInfo->ArgTable()[curInx];
4436 curArgEntry->Dump();
4444 #pragma warning(pop)
4447 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4448 // fgMorphSystemVStructArgs:
4449 // Rewrite the struct args to be passed by value on stack or in registers.
4452 // call: The call whose arguments need to be morphed.
4453 // hasStructArgument: Whether this call has struct arguments.
4455 void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
4457 unsigned flagsSummary = 0;
4461 if (hasStructArgument)
4463 fgArgInfoPtr allArgInfo = call->fgArgInfo;
4465 for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
// For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
// For such late args the gtCallArgList contains the setup arg node (which evaluates the arg).
// The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the
// mapping between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points
// to the arg itself; otherwise it points to the node in the late args list.
4472 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4473 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4474 assert(fgEntryPtr != nullptr);
4475 GenTreePtr argx = fgEntryPtr->node;
4476 GenTreePtr lateList = nullptr;
4477 GenTreePtr lateNode = nullptr;
4481 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4483 assert(list->OperIsList());
4485 GenTreePtr argNode = list->Current();
4486 if (argx == argNode)
4493 assert(lateList != nullptr && lateNode != nullptr);
4495 GenTreePtr arg = argx;
4496 bool argListCreated = false;
4498 var_types type = arg->TypeGet();
4500 if (varTypeIsStruct(type))
4502 var_types originalType = type;
4503 // If we have already processed the arg...
4504 if (arg->OperGet() == GT_FIELD_LIST && varTypeIsStruct(arg))
// If it is already an OBJ, it is set properly.
4510 if (arg->OperGet() == GT_OBJ)
4512 assert(!fgEntryPtr->structDesc.passedInRegisters);
4516 assert(arg->OperGet() == GT_LCL_VAR || arg->OperGet() == GT_LCL_FLD ||
4517 (arg->OperGet() == GT_ADDR &&
4518 (arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD || arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR)));
4520 GenTreeLclVarCommon* lclCommon =
4521 arg->OperGet() == GT_ADDR ? arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
4522 if (fgEntryPtr->structDesc.passedInRegisters)
4524 if (fgEntryPtr->structDesc.eightByteCount == 1)
// Change the type; the code below will change the LclVar to a LCL_FLD
4527 type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
4528 fgEntryPtr->structDesc.eightByteSizes[0]);
4530 else if (fgEntryPtr->structDesc.eightByteCount == 2)
4532 // Create LCL_FLD for each eightbyte.
4533 argListCreated = true;
4536 arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
4538 GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
4539 fgEntryPtr->structDesc.eightByteSizes[0]);
4540 GenTreeFieldList* fieldList =
4541 new (this, GT_FIELD_LIST) GenTreeFieldList(arg, 0, originalType, nullptr);
4542 fieldList->gtType = originalType; // Preserve the type. It is a special case.
4545 // Second eightbyte.
4546 GenTreeLclFld* newLclField = new (this, GT_LCL_FLD)
4547 GenTreeLclFld(GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc
4548 .eightByteClassifications[1],
4549 fgEntryPtr->structDesc.eightByteSizes[1]),
4550 lclCommon->gtLclNum, fgEntryPtr->structDesc.eightByteOffsets[1]);
4552 fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(newLclField, 0, originalType, fieldList);
4553 fieldList->gtType = originalType; // Preserve the type. It is a special case.
4554 newLclField->gtFieldSeq = FieldSeqStore::NotAField();
4558 assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes
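// For example (illustrative, hypothetical local number): for
//     struct S { double d; long l; };
// eightbyte 0 is SSE and eightbyte 1 is INTEGER, so the two LCL_FLDs created above
// yield GT_FIELD_LIST(LCL_FLD<double> V01 [+0], GT_FIELD_LIST(LCL_FLD<long> V01 [+8], nullptr)).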
// If we didn't change the type of the struct, it means
// its classification doesn't allow it to be passed directly in
// a register, so we need to pass a pointer to the destination
// where we copied the struct to.
4567 if (!argListCreated)
4569 if (fgEntryPtr->structDesc.passedInRegisters)
4575 // Make sure this is an addr node.
4576 if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
4578 arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
4581 assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);
4583 // Create an Obj of the temp to use it as a call argument.
4584 arg = gtNewObjNode(lvaGetStruct(lclCommon->gtLclNum), arg);
4591 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4592 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4593 assert(fgEntryPtr != nullptr);
4594 GenTreePtr argx = fgEntryPtr->node;
4595 GenTreePtr lateList = nullptr;
4596 GenTreePtr lateNode = nullptr;
4599 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4601 assert(list->OperIsList());
4603 GenTreePtr argNode = list->Current();
4604 if (argx == argNode)
4611 assert(lateList != nullptr && lateNode != nullptr);
4614 fgEntryPtr->node = arg;
4617 lateList->gtOp.gtOp1 = arg;
4621 args->gtOp.gtOp1 = arg;
4628 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4630 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4632 //-----------------------------------------------------------------------------
4633 // fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and
4634 // call fgMorphMultiregStructArg on each of them.
4637 // call: a GenTreeCall node that has one or more TYP_STRUCT arguments
// We only call fgMorphMultiregStructArg for the register-passed TYP_STRUCT arguments.
4641 // The call to fgMorphMultiregStructArg will mutate the argument into the GT_FIELD_LIST form
4642 // which is only used for struct arguments.
4643 // If this method fails to find any TYP_STRUCT arguments it will assert.
4645 void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
4649 bool foundStructArg = false;
4650 unsigned initialFlags = call->gtFlags;
4651 unsigned flagsSummary = 0;
4652 fgArgInfoPtr allArgInfo = call->fgArgInfo;
// Currently ARM and ARM64 use this method to morph the MultiReg struct args;
// in the future UNIX_AMD64 will also use this method.
4656 CLANG_FORMAT_COMMENT_ANCHOR;
4659 assert(!"Logic error: no MultiregStructArgs for X86");
4661 #ifdef _TARGET_AMD64_
4662 #if defined(UNIX_AMD64_ABI)
4663 NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)");
4664 #else // WINDOWS_AMD64_ABI
4665 assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
4666 #endif // !UNIX_AMD64_ABI
4669 for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
// For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
// For such late args the gtCallArgList contains the setup arg node (which evaluates the arg).
// The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the
// mapping between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points
// to the arg itself; otherwise it points to the node in the late args list.
4676 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4677 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4678 assert(fgEntryPtr != nullptr);
4679 GenTreePtr argx = fgEntryPtr->node;
4680 GenTreePtr lateList = nullptr;
4681 GenTreePtr lateNode = nullptr;
4685 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4687 assert(list->OperIsList());
4689 GenTreePtr argNode = list->Current();
4690 if (argx == argNode)
4697 assert(lateList != nullptr && lateNode != nullptr);
4700 GenTreePtr arg = argx;
4702 if (arg->TypeGet() == TYP_STRUCT)
4704 foundStructArg = true;
4706 arg = fgMorphMultiregStructArg(arg, fgEntryPtr);
4708 // Did we replace 'argx' with a new tree?
4711 fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node
4713 // link the new arg node into either the late arg list or the gtCallArgs list
4716 lateList->gtOp.gtOp1 = arg;
4720 args->gtOp.gtOp1 = arg;
4726 // We should only call this method when we actually have one or more multireg struct args
4727 assert(foundStructArg);
4730 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4733 //-----------------------------------------------------------------------------
// fgMorphMultiregStructArg: Given a multireg TYP_STRUCT arg from a call argument list,
// morph the argument into a set of GT_FIELD_LIST nodes.
4738 // arg - A GenTree node containing a TYP_STRUCT arg that
4739 // is to be passed in multiple registers
4740 // fgEntryPtr - the fgArgTabEntry information for the current 'arg'
4743 // arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT that is suitable
4744 // for passing in multiple registers.
// If arg is a LclVar, we check whether it is struct promoted and has the right number of fields,
// and whether the fields are at the appropriate offsets; if so, we use the struct promoted fields
// in the GT_FIELD_LIST nodes that we create.
// If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements,
// we use a set of GT_LCL_FLD nodes to access the various portions of the struct;
// this also forces the struct to be stack allocated into the local frame.
// For the GT_OBJ case we clone the address expression and generate two (or more)
// indirections.
// Currently the implementation handles ARM64/ARM and will NYI for other architectures.
4755 GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr fgEntryPtr)
4757 assert(arg->TypeGet() == TYP_STRUCT);
4759 #ifndef _TARGET_ARMARCH_
4760 NYI("fgMorphMultiregStructArg requires implementation for this target");
4763 #if FEATURE_MULTIREG_ARGS
// Examine 'arg' and set up argValue, objClass and structSize
4766 CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE;
4767 GenTreePtr argValue = arg; // normally argValue will be arg, but see right below
4768 unsigned structSize = 0;
4770 if (arg->OperGet() == GT_OBJ)
4772 GenTreeObj* argObj = arg->AsObj();
4773 objClass = argObj->gtClass;
4774 structSize = info.compCompHnd->getClassSize(objClass);
4776 // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR
4778 if (argObj->gtOp1->OperGet() == GT_ADDR)
4780 argValue = argObj->gtOp1->gtOp.gtOp1;
4783 else if (arg->OperGet() == GT_LCL_VAR)
4785 GenTreeLclVarCommon* varNode = arg->AsLclVarCommon();
4786 unsigned varNum = varNode->gtLclNum;
4787 assert(varNum < lvaCount);
4788 LclVarDsc* varDsc = &lvaTable[varNum];
4790 objClass = lvaGetStruct(varNum);
4791 structSize = varDsc->lvExactSize;
4793 noway_assert(objClass != nullptr);
4795 var_types hfaType = TYP_UNDEF;
4796 var_types elemType = TYP_UNDEF;
4797 unsigned elemCount = 0;
4798 unsigned elemSize = 0;
4799 var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0
4801 hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
4802 if (varTypeIsFloating(hfaType))
4805 elemSize = genTypeSize(elemType);
4806 elemCount = structSize / elemSize;
4807 assert(elemSize * elemCount == structSize);
4808 for (unsigned inx = 0; inx < elemCount; inx++)
4810 type[inx] = elemType;
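// For example (illustrative): for an HFA such as
//     struct V3 { float x; float y; float z; };
// hfaType is TYP_FLOAT, elemSize is 4, elemCount is 3, and type[0..2] are all
// TYP_FLOAT, so the struct is passed in three consecutive float registers.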
4815 #ifdef _TARGET_ARM64_
4816 assert(structSize <= 2 * TARGET_POINTER_SIZE);
4817 #elif defined(_TARGET_ARM_)
4818 assert(structSize <= 4 * TARGET_POINTER_SIZE);
4821 #ifdef _TARGET_ARM64_
4822 BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
4823 info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
4825 type[0] = getJitGCType(gcPtrs[0]);
4826 type[1] = getJitGCType(gcPtrs[1]);
4827 #elif defined(_TARGET_ARM_)
4828 BYTE gcPtrs[4] = {TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE};
4829 elemCount = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;
4830 info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
4831 for (unsigned inx = 0; inx < elemCount; inx++)
4833 type[inx] = getJitGCType(gcPtrs[inx]);
4835 #endif // _TARGET_ARM_
4837 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
4839 elemSize = TARGET_POINTER_SIZE;
4840 // We can safely widen this to aligned bytes since we are loading from
4841 // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and
4842 // lives in the stack frame or will be a promoted field.
4844 structSize = elemCount * TARGET_POINTER_SIZE;
4846 else // we must have a GT_OBJ
4848 assert(argValue->OperGet() == GT_OBJ);
// We need to load the struct from an arbitrary address
// and we can't read past the end of the structSize;
// we adjust the last load type here accordingly.
4854 unsigned remainingBytes = structSize % TARGET_POINTER_SIZE;
4855 unsigned lastElem = elemCount - 1;
4856 if (remainingBytes != 0)
4858 switch (remainingBytes)
4861 type[lastElem] = TYP_BYTE;
4864 type[lastElem] = TYP_SHORT;
4866 #ifdef _TARGET_ARM64_
4868 type[lastElem] = TYP_INT;
4870 #endif // _TARGET_ARM64_
4872 noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
4878 // We should still have a TYP_STRUCT
4879 assert(argValue->TypeGet() == TYP_STRUCT);
4881 GenTreeFieldList* newArg = nullptr;
4883 // Are we passing a struct LclVar?
4885 if (argValue->OperGet() == GT_LCL_VAR)
4887 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
4888 unsigned varNum = varNode->gtLclNum;
4889 assert(varNum < lvaCount);
4890 LclVarDsc* varDsc = &lvaTable[varNum];
4892 // At this point any TYP_STRUCT LclVar must be an aligned struct
// or an HFA struct, both of which are passed by value.
4895 assert((varDsc->lvSize() == elemCount * TARGET_POINTER_SIZE) || varDsc->lvIsHfa());
4897 varDsc->lvIsMultiRegArg = true;
JITDUMP("Multireg struct argument V%02u : ", varNum);
4907 // This local variable must match the layout of the 'objClass' type exactly
4908 if (varDsc->lvIsHfa())
// We have an HFA struct
4911 noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE));
4912 noway_assert(elemSize == genTypeSize(elemType));
4913 noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
4914 noway_assert(elemSize * elemCount == varDsc->lvExactSize);
4916 for (unsigned inx = 0; (inx < elemCount); inx++)
4918 noway_assert(type[inx] == elemType);
4923 #ifdef _TARGET_ARM64_
4924 // We must have a 16-byte struct (non-HFA)
4925 noway_assert(elemCount == 2);
4926 #elif defined(_TARGET_ARM_)
4927 noway_assert(elemCount <= 4);
4930 for (unsigned inx = 0; inx < elemCount; inx++)
4932 CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx];
// We set up the type[inx] value above using the GC info from 'objClass'
4935 // This GT_LCL_VAR must have the same GC layout info
4937 if (currentGcLayoutType != TYPE_GC_NONE)
4939 noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType));
// We may have used a small type when we set up the type[inx] values above
4944 // We can safely widen this to TYP_I_IMPL
4945 type[inx] = TYP_I_IMPL;
4950 #ifdef _TARGET_ARM64_
4951 // Is this LclVar a promoted struct with exactly 2 fields?
4952 // TODO-ARM64-CQ: Support struct promoted HFA types here
4953 if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa())
// See if we have two promoted fields, one starting at offset 0 and one at offset 8.
4956 unsigned loVarNum = lvaGetFieldLocal(varDsc, 0);
4957 unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE);
4959 // Did we find the promoted fields at the necessary offsets?
4960 if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM))
4962 LclVarDsc* loVarDsc = &lvaTable[loVarNum];
4963 LclVarDsc* hiVarDsc = &lvaTable[hiVarNum];
4965 var_types loType = loVarDsc->lvType;
4966 var_types hiType = hiVarDsc->lvType;
4968 if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
4970 // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer
4971 // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n", varNum);
4976 // we call lvaSetVarDoNotEnregister and do the proper transformation below.
// We can use the struct promoted fields as the two arguments
4983 GenTreePtr loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum);
4984 GenTreePtr hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum);
4986 // Create a new tree for 'arg'
4987 // replace the existing LDOBJ(ADDR(LCLVAR))
4988 // with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr))
4990 newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(loLclVar, 0, loType, nullptr);
4991 (void)new (this, GT_FIELD_LIST) GenTreeFieldList(hiLclVar, TARGET_POINTER_SIZE, hiType, newArg);
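// For example (illustrative, hypothetical local numbers): for a promoted
//     struct S { Object* o; long l; };
// with field locals V05 (TYP_REF at offset 0) and V06 (TYP_LONG at offset 8), the
// argument becomes GT_FIELD_LIST(V05 @0, GT_FIELD_LIST(V06 @8, nullptr)) and is
// passed in two consecutive integer registers.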
// We will create a list of GT_LCL_FLD nodes to pass this struct
5000 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5002 #elif defined(_TARGET_ARM_)
// Is this LclVar a promoted struct whose field count matches the register slot count?
5004 if (varDsc->lvPromoted && (varDsc->lvFieldCnt == elemCount) && !varDsc->lvIsHfa())
// See if we have promoted fields at each slot offset.
5007 unsigned varNums[4];
5008 bool hasBadVarNum = false;
5009 for (unsigned inx = 0; inx < elemCount; inx++)
5011 varNums[inx] = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE * inx);
5012 if (varNums[inx] == BAD_VAR_NUM)
5014 hasBadVarNum = true;
5019 // Did we find the promoted fields at the necessary offsets?
5022 LclVarDsc* varDscs[4];
5023 var_types varType[4];
5024 bool varIsFloat = false;
5026 for (unsigned inx = 0; inx < elemCount; inx++)
5028 varDscs[inx] = &lvaTable[varNums[inx]];
5029 varType[inx] = varDscs[inx]->lvType;
5030 if (varTypeIsFloating(varType[inx]))
5032 // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the
5034 // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n", varNum);
5039 // we call lvaSetVarDoNotEnregister and do the proper transformation below.
5048 unsigned offset = 0;
5049 GenTreeFieldList* listEntry = nullptr;
// We can use the struct promoted fields as the arguments
5051 for (unsigned inx = 0; inx < elemCount; inx++)
5053 GenTreePtr lclVar = gtNewLclvNode(varNums[inx], varType[inx], varNums[inx]);
5054 // Create a new tree for 'arg'
5055 // replace the existing LDOBJ(ADDR(LCLVAR))
5056 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(lclVar, offset, varType[inx], listEntry);
5057 if (newArg == nullptr)
5061 offset += TARGET_POINTER_SIZE;
// We will create a list of GT_LCL_FLD nodes to pass this struct
5071 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5073 #endif // _TARGET_ARM_
// If we didn't set newArg to a new GT_FIELD_LIST tree
5078 if (newArg == nullptr)
5080 if (fgEntryPtr->regNum == REG_STK)
5082 // We leave this stack passed argument alone
// Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted)?
// Note that a GT_LCL_FLD could also contain a 16-byte struct or an HFA struct inside it.
5089 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
5091 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
5092 unsigned varNum = varNode->gtLclNum;
5093 assert(varNum < lvaCount);
5094 LclVarDsc* varDsc = &lvaTable[varNum];
5096 unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
5097 unsigned lastOffset = baseOffset + (elemCount * elemSize);
5099 // The allocated size of our LocalVar must be at least as big as lastOffset
5100 assert(varDsc->lvSize() >= lastOffset);
5102 if (varDsc->lvStructGcCount > 0)
5104 // alignment of the baseOffset is required
5105 noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
5106 noway_assert(elemSize == TARGET_POINTER_SIZE);
5107 unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE;
5108 const BYTE* gcPtrs = varDsc->lvGcLayout; // Get the GC layout for the local variable
5109 for (unsigned inx = 0; (inx < elemCount); inx++)
// The GC information must match what we set up using 'objClass'
5112 noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
5115 else // this varDsc contains no GC pointers
5117 for (unsigned inx = 0; inx < elemCount; inx++)
// The GC information must match what we set up using 'objClass'
5120 noway_assert(!varTypeIsGC(type[inx]));
// We create a list of GT_LCL_FLD nodes to pass this struct
5127 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5129 // Create a new tree for 'arg'
5130 // replace the existing LDOBJ(ADDR(LCLVAR))
5131 // with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...)
5133 unsigned offset = baseOffset;
5134 GenTreeFieldList* listEntry = nullptr;
5135 for (unsigned inx = 0; inx < elemCount; inx++)
5137 elemSize = genTypeSize(type[inx]);
5138 GenTreePtr nextLclFld = gtNewLclFldNode(varNum, type[inx], offset);
5139 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(nextLclFld, offset, type[inx], listEntry);
5140 if (newArg == nullptr)
5147 // Are we passing a GT_OBJ struct?
5149 else if (argValue->OperGet() == GT_OBJ)
5151 GenTreeObj* argObj = argValue->AsObj();
5152 GenTreePtr baseAddr = argObj->gtOp1;
5153 var_types addrType = baseAddr->TypeGet();
5155 // Create a new tree for 'arg'
5156 // replace the existing LDOBJ(EXPR)
5157 // with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...)
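// For example (illustrative): on ARM64 a 12-byte GT_OBJ has elemCount == 2 with
// type[0] == TYP_LONG and type[1] == TYP_INT, producing
// FIELD_LIST(IND<long>(addr), FIELD_LIST(IND<int>(addr + 8), nullptr)), so the
// last load never reads past the end of the 12-byte struct.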
5160 unsigned offset = 0;
5161 GenTreeFieldList* listEntry = nullptr;
5162 for (unsigned inx = 0; inx < elemCount; inx++)
5164 elemSize = genTypeSize(type[inx]);
5165 GenTreePtr curAddr = baseAddr;
5168 GenTreePtr baseAddrDup = gtCloneExpr(baseAddr);
5169 noway_assert(baseAddrDup != nullptr);
5170 curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL));
5176 GenTreePtr curItem = gtNewOperNode(GT_IND, type[inx], curAddr);
// For safety all GT_IND nodes should have at least GTF_GLOB_REF set.
5179 curItem->gtFlags |= GTF_GLOB_REF;
5180 if (fgAddrCouldBeNull(curItem))
5182 // This indirection can cause a GPF if the address could be null.
5183 curItem->gtFlags |= GTF_EXCEPT;
5186 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(curItem, offset, type[inx], listEntry);
5187 if (newArg == nullptr)
5197 // If we reach here we should have set newArg to something
5198 if (newArg == nullptr)
5200 gtDispTree(argValue);
5201 assert(!"Missing case in fgMorphMultiregStructArg");
5206 printf("fgMorphMultiregStructArg created tree:\n");
5211 arg = newArg; // consider calling fgMorphTree(newArg);
5213 #endif // FEATURE_MULTIREG_ARGS
// Make a copy of a struct variable if necessary, to pass to a callee.
// Updates the argument entry in place to refer to the copy; this function returns nothing.
5220 void Compiler::fgMakeOutgoingStructArgCopy(
5224 CORINFO_CLASS_HANDLE copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
5225 const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
5227 GenTree* argx = args->Current();
5228 noway_assert(argx->gtOper != GT_MKREFANY);
5229 // See if we need to insert a copy at all
5230 // Case 1: don't need a copy if it is the last use of a local. We can't determine that all of the time
5231 // but if there is only one use and no loops, the use must be last.
5232 GenTreeLclVarCommon* lcl = nullptr;
5233 if (argx->OperIsLocal())
5235 lcl = argx->AsLclVarCommon();
5237 else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal())
5239 lcl = argx->AsObj()->Addr()->AsLclVarCommon();
5243 unsigned varNum = lcl->AsLclVarCommon()->GetLclNum();
5244 if (lvaIsImplicitByRefLocal(varNum))
5246 LclVarDsc* varDsc = &lvaTable[varNum];
5247 // JIT_TailCall helper has an implicit assumption that all tail call arguments live
// on the caller's frame. If an argument lives on the caller's caller's frame, it may get
5249 // overwritten if that frame is reused for the tail call. Therefore, we should always copy
5250 // struct parameters if they are passed as arguments to a tail call.
5251 if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt == 1) && !fgMightHaveLoop())
5253 varDsc->lvRefCnt = 0;
5254 args->gtOp.gtOp1 = lcl;
5255 fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
5258 JITDUMP("did not have to make outgoing copy for V%2d", varNum);
5264 if (fgOutgoingArgTemps == nullptr)
5266 fgOutgoingArgTemps = hashBv::Create(this);
5272 // Attempt to find a local we have already used for an outgoing struct and reuse it.
5273 // We do not reuse within a statement.
5274 if (!opts.MinOpts())
5277 FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps)
5279 LclVarDsc* varDsc = &lvaTable[lclNum];
5280 if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) &&
5281 !fgCurrentlyInUseArgTemps->testBit(lclNum))
5283 tmp = (unsigned)lclNum;
5285 JITDUMP("reusing outgoing struct arg");
5292 // Create the CopyBlk tree and insert it.
// Here we don't need an unsafe value cls check, since the addr of this temp is used only in the copyblk.
5297 tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
5298 lvaSetStruct(tmp, copyBlkClass, false);
5299 fgOutgoingArgTemps->setBit(tmp);
5302 fgCurrentlyInUseArgTemps->setBit(tmp);
// TYP_SIMD structs should not be enregistered, since the ABI requires them to be
// allocated on the stack and their address to be passed.
5306 if (lclVarIsSIMDType(tmp))
5308 lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct));
5311 // Create a reference to the temp
5312 GenTreePtr dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType);
5313 dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction.
5315 // TODO-Cleanup: This probably shouldn't be done here because arg morphing is done prior
5316 // to ref counting of the lclVars.
5317 lvaTable[tmp].incRefCnts(compCurBB->getBBWeight(this), this);
5320 if (argx->gtOper == GT_OBJ)
5322 argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT);
5326 argx->gtFlags |= GTF_DONT_CSE;
5329 // Copy the valuetype to the temp
5330 unsigned size = info.compCompHnd->getClassSize(copyBlkClass);
5331 GenTreePtr copyBlk = gtNewBlkOpNode(dest, argx, size, false /* not volatile */, true /* copyBlock */);
5332 copyBlk = fgMorphCopyBlock(copyBlk);
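// Illustrative shape of the result (assuming a hypothetical temp tmpN): passing a
// local struct 's' by value becomes
//     copyBlk:  tmpN = s    (block copy into the fresh temp)
//     arg:      tmpN        (what the call actually consumes)
// so later writes to 's' cannot affect the callee's view of the argument.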
5334 #if FEATURE_FIXED_OUT_ARGS
// Do the copy early, and evaluate the temp later (see EvalArgsToTemps)
// When on Unix, create a LCL_FLD for structs passed in more than one register. See fgMakeTmpArgNode
5338 GenTreePtr arg = copyBlk;
5340 #else // FEATURE_FIXED_OUT_ARGS
// Structs are always on the stack, and thus never need temps,
// so we have to put the copy and the temp into one expression
5344 GenTreePtr arg = fgMakeTmpArgNode(tmp FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters));
5346 // Change the expression to "(tmp=val),tmp"
5347 arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
5349 #endif // FEATURE_FIXED_OUT_ARGS
5351 args->gtOp.gtOp1 = arg;
5352 call->fgArgInfo->EvalToTmp(argIndex, tmp, arg);
5358 // See declaration for specification comment.
5359 void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
5360 unsigned firstArgRegNum,
5361 regMaskTP* pArgSkippedRegMask)
5363 assert(varDsc->lvPromoted);
5364 // There's no way to do these calculations without breaking abstraction and assuming that
5365 // integer register arguments are consecutive ints. They are on ARM.
5367 // To start, figure out what register contains the last byte of the first argument.
5368 LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
5369 unsigned lastFldRegOfLastByte =
5370 (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5373 // Now we're keeping track of the register that the last field ended in; see what registers
5374 // subsequent fields start in, and whether any are skipped.
5375 // (We assume here the invariant that the fields are sorted in offset order.)
5376 for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++)
5378 unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset;
5379 LclVarDsc* fldVarDsc = &lvaTable[fldVarNum];
5380 unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
5381 assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields.
// This loop enumerates the offsets of any registers skipped: start at the first
// register after the one containing the last byte of the previous field, and
// stop before the register containing the start of the current field.
5385 for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset;
5386 skippedRegOffsets++)
5388 // If the register number would not be an arg reg, we're done.
5389 if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG)
5391 *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets));
5393 lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
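// For example (illustrative): on ARM a promoted struct { int a; double d; } with
// field offsets 0 and 8, passed starting at r0, puts 'a' in r0 and 'd' in r2,r3;
// the loop above records the skipped r1 in *pArgSkippedRegMask.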
5397 #endif // _TARGET_ARM_
5399 //****************************************************************************
5400 // fgFixupStructReturn:
// The companion to impFixupCallStructReturn. Now that the importer is done,
// change the gtType to the precomputed native return type.
// Requires that callNode currently has a struct type.
5405 void Compiler::fgFixupStructReturn(GenTreePtr callNode)
5407 assert(varTypeIsStruct(callNode));
5409 GenTreeCall* call = callNode->AsCall();
5410 bool callHasRetBuffArg = call->HasRetBufArg();
5411 bool isHelperCall = call->IsHelperCall();
5413 // Decide on the proper return type for this call that currently returns a struct
5415 CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd;
5416 Compiler::structPassingKind howToReturnStruct;
5417 var_types returnType;
5419 // There are a couple of Helper Calls that say they return a TYP_STRUCT but they
5420 // expect this method to re-type this to a TYP_REF (what is in call->gtReturnType)
5422 // CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD
5423 // CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD
5424 // CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL
5428 assert(!callHasRetBuffArg);
5429 assert(retClsHnd == NO_CLASS_HANDLE);
5431 // Now that we are past the importer, re-type this node
5432 howToReturnStruct = SPK_PrimitiveType;
5433 returnType = (var_types)call->gtReturnType;
5437 returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
5440 if (howToReturnStruct == SPK_ByReference)
5442 assert(returnType == TYP_UNKNOWN);
5443 assert(callHasRetBuffArg);
5447 assert(returnType != TYP_UNKNOWN);
5449 if (returnType != TYP_STRUCT)
5451 // Widen the primitive type if necessary
5452 returnType = genActualType(returnType);
5454 call->gtType = returnType;
5457 #if FEATURE_MULTIREG_RET
// Either we don't have a struct now, or if we do, then it is returned in registers or via a return buffer.
5459 assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg);
5460 #else // !FEATURE_MULTIREG_RET
5461 // No more struct returns
5462 assert(call->TypeGet() != TYP_STRUCT);
5465 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5466 // If it was a struct return, it has been transformed into a call
5467 // with a return buffer (that returns TYP_VOID) or into a return
5468 // of a primitive/enregisterable type
5469 assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
5473 /*****************************************************************************
5475 * A little helper used to rearrange nested commutative operations. The
5476 * effect is that nested associative, commutative operations are transformed
5477 * into a 'left-deep' tree, i.e. into something like this:
5479 * (((a op b) op c) op d) op...
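 *
 *  For example (illustrative): "a + (b + (c + d))" becomes "((a + b) + c) + d",
 *  by repeatedly rewriting "(x op (y op z))" into "((x op y) op z)".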
5484 void Compiler::fgMoveOpsLeft(GenTreePtr tree)
5492 op1 = tree->gtOp.gtOp1;
5493 op2 = tree->gtOp.gtOp2;
5494 oper = tree->OperGet();
5496 noway_assert(GenTree::OperIsCommutative(oper));
5497 noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL);
5498 noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
5499 noway_assert(oper == op2->gtOper);
5501 // Commutativity doesn't hold if overflow checks are needed
5503 if (tree->gtOverflowEx() || op2->gtOverflowEx())
5508 if (gtIsActiveCSE_Candidate(op2))
5510 // If we have marked op2 as a CSE candidate,
5511 // we can't perform a commutative reordering
5512 // because any value numbers that we computed for op2
5513 // will be incorrect after performing a commutative reordering
5518 if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
5523 // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
5524 if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
5529 if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN)
// We could deal with this, but we have always been broken here and just hit the assert
// below regarding flags, which means it's not frequent, so we will just bail out.
5537 noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());
5539 GenTreePtr ad1 = op2->gtOp.gtOp1;
5540 GenTreePtr ad2 = op2->gtOp.gtOp2;
// Compiler::optOptimizeBools() can create a GT_OR of two GC pointers yielding a GT_INT.
// We cannot reorder such GT_OR trees.
5545 if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
5550 /* Change "(x op (y op z))" to "(x op y) op z" */
5551 /* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */
5553 GenTreePtr new_op1 = op2;
5555 new_op1->gtOp.gtOp1 = op1;
5556 new_op1->gtOp.gtOp2 = ad1;
5558 /* Change the flags. */
// Make sure we aren't throwing away any flags
5561 noway_assert((new_op1->gtFlags &
5562 ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
5563 GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated
5564 GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0);
5567 (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
5568 (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT);
/* Retype new_op1 if its GC-ness changed (it became, or ceased to be, a GC pointer). */
5572 if (varTypeIsGC(op1->TypeGet()))
5574 noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5575 oper == GT_ADD) || // byref(ref + (int+int))
5576 (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5577 oper == GT_OR)); // int(gcref | int(gcref|intval))
5579 new_op1->gtType = tree->gtType;
5581 else if (varTypeIsGC(ad2->TypeGet()))
// Neither ad1 nor op1 is GC, so new_op1 isn't either
5584 noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
5585 new_op1->gtType = TYP_I_IMPL;
// If new_op1 is a new expression, assign it a new unique value number.
5589 // vnStore is null before the ValueNumber phase has run
5590 if (vnStore != nullptr)
5592 // We can only keep the old value number on new_op1 if both op1 and ad2
5593 // have the same non-NoVN value numbers. Since op is commutative, comparing
5594 // only ad2 and op1 is enough.
5595 if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5596 (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5597 (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal()))
5599 new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet()));
5603 tree->gtOp.gtOp1 = new_op1;
5604 tree->gtOp.gtOp2 = ad2;
5606 /* If 'new_op1' is now the same nested op, process it recursively */
5608 if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
5610 fgMoveOpsLeft(new_op1);
/* If 'ad2' is now the same nested op, process it.
5614 * Instead of recursion, we set up op1 and op2 for the next loop.
5619 } while ((op2->gtOper == oper) && !op2->gtOverflowEx());
5626 /*****************************************************************************/
5628 void Compiler::fgSetRngChkTarget(GenTreePtr tree, bool delay)
5630 GenTreeBoundsChk* bndsChk = nullptr;
5631 SpecialCodeKind kind = SCK_RNGCHK_FAIL;
5634 if ((tree->gtOper == GT_ARR_BOUNDS_CHECK) || (tree->gtOper == GT_SIMD_CHK))
5635 #else // FEATURE_SIMD
5636 if (tree->gtOper == GT_ARR_BOUNDS_CHECK)
5637 #endif // FEATURE_SIMD
5639 bndsChk = tree->AsBoundsChk();
5640 kind = tree->gtBoundsChk.gtThrowKind;
5644 noway_assert((tree->gtOper == GT_ARR_ELEM) || (tree->gtOper == GT_ARR_INDEX));
5648 unsigned callStkDepth = fgPtrArgCntCur;
5650 // only x86 pushes args
5651 const unsigned callStkDepth = 0;
5658 // we need to initialize this field
5659 if (fgGlobalMorph && bndsChk != nullptr)
5661 bndsChk->gtStkDepth = callStkDepth;
5665 if (!opts.compDbgCode)
5667 if (delay || compIsForInlining())
5669 /* We delay this until after loop-oriented range check
5670 analysis. For now we merely store the current stack
5671 level in the tree node.
5673 if (bndsChk != nullptr)
5675 noway_assert(!bndsChk->gtIndRngFailBB || previousCompletedPhase >= PHASE_OPTIMIZE_LOOPS);
5676 bndsChk->gtStkDepth = callStkDepth;
5681 /* Create/find the appropriate "range-fail" label */
5683 // fgPtrArgCntCur is only valid for global morph or if we walk full stmt.
5684 noway_assert((bndsChk != nullptr) || fgGlobalMorph);
5686 unsigned stkDepth = (bndsChk != nullptr) ? bndsChk->gtStkDepth : callStkDepth;
5688 BasicBlock* rngErrBlk = fgRngChkTarget(compCurBB, stkDepth, kind);
5690 /* Add the label to the indirection node */
5692 if (bndsChk != nullptr)
5694 bndsChk->gtIndRngFailBB = gtNewCodeRef(rngErrBlk);
5700 /*****************************************************************************
5702 * Expand a GT_INDEX node and fully morph the child operands
* The original GT_INDEX node is bashed into the GT_IND node that accesses
5705 * the array element. We expand the GT_INDEX node into a larger tree that
5706 * evaluates the array base and index. The simplest expansion is a GT_COMMA
5707 * with a GT_ARR_BOUND_CHK and a GT_IND with a GTF_INX_RNGCHK flag.
5708 * For complex array or index expressions one or more GT_COMMA assignments
5709 * are inserted so that we only evaluate the array or index expressions once.
5711 * The fully expanded tree is then morphed. This causes gtFoldExpr to
* perform local constant prop, reorder the constants in the tree, and fold them.
5715 * We then parse the resulting array element expression in order to locate
5716 * and label the constants and variables that occur in the tree.
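 *
 *  Illustrative example: for "a[i]" with 4-byte elements, the simplest expansion is
 *
 *      COMMA(ARR_BOUNDS_CHECK(i, ARR_LENGTH(a)),
 *            IND(ADD(a, ADD(MUL(i, 4), elemOffs))))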
5719 const int MAX_ARR_COMPLEXITY = 4;
5720 const int MAX_INDEX_COMPLEXITY = 4;
5722 GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
5724 noway_assert(tree->gtOper == GT_INDEX);
5725 GenTreeIndex* asIndex = tree->AsIndex();
5727 var_types elemTyp = tree->TypeGet();
5728 unsigned elemSize = tree->gtIndex.gtIndElemSize;
5729 CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass;
5731 noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);
5734 if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= getSIMDVectorRegisterByteLength())
5736 // If this is a SIMD type, this is the point at which we lose the type information,
5737 // so we need to set the correct type on the GT_IND.
5738 // (We don't care about the base type here, so we only check, but don't retain, the return value).
5739 unsigned simdElemSize = 0;
5740 if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN)
5742 assert(simdElemSize == elemSize);
5743 elemTyp = getSIMDTypeForSize(elemSize);
5744 // This is the new type of the node.
5745 tree->gtType = elemTyp;
5746 // Now set elemStructType to null so that we don't confuse value numbering.
5747 elemStructType = nullptr;
5750 #endif // FEATURE_SIMD
5752 GenTreePtr arrRef = asIndex->Arr();
5753 GenTreePtr index = asIndex->Index();
// Set up the array length's offset into lenOffs
// and the first element's offset into elemOffs
5759 if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
5761 lenOffs = offsetof(CORINFO_String, stringLen);
5762 elemOffs = offsetof(CORINFO_String, chars);
5763 tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE
5765 else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
5767 lenOffs = offsetof(CORINFO_RefArray, length);
5768 elemOffs = eeGetEEInfo()->offsetOfObjArrayData;
5770 else // We have a standard array
5772 lenOffs = offsetof(CORINFO_Array, length);
5773 elemOffs = offsetof(CORINFO_Array, u1Elems);
5776 bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
5777 bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0);
5779 GenTreePtr arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
5780 GenTreePtr indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression
5781 GenTreePtr bndsChk = nullptr;
5783 // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
5786 GenTreePtr arrRef2 = nullptr; // The second copy will be used in array address expression
5787 GenTreePtr index2 = nullptr;
5789 // If the arrRef expression involves an assignment, a call or reads from global memory,
5790 // then we *must* allocate a temporary in which to "localize" those values,
// to ensure that the same values are used in the bounds check and the actual array access.
5793 // Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
5794 // Note that if 'arrRef' is a GT_FIELD, it has not yet been morphed so its true
5795 // complexity is not exposed. (Without that condition there are cases of local struct
5796 // fields that were previously, needlessly, marked as GTF_GLOB_REF, and when that was
5797 // fixed, there were some regressions that were mostly ameliorated by adding this condition.)
5799 if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
5800 gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD))
5802 unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5803 arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef);
5804 arrRef = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5805 arrRef2 = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5809 arrRef2 = gtCloneExpr(arrRef);
5810 noway_assert(arrRef2 != nullptr);
5813 // If the index expression involves an assignment, a call or reads from global memory,
5814 // we *must* allocate a temporary in which to "localize" those values,
5815 // to ensure that the same values are used in the bounds check and the actual
5817 // Also we allocate the temporary when the index is sufficiently complex/expensive.
5819 if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) ||
(index->OperGet() == GT_FIELD))
5822 unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5823 indexDefn = gtNewTempAssign(indexTmpNum, index);
5824 index = gtNewLclvNode(indexTmpNum, index->TypeGet());
5825 index2 = gtNewLclvNode(indexTmpNum, index->TypeGet());
5829 index2 = gtCloneExpr(index);
5830 noway_assert(index2 != nullptr);
5833 // Next introduce a GT_ARR_BOUNDS_CHECK node
5834 var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.
5836 #ifdef _TARGET_64BIT_
5837 // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case
// of a 64-bit architecture, this means the array index can potentially be a TYP_LONG, so for this case,
// the comparison will have to be widened to 64 bits.
5840 if (index->TypeGet() == TYP_I_IMPL)
5842 bndsChkType = TYP_I_IMPL;
5844 #endif // _TARGET_64BIT_
5846 GenTree* arrLen = new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arrRef, (int)lenOffs);
5848 if (bndsChkType != TYP_INT)
5850 arrLen = gtNewCastNode(bndsChkType, arrLen, bndsChkType);
5853 GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
5854 GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL);
5856 bndsChk = arrBndsChk;
5858 // Make sure to increment ref-counts if already ref-counted.
5859 if (lvaLocalVarRefCounted)
5861 lvaRecursiveIncRefCounts(index);
5862 lvaRecursiveIncRefCounts(arrRef);
5865 // Now we'll switch to using the second copies for arrRef and index
// to compute the address expression

arrRef = arrRef2;
index  = index2;
5872 // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"
5876 #ifdef _TARGET_64BIT_
5877 // Widen 'index' on 64-bit targets
5878 if (index->TypeGet() != TYP_I_IMPL)
5880 if (index->OperGet() == GT_CNS_INT)
5882 index->gtType = TYP_I_IMPL;
else
{
    index = gtNewCastNode(TYP_I_IMPL, index, TYP_I_IMPL);
}
5889 #endif // _TARGET_64BIT_
5891 /* Scale the index value if necessary */
5894 GenTreePtr size = gtNewIconNode(elemSize, TYP_I_IMPL);
5896 // Fix 392756 WP7 Crossgen
// During codegen, optGetArrayRefScaleAndIndex() assumes that op2 of a GT_MUL node
// is a constant; it cannot handle the elemSize constant being CSE'd into a lclvar.
// Hence, to prevent the constant from becoming a CSE candidate, we mark it as NO_CSE.
5902 size->gtFlags |= GTF_DONT_CSE;
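// Illustrative sketch of what this prevents: without GTF_DONT_CSE, a CSE could
// rewrite
//     GT_MUL(index, GT_CNS_INT(4))  -->  GT_MUL(index, GT_LCL_VAR(cseTmp))
// and codegen could no longer recover the scale for the addressing mode.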
5904 /* Multiply by the array element size */
5905 addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
5912 /* Add the object ref to the element's offset */
5914 addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
5916 /* Add the first element's offset */
5918 GenTreePtr cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
5920 addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, cns);
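// For example (illustrative), for "a[i]" on an int[] (elemSize = 4) the address
// subtree built above is:
//     GT_ADD(TYP_BYREF,
//            GT_ADD(TYP_BYREF, arrRef, GT_MUL(TYP_I_IMPL, index, 4)),
//            GT_CNS_INT(elemOffs))
// i.e. "arrRef + ((index * 4) + elemOffs)"; constant folding below may collapse the adds.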
5922 #if SMALL_TREE_NODES
assert((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL);
#endif
// Change the original GT_INDEX node into a GT_IND node
5927 tree->SetOper(GT_IND);
5929 // If the index node is a floating-point type, notify the compiler
5930 // we'll potentially use floating point registers at the time of codegen.
5931 if (varTypeIsFloating(tree->gtType))
5933 this->compFloatingPointUsed = true;
5936 // We've now consumed the GTF_INX_RNGCHK, and the node
5937 // is no longer a GT_INDEX node.
5938 tree->gtFlags &= ~GTF_INX_RNGCHK;
5940 tree->gtOp.gtOp1 = addr;
5942 // This is an array index expression.
5943 tree->gtFlags |= GTF_IND_ARR_INDEX;
5945 /* An indirection will cause a GPF if the address is null */
5946 tree->gtFlags |= GTF_EXCEPT;
if (nCSE)
{
    tree->gtFlags |= GTF_DONT_CSE;
}
5953 // Store information about it.
5954 GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType));
5956 // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it.
5958 GenTreePtr indTree = tree;
// Did we create a bndsChk tree?
if (bndsChk)
5963 // Use a GT_COMMA node to prepend the array bound check
5965 tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree);
5967 /* Mark the indirection node as needing a range check */
5968 fgSetRngChkTarget(bndsChk);
5971 if (indexDefn != nullptr)
5973 // Use a GT_COMMA node to prepend the index assignment
5975 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
5977 if (arrRefDefn != nullptr)
// Use a GT_COMMA node to prepend the arrRef assignment
5981 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
5984 // Currently we morph the tree to perform some folding operations prior
5985 // to attaching fieldSeq info and labeling constant array index contributions
5989 // Ideally we just want to proceed to attaching fieldSeq info and labeling the
5990 // constant array index contributions, but the morphing operation may have changed
5991 // the 'tree' into something that now unconditionally throws an exception.
// In such a case the gtEffectiveVal could be a new tree, or its gtOper could be modified,
// or it could be left unchanged. If it is unchanged then we should not return;
// instead we should proceed to attaching the fieldSeq info, etc...
5997 GenTreePtr arrElem = tree->gtEffectiveVal();
5999 if (fgIsCommaThrow(tree))
6001 if ((arrElem != indTree) || // A new tree node may have been created
6002 (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
6004 return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
6008 assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
6010 addr = arrElem->gtOp.gtOp1;
6012 assert(addr->TypeGet() == TYP_BYREF);
6014 GenTreePtr cnsOff = nullptr;
6015 if (addr->OperGet() == GT_ADD)
6017 if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
6019 cnsOff = addr->gtOp.gtOp2;
6020 addr = addr->gtOp.gtOp1;
6023 while ((addr->OperGet() == GT_ADD) || (addr->OperGet() == GT_SUB))
6025 assert(addr->TypeGet() == TYP_BYREF);
6026 GenTreePtr index = addr->gtOp.gtOp2;
6028 // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
6029 index->LabelIndex(this);
6031 addr = addr->gtOp.gtOp1;
6033 assert(addr->TypeGet() == TYP_REF);
else if (addr->OperGet() == GT_CNS_INT)
{
    cnsOff = addr;
}
6040 FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
6042 if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs))
6044 // Assign it the [#FirstElem] field sequence
6046 cnsOff->gtIntCon.gtFieldSeq = firstElemFseq;
6048 else // We have folded the first element's offset with the index expression
6050 // Build the [#ConstantIndex, #FirstElem] field sequence
6052 FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
6053 FieldSeqNode* fieldSeq = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq);
6055 if (cnsOff == nullptr) // It must have folded into a zero offset
6057 // Record in the general zero-offset map.
6058 GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
6062 cnsOff->gtIntCon.gtFieldSeq = fieldSeq;
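// For example (illustrative, with elemOffs == 8 on an int[]): "a[0]" leaves
// cnsOff == 8, which is labeled [#FirstElem]; "a[2]" folds to cnsOff == 16,
// which gets [#ConstantIndex, #FirstElem] since an index contribution was
// folded into the constant.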
6070 /*****************************************************************************
6072 * Wrap fixed stack arguments for varargs functions to go through varargs
6073 * cookie to access them, except for the cookie itself.
6075 * Non-x86 platforms are allowed to access all arguments directly
* so we don't need this code.
*/
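// A sketch of the rewrite (illustrative): a GT_LCL_VAR for a fixed stack
// argument becomes
//     GT_IND(varType, GT_SUB(TYP_I_IMPL,
//                            GT_LCL_VAR(lvaVarargsBaseOfStkArgs),
//                            GT_CNS_INT(<arg's offset within the arg area>)))
// so the argument is always reached relative to the varargs base pointer.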
6079 GenTreePtr Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs)
6081 /* For the fixed stack arguments of a varargs function, we need to go
through the varargs cookie to access them, except for the
cookie itself. */
6085 LclVarDsc* varDsc = &lvaTable[lclNum];
6087 if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg)
6089 // Create a node representing the local pointing to the base of the args
GenTreePtr ptrArg =
    gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL),
                  gtNewIconNode(varDsc->lvStkOffs - codeGen->intRegState.rsCalleeRegArgCount * sizeof(void*) +
                                lclOffs));
6095 // Access the argument through the local
6097 if (varType == TYP_STRUCT)
6099 tree = gtNewBlockVal(ptrArg, varDsc->lvExactSize);
6103 tree = gtNewOperNode(GT_IND, varType, ptrArg);
6105 tree->gtFlags |= GTF_IND_TGTANYWHERE;
6107 if (varDsc->lvAddrExposed)
6109 tree->gtFlags |= GTF_GLOB_REF;
6112 return fgMorphTree(tree);
6119 /*****************************************************************************
* Transform the given GT_LCL_VAR tree for code generation.
*/
6124 GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree, bool forceRemorph)
6126 noway_assert(tree->gtOper == GT_LCL_VAR);
6128 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
6129 var_types varType = lvaGetRealType(lclNum);
6130 LclVarDsc* varDsc = &lvaTable[lclNum];
6132 if (varDsc->lvAddrExposed)
6134 tree->gtFlags |= GTF_GLOB_REF;
#ifdef _TARGET_X86_
if (info.compIsVarArgs)
6140 GenTreePtr newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0);
6141 if (newTree != nullptr)
6143 if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
6145 fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
6150 #endif // _TARGET_X86_
6152 /* If not during the global morphing phase bail */
6154 if (!fgGlobalMorph && !forceRemorph)
6159 bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0;
6161 noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr
6163 if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad())
6165 #if LOCAL_ASSERTION_PROP
6166 /* Assertion prop can tell us to omit adding a cast here */
if (optLocalAssertionProp && optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX)
{
    return tree;
}
#endif
6172 /* Small-typed arguments and aliased locals are normalized on load.
6173 Other small-typed locals are normalized on store.
All small-typed locals are normalized on load under the debugger, since the debugger could write to the variable.
6175 If this is one of the former, insert a narrowing cast on the load.
6176 ie. Convert: var-short --> cast-short(var-int) */
6178 tree->gtType = TYP_INT;
6179 fgMorphTreeDone(tree);
6180 tree = gtNewCastNode(TYP_INT, tree, varType);
6181 fgMorphTreeDone(tree);
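// For example (illustrative): a normalize-on-load TYP_SHORT local V02 read as
// a value becomes
//     GT_CAST(TYP_SHORT, GT_LCL_VAR(TYP_INT, V02))
// i.e. the load is performed at INT width and then explicitly narrowed.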
6188 /*****************************************************************************
6189 Grab a temp for big offset morphing.
This method will grab a new temp if no temp of this "type" has been created yet,
or it will return the cached one if it has.
*/
6193 unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type)
6195 unsigned lclNum = fgBigOffsetMorphingTemps[type];
6197 if (lclNum == BAD_VAR_NUM)
6199 // We haven't created a temp for this kind of type. Create one now.
6200 lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing"));
6201 fgBigOffsetMorphingTemps[type] = lclNum;
6205 // We better get the right type.
6206 noway_assert(lvaTable[lclNum].TypeGet() == type);
noway_assert(lclNum != BAD_VAR_NUM);
return lclNum;
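// Usage sketch (see fgMorphField below): the object reference is spilled into
// the shared per-type temp before an explicit null check is built:
//     lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet()));
//     asg    = gtNewTempAssign(lclNum, objRef);
// Caching one temp per type keeps big-offset morphing from growing the local
// table on every occurrence.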
6213 /*****************************************************************************
* Transform the given GT_FIELD tree for code generation.
*/
6218 GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
6220 assert(tree->gtOper == GT_FIELD);
6222 CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd;
6223 unsigned fldOffset = tree->gtField.gtFldOffset;
6224 GenTreePtr objRef = tree->gtField.gtFldObj;
6225 bool fieldMayOverlap = false;
6226 bool objIsLocal = false;
6228 if (fgGlobalMorph && (objRef != nullptr) && (objRef->gtOper == GT_ADDR))
6230 // Make sure we've checked if 'objRef' is an address of an implicit-byref parameter.
// If it is, fgMorphImplicitByRefArgs may change it to a different opcode, which the
6232 // simd field rewrites are sensitive to.
6233 fgMorphImplicitByRefArgs(objRef);
6236 noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) ||
6237 ((tree->gtFlags & GTF_GLOB_REF) != 0));
6239 if (tree->gtField.gtFldMayOverlap)
6241 fieldMayOverlap = true;
6242 // Reset the flag because we may reuse the node.
6243 tree->gtField.gtFldMayOverlap = false;
// If this field belongs to a SIMD struct, translate it to a SIMD intrinsic.
6250 GenTreePtr newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
6251 if (newTree != tree)
6253 newTree = fgMorphSmpOp(newTree);
6257 else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
6259 GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr();
6262 lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUGARG(DNER_LocalField));
6267 /* Is this an instance data member? */
6272 objIsLocal = objRef->IsLocal();
6274 if (tree->gtFlags & GTF_IND_TLS_REF)
6276 NO_WAY("instance field can not be a TLS ref.");
6279 /* We'll create the expression "*(objRef + mem_offs)" */
6281 noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL);
6283 // An optimization for Contextful classes:
6284 // we unwrap the proxy when we have a 'this reference'
6285 if (info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef))
6287 objRef = fgUnwrapProxy(objRef);
/*
    Now we have a tree like this:

                              +--------------------+
                              |      GT_FIELD      |   tree
                              +----------+---------+
                                         |
                          +--------------+-------------+
                          |   tree->gtField.gtFldObj   |
                          +--------------+-------------+

    We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):

                              +--------------------+
                              |   GT_IND/GT_OBJ    |   tree
                              +---------+----------+
                                        |
                              +---------+----------+
                              |       GT_ADD       |   addr
                              +---------+----------+
                                        |
                                      /   \
                                    /       \
                     +-------------------+  +----------------------+
                     |       objRef      |  |      fldOffset       |
                     |                   |  | (when fldOffset !=0) |
                     +-------------------+  +----------------------+

    or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):

                              +--------------------+
                              |   GT_IND/GT_OBJ    |   tree
                              +----------+---------+
                                         |
                              +----------+---------+
                              |      GT_COMMA      |   comma2
                              +----------+---------+
                                         |
                                       /   \
                                     /       \
                  +---------+----------+   +---------+----------+
            comma |      GT_COMMA      |   | "+" (i.e. GT_ADD)  |   addr
                  +---------+----------+   +---------+----------+
                            |                        |
                          /   \                    /   \
              +-----+-----+     +-----+-----+    +---------+   +-----------+
          asg |  GT_ASG   | ind |   GT_IND  |    |  tmpLcl |   | fldOffset |
              +-----+-----+     +-----+-----+    +---------+   +-----------+
                    |                 |
                  /   \               |
                /       \             |
      +-----+-----+   +-----+-----+  +-----------+
      |  tmpLcl   |   |  objRef   |  |  tmpLcl   |
      +-----------+   +-----------+  +-----------+
*/
6358 var_types objRefType = objRef->TypeGet();
6360 GenTreePtr comma = nullptr;
6362 bool addedExplicitNullCheck = false;
6364 // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field,
6365 // and thus is equivalent to a MACK_Ind with zero offset.
6366 MorphAddrContext defMAC(MACK_Ind);
6372 // This flag is set to enable the "conservative" style of explicit null-check insertion.
6373 // This means that we insert an explicit null check whenever we create byref by adding a
6374 // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately
6375 // dereferenced). The alternative is "aggressive", which would not insert such checks (for
6376 // small offsets); in this plan, we would transfer some null-checking responsibility to
// callees of methods taking byref parameters. They would have to add explicit null checks
6378 // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in
6379 // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
6380 // large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
6381 // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
6382 // This is left here to point out how to implement it.
6383 CLANG_FORMAT_COMMENT_ANCHOR;
6385 #define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1
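// For example (illustrative C#): "ref int r = ref o.f;" creates a byref from
// 'o' plus a nonzero offset in a MACK_Addr context without dereferencing it;
// under the conservative scheme we insert an explicit null check on 'o' here
// rather than relying on the eventual consumer of 'r' to fault.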
6387 // If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression
6388 // whose address is being taken is either a local or static variable, whose address is necessarily
6389 // non-null, or else it is a field dereference, which will do its own bounds checking if necessary.
6390 if (objRef->gtOper != GT_ADDR && ((mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind) &&
6391 (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset)
6392 #if CONSERVATIVE_NULL_CHECK_BYREF_CREATION
6393 || (mac->m_kind == MACK_Addr && (mac->m_totalOffset + fldOffset > 0))
#else
|| (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr &&
    (mac->m_totalOffset + fldOffset > 0))
#endif
6403 printf("Before explicit null check morphing:\n");
6409 // Create the "comma" subtree
6411 GenTreePtr asg = nullptr;
6416 if (objRef->gtOper != GT_LCL_VAR)
6418 lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet()));
6420 // Create the "asg" node
6421 asg = gtNewTempAssign(lclNum, objRef);
6425 lclNum = objRef->gtLclVarCommon.gtLclNum;
6428 // Create the "nullchk" node.
// Make it TYP_BYTE so we only dereference it for 1 byte.
6430 GenTreePtr lclVar = gtNewLclvNode(lclNum, objRefType);
6431 nullchk = new (this, GT_NULLCHECK) GenTreeIndir(GT_NULLCHECK, TYP_BYTE, lclVar, nullptr);
6433 nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections
6435 // An indirection will cause a GPF if the address is null.
6436 nullchk->gtFlags |= GTF_EXCEPT;
6438 compCurBB->bbFlags |= BBF_HAS_NULLCHECK;
6439 optMethodFlags |= OMF_HAS_NULLCHECK;
6443 // Create the "comma" node.
6444 comma = gtNewOperNode(GT_COMMA,
6445 TYP_VOID, // We don't want to return anything from this "comma" node.
6446 // Set the type to TYP_VOID, so we can select "cmp" instruction
6447 // instead of "mov" instruction later on.
6455 addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node.
6457 addedExplicitNullCheck = true;
6459 else if (fldOffset == 0)
// Generate the "addr" node.
addr = objRef;
6463 FieldSeqNode* fieldSeq =
6464 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6465 GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
6472 #ifdef FEATURE_READYTORUN_COMPILER
6473 if (tree->gtField.gtFieldLookup.addr != nullptr)
6475 GenTreePtr baseOffset = gtNewIconEmbHndNode(tree->gtField.gtFieldLookup.addr, nullptr, GTF_ICON_FIELD_HDL);
6477 if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE)
6479 baseOffset = gtNewOperNode(GT_IND, TYP_I_IMPL, baseOffset);
addr =
    gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr, baseOffset);
#endif

if (fldOffset != 0)
6488 // Generate the "addr" node.
6489 /* Add the member offset to the object's address */
6490 FieldSeqNode* fieldSeq =
6491 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6492 addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr,
6493 gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq));
6496 // Now let's set the "tree" as a GT_IND tree.
6498 tree->SetOper(GT_IND);
6499 tree->gtOp.gtOp1 = addr;
6501 if (fgAddrCouldBeNull(addr))
6503 // This indirection can cause a GPF if the address could be null.
6504 tree->gtFlags |= GTF_EXCEPT;
6507 if (addedExplicitNullCheck)
6510 // Create "comma2" node and link it to "tree".
6513 comma2 = gtNewOperNode(GT_COMMA,
6514 addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node.
6516 tree->gtOp.gtOp1 = comma2;
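// Schematically (illustrative), the null-check pieces now combine as:
//     comma  = COMMA(TYP_VOID, ASG(tmpLcl, objRef), NULLCHECK(tmpLcl))
//     comma2 = COMMA(addrType, comma, ADD(tmpLcl, fldOffset))
//     tree   = IND(comma2)
// matching the second diagram above.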
6522 if (addedExplicitNullCheck)
6524 printf("After adding explicit null check:\n");
6530 else /* This is a static data member */
6532 if (tree->gtFlags & GTF_IND_TLS_REF)
6534 // Thread Local Storage static field reference
6536 // Field ref is a TLS 'Thread-Local-Storage' reference
// Build this tree:  IND(*) #
//                    |
//                   ADD(I_IMPL)
//                   / \
//                  /  CNS(fldOffset)
//                 /
//                /
//               /
//             IND(I_IMPL) == [Base of this DLL's TLS]
//              |
//             ADD(I_IMPL)
//             / \
//            /   CNS(IdValue*4) or MUL
//           /                      / \
//          IND(I_IMPL)            /  CNS(4)
//           |                    /
//          CNS(TLS_HDL,0x2C)    IND
//                                |
//                               CNS(pIdAddr)
//
//                       # Denotes the original node
6560 void** pIdAddr = nullptr;
6561 unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr);
// If we can access the TLS DLL index ID value directly,
// then pIdAddr will be NULL and
// IdValue will be the actual TLS DLL index ID.
6568 GenTreePtr dllRef = nullptr;
6569 if (pIdAddr == nullptr)
6573 dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL);
6578 dllRef = gtNewIconHandleNode((size_t)pIdAddr, GTF_ICON_STATIC_HDL);
6579 dllRef = gtNewOperNode(GT_IND, TYP_I_IMPL, dllRef);
6580 dllRef->gtFlags |= GTF_IND_INVARIANT;
6584 dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL));
6587 #define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides
6589 // Mark this ICON as a TLS_HDL, codegen will use FS:[cns]
6591 GenTreePtr tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);
6593 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6594 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6596 tree->gtFlags &= ~GTF_FLD_INITCLASS;
6597 tlsRef->gtFlags |= GTF_ICON_INITCLASS;
6600 tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
6602 if (dllRef != nullptr)
6604 /* Add the dllRef */
6605 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
6608 /* indirect to have tlsRef point at the base of the DLLs Thread Local Storage */
6609 tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
6613 FieldSeqNode* fieldSeq =
6614 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6615 GenTreePtr fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq);
6617 /* Add the TLS static field offset to the address */
6619 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode);
6622 // Final indirect to get to actual value of TLS static field
6624 tree->SetOper(GT_IND);
6625 tree->gtOp.gtOp1 = tlsRef;
6627 noway_assert(tree->gtFlags & GTF_IND_TLS_REF);
6631 // Normal static field reference
// If we can access the static's address directly,
// then pFldAddr will be NULL and
// fldAddr will be the actual address of the static field.
6638 void** pFldAddr = nullptr;
6639 void* fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr);
6641 if (pFldAddr == nullptr)
6643 #ifdef _TARGET_64BIT_
6644 if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))
// The address is not directly addressable, so force it into a
// constant, so we handle it properly.
6649 GenTreePtr addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL);
6650 addr->gtType = TYP_I_IMPL;
6651 FieldSeqNode* fieldSeq =
6652 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6653 addr->gtIntCon.gtFieldSeq = fieldSeq;
6654 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6655 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6657 tree->gtFlags &= ~GTF_FLD_INITCLASS;
6658 addr->gtFlags |= GTF_ICON_INITCLASS;
6661 tree->SetOper(GT_IND);
6662 // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
6663 // We must clear it when we transform the node.
6664 // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
6665 // that the logic above does its own checking to determine whether a nullcheck is needed.
6666 tree->gtFlags &= ~GTF_IND_ARR_LEN;
6667 tree->gtOp.gtOp1 = addr;
6669 return fgMorphSmpOp(tree);
6672 #endif // _TARGET_64BIT_
6674 // Only volatile or classinit could be set, and they map over
6675 noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_FLD_INITCLASS | GTF_COMMON_MASK)) == 0);
6676 static_assert_no_msg(GTF_FLD_VOLATILE == GTF_CLS_VAR_VOLATILE);
6677 static_assert_no_msg(GTF_FLD_INITCLASS == GTF_CLS_VAR_INITCLASS);
6678 tree->SetOper(GT_CLS_VAR);
6679 tree->gtClsVar.gtClsVarHnd = symHnd;
6680 FieldSeqNode* fieldSeq =
6681 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6682 tree->gtClsVar.gtFieldSeq = fieldSeq;
6689 GenTreePtr addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL);
6691 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6692 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6694 tree->gtFlags &= ~GTF_FLD_INITCLASS;
6695 addr->gtFlags |= GTF_ICON_INITCLASS;
// There are two cases here: either the static is RVA-based,
// in which case the type of the FIELD node is not a GC type
// and the handle to the RVA is a TYP_I_IMPL, or the FIELD node is
// a GC type and the handle to it is a TYP_BYREF in the GC heap,
// because handles to statics now go into the large object heap.
6704 var_types handleTyp = (var_types)(varTypeIsGC(tree->TypeGet()) ? TYP_BYREF : TYP_I_IMPL);
6705 GenTreePtr op1 = gtNewOperNode(GT_IND, handleTyp, addr);
6706 op1->gtFlags |= GTF_IND_INVARIANT;
6708 tree->SetOper(GT_IND);
6709 tree->gtOp.gtOp1 = op1;
6713 noway_assert(tree->gtOper == GT_IND);
6714 // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
6715 // We must clear it when we transform the node.
6716 // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
6717 // that the logic above does its own checking to determine whether a nullcheck is needed.
6718 tree->gtFlags &= ~GTF_IND_ARR_LEN;
6720 GenTreePtr res = fgMorphSmpOp(tree);
6722 // If we have a struct type, this node would previously have been under a GT_ADDR,
6723 // and therefore would have been marked GTF_DONT_CSE.
6724 // TODO-1stClassStructs: revisit this.
6725 if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal)
6727 res->gtFlags |= GTF_DONT_CSE;
6730 if (fldOffset == 0 && res->OperGet() == GT_IND)
6732 GenTreePtr addr = res->gtOp.gtOp1;
6733 // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node.
6734 FieldSeqNode* fieldSeq =
6735 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6736 fgAddFieldSeqForZeroOffset(addr, fieldSeq);
6742 //------------------------------------------------------------------------------
6743 // fgMorphCallInline: attempt to inline a call
6746 // call - call expression to inline, inline candidate
6747 // inlineResult - result tracking and reporting
6750 // Attempts to inline the call.
6752 // If successful, callee's IR is inserted in place of the call, and
6753 // is marked with an InlineContext.
// If unsuccessful, the transformations done in anticipation of a
// possible inline are undone, and the candidate flag on the call
// is removed.
6759 void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
// The call must be a candidate for inlining.
6762 assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0);
6764 // Attempt the inline
6765 fgMorphCallInlineHelper(call, inlineResult);
6767 // We should have made up our minds one way or another....
6768 assert(inlineResult->IsDecided());
// If we failed to inline, we have a bit of work to do to clean up
6771 if (inlineResult->IsFailure())
6776 // Before we do any cleanup, create a failing InlineContext to
6777 // capture details of the inlining attempt.
6778 m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult);
6782 // It was an inline candidate, but we haven't expanded it.
6783 if (call->gtCall.gtReturnType != TYP_VOID)
6785 // Detach the GT_CALL tree from the original statement by
// hanging a "nothing" node in its place. Later the "nothing" node will be removed
6787 // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.
6789 noway_assert(fgMorphStmt->gtStmtExpr == call);
6790 fgMorphStmt->gtStmtExpr = gtNewNothingNode();
6793 // Clear the Inline Candidate flag so we can ensure later we tried
6794 // inlining all candidates.
6796 call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE;
6800 /*****************************************************************************
6801 * Helper to attempt to inline a call
6802 * Sets success/failure in inline result
6803 * If success, modifies current method's IR with inlinee's IR
* If failed, undoes any speculative modifications to current method
*/
6807 void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
6809 // Don't expect any surprises here.
6810 assert(result->IsCandidate());
6812 if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING)
6814 // For now, attributing this to call site, though it's really
6815 // more of a budget issue (lvaCount currently includes all
6816 // caller and prospective callee locals). We still might be
6817 // able to inline other callees into this caller, or inline
6818 // this callee in other callers.
6819 result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
6823 if (call->IsVirtual())
6825 result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL);
6829 // impMarkInlineCandidate() is expected not to mark tail prefixed calls
6830 // and recursive tail calls as inline candidates.
6831 noway_assert(!call->IsTailPrefixedCall());
6832 noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call));
6834 /* If the caller's stack frame is marked, then we can't do any inlining. Period.
6835 Although we have checked this in impCanInline, it is possible that later IL instructions
might cause compNeedSecurityCheck to be set. Therefore we need to check it here again.
*/
6839 if (opts.compNeedSecurityCheck)
6841 result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
6846 // Calling inlinee's compiler to inline the method.
6849 unsigned startVars = lvaCount;
6854 printf("Expanding INLINE_CANDIDATE in statement ");
6855 printTreeID(fgMorphStmt);
6856 printf(" in BB%02u:\n", compCurBB->bbNum);
6857 gtDispTree(fgMorphStmt);
6858 if (call->IsImplicitTailCall())
6860 printf("Note: candidate is implicit tail call\n");
6865 impInlineRoot()->m_inlineStrategy->NoteAttempt(result);
6868 // Invoke the compiler to inline the call.
6871 fgInvokeInlineeCompiler(call, result);
6873 if (result->IsFailure())
6875 // Undo some changes made in anticipation of inlining...
6877 // Zero out the used locals
6878 memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable));
6879 for (unsigned i = startVars; i < lvaCount; i++)
6881 new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
6884 lvaCount = startVars;
6889 // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount);
6899 // printf("After inlining lvaCount=%d.\n", lvaCount);
6904 /*****************************************************************************
* Performs checks to see if this tail call can be optimized as epilog+jmp.
*/
6908 bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
6910 #if FEATURE_FASTTAILCALL
6911 // Reached here means that return types of caller and callee are tail call compatible.
6912 // In case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
// In the implicit tail call case, callSig may not be available, but it is guaranteed to be available
// for explicit tail call cases. The reason callSig may not be available for an implicit tail call is that
// a call node might be marked as an inline candidate and could fail to be inlined, in which case
// fgInline() will replace the return value placeholder with the call node using gtCloneExpr(), which
// currently does not copy/set callSig.
6919 CLANG_FORMAT_COMMENT_ANCHOR;
6922 if (callee->IsTailPrefixedCall())
6924 assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass,
6925 (var_types)callee->gtReturnType, callee->callSig->retTypeClass));
6929 // Note on vararg methods:
6930 // If the caller is vararg method, we don't know the number of arguments passed by caller's caller.
// But we can be sure that the in-coming arg area of a vararg caller is sufficient to hold its
6932 // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as
6933 // out-going area required for callee is bounded by caller's fixed argument space.
// Note that the callee being a vararg method is not a problem since we can account for the params being passed.
6937 // Count of caller args including implicit and hidden (i.e. thisPtr, RetBuf, GenericContext, VarargCookie)
6938 unsigned nCallerArgs = info.compArgsCount;
6940 // Count the callee args including implicit and hidden.
6941 // Note that GenericContext and VarargCookie are added by importer while
6942 // importing the call to gtCallArgs list along with explicit user args.
6943 unsigned nCalleeArgs = 0;
6944 if (callee->gtCallObjp) // thisPtr
6949 if (callee->HasRetBufArg()) // RetBuf
6953 // If callee has RetBuf param, caller too must have it.
6954 // Otherwise go the slow route.
6955 if (info.compRetBuffArg == BAD_VAR_NUM)
// Count user args while tracking whether any of them is a multi-byte param
6962 // that cannot be passed in a register. Note that we don't need to count
6963 // non-standard and secret params passed in registers (e.g. R10, R11) since
6964 // these won't contribute to out-going arg size.
6965 bool hasMultiByteArgs = false;
6966 for (GenTreePtr args = callee->gtCallArgs; (args != nullptr) && !hasMultiByteArgs; args = args->gtOp.gtOp2)
6970 assert(args->OperIsList());
6971 GenTreePtr argx = args->gtOp.gtOp1;
6973 if (varTypeIsStruct(argx))
6975 // Actual arg may be a child of a GT_COMMA. Skip over comma opers.
6976 while (argx->gtOper == GT_COMMA)
6978 argx = argx->gtOp.gtOp2;
6981 // Get the size of the struct and see if it is register passable.
6982 CORINFO_CLASS_HANDLE objClass = nullptr;
6984 if (argx->OperGet() == GT_OBJ)
6986 objClass = argx->AsObj()->gtClass;
6988 else if (argx->IsLocal())
6990 objClass = lvaTable[argx->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
6992 if (objClass != nullptr)
6994 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
6996 unsigned typeSize = 0;
6997 hasMultiByteArgs = !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false);
6999 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
7000 // On System V/arm64 the args could be a 2 eightbyte struct that is passed in two registers.
7001 // Account for the second eightbyte in the nCalleeArgs.
7002 // https://github.com/dotnet/coreclr/issues/2666
7003 // TODO-CQ-Amd64-Unix/arm64: Structs of size between 9 to 16 bytes are conservatively estimated
7004 // as two args, since they need two registers whereas nCallerArgs is
7005 // counting such an arg as one. This would mean we will not be optimizing
7006 // certain calls though technically possible.
7008 if (typeSize > TARGET_POINTER_SIZE)
7010 unsigned extraArgRegsToAdd = (typeSize / TARGET_POINTER_SIZE);
7011 nCalleeArgs += extraArgRegsToAdd;
7013 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_
7016 assert(!"Target platform ABI rules regarding passing struct type args in registers");
7018 #endif //_TARGET_AMD64_ || _TARGET_ARM64_
7022 hasMultiByteArgs = true;
// Go the slow route if it has multi-byte params
7028 if (hasMultiByteArgs)
// Reaching here means that the callee has only argument types that can be passed in
// a register and that, if passed on the stack, occupy exactly one stack slot in the out-going arg area.
7035 // If we are passing args on stack for callee and it has more args passed on stack than
7036 // caller, then fast tail call cannot be performed.
7038 // Note that the GC'ness of on stack args need not match since the arg setup area is marked
7039 // as non-interruptible for fast tail calls.
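// For example (illustrative, assuming 4 integer register args): a caller with
// six args (two on the stack) may fast tail call a callee taking six args, but
// not one taking seven (three on the stack), since the callee's outgoing args
// would not fit in the caller's incoming arg area.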
7040 if ((nCalleeArgs > MAX_REG_ARG) && (nCallerArgs < nCalleeArgs))
7051 /*****************************************************************************
* Transform the given GT_CALL tree for tail call code generation.
*/
7055 void Compiler::fgMorphTailCall(GenTreeCall* call)
7057 JITDUMP("fgMorphTailCall (before):\n");
7060 #if defined(_TARGET_ARM_)
7061 // For the helper-assisted tail calls, we need to push all the arguments
7062 // into a single list, and then add a few extra at the beginning
7064 // Check for PInvoke call types that we don't handle in codegen yet.
7065 assert(!call->IsUnmanaged());
7066 assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == NULL));
7068 // First move the this pointer (if any) onto the regular arg list
7069 GenTreePtr thisPtr = NULL;
7070 if (call->gtCallObjp)
7072 GenTreePtr objp = call->gtCallObjp;
7073 call->gtCallObjp = NULL;
7075 if ((call->gtFlags & GTF_CALL_NULLCHECK) || call->IsVirtualVtable())
7077 thisPtr = gtClone(objp, true);
7078 var_types vt = objp->TypeGet();
7079 if (thisPtr == NULL)
7081 // Too complex, so use a temp
7082 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7083 GenTreePtr asg = gtNewTempAssign(lclNum, objp);
7084 if (!call->IsVirtualVtable())
7086 // Add an indirection to get the nullcheck
7087 GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
7088 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
7089 asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
7091 objp = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
7092 thisPtr = gtNewLclvNode(lclNum, vt);
7094 else if (!call->IsVirtualVtable())
7096 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
7097 objp = gtNewOperNode(GT_COMMA, vt, ind, objp);
7098 thisPtr = gtClone(thisPtr, true);
7101 call->gtFlags &= ~GTF_CALL_NULLCHECK;
7104 call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs);
7107 // Add the extra VSD parameter if needed
7108 CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
7109 if (call->IsVirtualStub())
7111 flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
7114 if (call->gtCallType == CT_INDIRECT)
7116 arg = gtClone(call->gtCallAddr, true);
7117 noway_assert(arg != NULL);
7121 noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
7122 ssize_t addr = ssize_t(call->gtStubCallStubAddr);
7123 arg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7125 // Change the call type, so we can add the extra indirection here, rather than in codegen
7126 call->gtCallAddr = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7127 call->gtStubCallStubAddr = NULL;
7128 call->gtCallType = CT_INDIRECT;
7130 // Add the extra indirection to generate the real target
7131 call->gtCallAddr = gtNewOperNode(GT_IND, TYP_I_IMPL, call->gtCallAddr);
7132 call->gtFlags |= GTF_EXCEPT;
7134 // And push the stub address onto the list of arguments
7135 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7137 else if (call->IsVirtualVtable())
7139 // TODO-ARM-NYI: for x64 handle CORINFO_TAILCALL_THIS_IN_SECRET_REGISTER
7141 noway_assert(thisPtr != NULL);
7143 GenTreePtr add = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL));
7144 GenTreePtr vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7145 vtbl->gtFlags |= GTF_EXCEPT;
7147 unsigned vtabOffsOfIndirection;
7148 unsigned vtabOffsAfterIndirection;
7149 info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection);
7151 /* Get the appropriate vtable chunk */
7153 add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
7154 vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7156 /* Now the appropriate vtable slot */
7158 add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL));
7159 vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7161 // Switch this to a plain indirect call
7162 call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
7163 assert(!call->IsVirtual());
7164 call->gtCallType = CT_INDIRECT;
7166 call->gtCallAddr = vtbl;
7167 call->gtCallCookie = NULL;
7168 call->gtFlags |= GTF_EXCEPT;
// Now inject a placeholder for the real call target that codegen will generate
7173 GenTreePtr arg = new (this, GT_NOP) GenTreeOp(GT_NOP, TYP_I_IMPL);
7174 codeGen->genMarkTreeInReg(arg, REG_TAILCALL_ADDR);
7175 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7177 // Lastly inject the pointer for the copy routine
7178 noway_assert(call->callSig != NULL);
7179 void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
7180 arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7181 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7183 // It is now a varargs tail call
7184 call->gtCallMoreFlags = GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL;
7185 call->gtFlags &= ~GTF_CALL_POP_ARGS;
7187 #elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
7189 // x86 classic codegen doesn't require any morphing
7191 // For the helper-assisted tail calls, we need to push all the arguments
7192 // into a single list, and then add a few extra at the beginning or end.
7194 // For AMD64, the tailcall helper (JIT_TailCall) is defined as:
7196 // JIT_TailCall(void* copyRoutine, void* callTarget, <function args>)
7198 // We need to add "copyRoutine" and "callTarget" extra params at the beginning.
7199 // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg
7200 // for callTarget here which will be replaced later with callTarget in tail call lowering.
7202 // For x86, the tailcall helper is defined as:
// JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
//              callTarget)
7207 // Note that the special arguments are on the stack, whereas the function arguments follow
7208 // the normal convention: there might be register arguments in ECX and EDX. The stack will
7209 // look like (highest address at the top):
//      first normal stack argument
//      ...
//      last normal stack argument
//      numberOfOldStackArgs
//      numberOfNewStackArgs
//      flags
//      callTarget
7218 // Each special arg is 4 bytes.
7220 // 'flags' is a bitmask where:
7221 // 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all
7222 // callee-saved registers for tailcall functions. Note that the helper assumes
7223 // that the callee-saved registers live immediately below EBP, and must have been
7224 // pushed in this order: EDI, ESI, EBX.
7225 // 2 == call target is a virtual stub dispatch.
7227 // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details
7228 // on the custom calling convention.
7230 // Check for PInvoke call types that we don't handle in codegen yet.
7231 assert(!call->IsUnmanaged());
7232 assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr));
7234 // Don't support tail calling helper methods
7235 assert(call->gtCallType != CT_HELPER);
// We come this route only for tail prefixed calls that cannot be dispatched as
// fast tail calls
7239 assert(!call->IsImplicitTailCall());
7240 assert(!fgCanFastTailCall(call));
7242 // First move the 'this' pointer (if any) onto the regular arg list. We do this because
7243 // we are going to prepend special arguments onto the argument list (for non-x86 platforms),
7244 // and thus shift where the 'this' pointer will be passed to a later argument slot. In
7245 // addition, for all platforms, we are going to change the call into a helper call. Our code
7246 // generation code for handling calls to helpers does not handle 'this' pointers. So, when we
7247 // do this transformation, we must explicitly create a null 'this' pointer check, if required,
7248 // since special 'this' pointer handling will no longer kick in.
7250 // Some call types, such as virtual vtable calls, require creating a call address expression
7251 // that involves the "this" pointer. Lowering will sometimes create an embedded statement
7252 // to create a temporary that is assigned to the "this" pointer expression, and then use
7253 // that temp to create the call address expression. This temp creation embedded statement
7254 // will occur immediately before the "this" pointer argument, and then will be used for both
7255 // the "this" pointer argument as well as the call address expression. In the normal ordering,
7256 // the embedded statement establishing the "this" pointer temp will execute before both uses
7257 // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the
7258 // normal call argument list, and insert a placeholder which will hold the call address
7259 // expression. For non-x86, things are ok, because the order of execution of these is not
7260 // altered. However, for x86, the call address expression is inserted as the *last* argument
7261 // in the argument list, *after* the "this" pointer. It will be put on the stack, and be
7262 // evaluated first. To ensure we don't end up with out-of-order temp definition and use,
7263 // for those cases where call lowering creates an embedded form temp of "this", we will
7264 // create a temp here, early, that will later get morphed correctly.
7266 if (call->gtCallObjp)
7268 GenTreePtr thisPtr = nullptr;
7269 GenTreePtr objp = call->gtCallObjp;
7270 call->gtCallObjp = nullptr;
#ifdef _TARGET_X86_
if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal())
7276 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7277 GenTreePtr asg = gtNewTempAssign(lclNum, objp);
7279 // COMMA(tmp = "this", tmp)
7280 var_types vt = objp->TypeGet();
7281 GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
7282 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp);
7286 #endif // _TARGET_X86_
7288 #if defined(_TARGET_X86_)
// When targeting x86, the runtime requires that we perform a null check on the `this` argument before tail
7290 // calling to a virtual dispatch stub. This requirement is a consequence of limitations in the runtime's
7291 // ability to map an AV to a NullReferenceException if the AV occurs in a dispatch stub.
7292 if (call->NeedsNullCheck() || call->IsVirtualStub())
#else
if (call->NeedsNullCheck())
7295 #endif // defined(_TARGET_X86_)
7297 // clone "this" if "this" has no side effects.
7298 if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
7300 thisPtr = gtClone(objp, true);
7303 var_types vt = objp->TypeGet();
7304 if (thisPtr == nullptr)
7306 // create a temp if either "this" has side effects or "this" is too complex to clone.
7309 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7310 GenTreePtr asg = gtNewTempAssign(lclNum, objp);
7312 // COMMA(tmp = "this", deref(tmp))
7313 GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
7314 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
7315 asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
7317 // COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
7318 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
7322 // thisPtr = COMMA(deref("this"), "this")
7323 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
7324 thisPtr = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true));
7327 call->gtFlags &= ~GTF_CALL_NULLCHECK;
7334 // During rationalization tmp="this" and null check will
// materialize as embedded stmts in the right execution order.
7336 assert(thisPtr != nullptr);
7337 call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs);
7340 #if defined(_TARGET_AMD64_)
7342 // Add the extra VSD parameter to arg list in case of VSD calls.
7343 // Tail call arg copying thunk will move this extra VSD parameter
7344 // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk()
7345 // in Stublinkerx86.cpp for more details.
7346 CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
7347 if (call->IsVirtualStub())
7349 GenTreePtr stubAddrArg;
7351 flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
7353 if (call->gtCallType == CT_INDIRECT)
7355 stubAddrArg = gtClone(call->gtCallAddr, true);
7356 noway_assert(stubAddrArg != nullptr);
7360 noway_assert((call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0);
7362 ssize_t addr = ssize_t(call->gtStubCallStubAddr);
7363 stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7366 // Push the stub address onto the list of arguments
7367 call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
7370 // Now inject a placeholder for the real call target that Lower phase will generate.
7371 GenTreePtr arg = gtNewIconNode(0, TYP_I_IMPL);
7372 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7374 // Inject the pointer for the copy routine to be used for struct copying
7375 noway_assert(call->callSig != nullptr);
7376 void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
7377 arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7378 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7380 #else // !_TARGET_AMD64_
7382 // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will
7383 // append to the list.
7384 GenTreeArgList** ppArg = &call->gtCallArgs;
7385 for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
7387 ppArg = (GenTreeArgList**)&args->gtOp2;
7389 assert(ppArg != nullptr);
7390 assert(*ppArg == nullptr);
7392 unsigned nOldStkArgsWords =
7393 (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
7394 GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
7395 *ppArg = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs
7396 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7398 // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
7399 // The constant will be replaced.
7400 GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL);
7401 *ppArg = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs
7402 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7404 // Inject a placeholder for the flags.
7405 // The constant will be replaced.
7406 GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL);
7407 *ppArg = gtNewListNode(arg1, nullptr);
7408 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7410 // Inject a placeholder for the real call target that the Lowering phase will generate.
7411 // The constant will be replaced.
7412 GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL);
7413 *ppArg = gtNewListNode(arg0, nullptr);
7415 #endif // !_TARGET_AMD64_
7417 // It is now a varargs tail call dispatched via helper.
7418 call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
7419 call->gtFlags &= ~GTF_CALL_POP_ARGS;
7423 JITDUMP("fgMorphTailCall (after):\n");
7427 //------------------------------------------------------------------------------
7428 // fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop.
7432 // block - basic block ending with a recursive fast tail call
7433 // recursiveTailCall - recursive tail call to transform
7436 // The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop.
7438 void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall)
7440 assert(recursiveTailCall->IsTailCallConvertibleToLoop());
7441 GenTreePtr last = block->lastStmt();
7442 assert(recursiveTailCall == last->gtStmt.gtStmtExpr);
7444 // Transform recursive tail call into a loop.
7446 GenTreePtr earlyArgInsertionPoint = last;
7447 IL_OFFSETX callILOffset = last->gtStmt.gtStmtILoffsx;
7449 // Hoist arg setup statement for the 'this' argument.
7450 GenTreePtr thisArg = recursiveTailCall->gtCallObjp;
7451 if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode())
7453 GenTreePtr thisArgStmt = gtNewStmt(thisArg, callILOffset);
7454 fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt);
7457 // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first;
7458 // then the temps need to be assigned to the method parameters. This is done so that the caller
7459 // parameters are not re-assigned before call arguments depending on them are evaluated.
7460 // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of
7461 // where the next temp or parameter assignment should be inserted.
7463 // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first
7464 // while the second call argument (const 1) doesn't.
7465 // Basic block before tail recursion elimination:
7466 // ***** BB04, stmt 1 (top level)
7467 // [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013)
7468 // [000033] --C - G------ - \--* call void RecursiveMethod
7469 // [000030] ------------ | / --* const int - 1
7470 // [000031] ------------arg0 in rcx + --* +int
7471 // [000029] ------------ | \--* lclVar int V00 arg1
7472 // [000032] ------------arg1 in rdx \--* const int 1
7475 // Basic block after tail recursion elimination :
7476 // ***** BB04, stmt 1 (top level)
7477 // [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7478 // [000030] ------------ | / --* const int - 1
7479 // [000031] ------------ | / --* +int
7480 // [000029] ------------ | | \--* lclVar int V00 arg1
7481 // [000050] - A---------- \--* = int
7482 // [000049] D------N---- \--* lclVar int V02 tmp0
7484 // ***** BB04, stmt 2 (top level)
7485 // [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7486 // [000052] ------------ | / --* lclVar int V02 tmp0
7487 // [000054] - A---------- \--* = int
7488 // [000053] D------N---- \--* lclVar int V00 arg0
7490 // ***** BB04, stmt 3 (top level)
7491 // [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7492 // [000032] ------------ | / --* const int 1
7493 // [000057] - A---------- \--* = int
7494 // [000056] D------N---- \--* lclVar int V01 arg1
7496 GenTreePtr tmpAssignmentInsertionPoint = last;
7497 GenTreePtr paramAssignmentInsertionPoint = last;
7499 // Process early args. They may contain both setup statements for late args and actual args.
7500 // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum
7501 // below has the correct second argument.
7502 int earlyArgIndex = (thisArg == nullptr) ? 0 : 1;
7503 for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs; earlyArgs != nullptr;
7504 (earlyArgIndex++, earlyArgs = earlyArgs->Rest()))
7506 GenTreePtr earlyArg = earlyArgs->Current();
7507 if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode())
7509 if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0)
7511 // This is a setup node so we need to hoist it.
7512 GenTreePtr earlyArgStmt = gtNewStmt(earlyArg, callILOffset);
7513 fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt);
7517 // This is an actual argument that needs to be assigned to the corresponding caller parameter.
7518 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex);
7519 GenTreePtr paramAssignStmt =
7520 fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset,
7521 tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
7522 if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
7524 // All temp assignments will happen before the first param assignment.
7525 tmpAssignmentInsertionPoint = paramAssignStmt;
7531 // Process late args.
7532 int lateArgIndex = 0;
7533 for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs; lateArgs != nullptr;
7534 (lateArgIndex++, lateArgs = lateArgs->Rest()))
7536 // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter.
7537 GenTreePtr lateArg = lateArgs->Current();
7538 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex);
7539 GenTreePtr paramAssignStmt =
7540 fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset,
7541 tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
7543 if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
7545 // All temp assignments will happen before the first param assignment.
7546 tmpAssignmentInsertionPoint = paramAssignStmt;
7550 // If the method has starg.s 0 or ldarga.s 0 a special local (lvaArg0Var) is created so that
7551 // compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that
7552 // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here.
7553 if (!info.compIsStatic && (lvaArg0Var != info.compThisArg))
7555 var_types thisType = lvaTable[info.compThisArg].TypeGet();
7556 GenTreePtr arg0 = gtNewLclvNode(lvaArg0Var, thisType);
7557 GenTreePtr arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType));
7558 GenTreePtr arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset);
7559 fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt);
7563 fgRemoveStmt(block, last);
7565 // Set the loop edge.
7566 block->bbJumpKind = BBJ_ALWAYS;
7567 block->bbJumpDest = fgFirstBBisScratch() ? fgFirstBB->bbNext : fgFirstBB;
7568 fgAddRefPred(block->bbJumpDest, block);
7569 block->bbFlags &= ~BBF_HAS_JMP;
7572 //------------------------------------------------------------------------------
7573 // fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter.
7577 // arg - argument to assign
7578 // argTabEntry - argument table entry corresponding to arg
// block - basic block the call is in
7580 // callILOffset - IL offset of the call
7581 // tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary)
7582 // paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted
7585 // parameter assignment statement if one was inserted; nullptr otherwise.
7587 GenTreePtr Compiler::fgAssignRecursiveCallArgToCallerParam(GenTreePtr arg,
7588 fgArgTabEntryPtr argTabEntry,
7590 IL_OFFSETX callILOffset,
7591 GenTreePtr tmpAssignmentInsertionPoint,
7592 GenTreePtr paramAssignmentInsertionPoint)
7594 // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because
7595 // some argument trees may reference parameters directly.
7597 GenTreePtr argInTemp = nullptr;
7598 unsigned originalArgNum = argTabEntry->argNum;
7599 bool needToAssignParameter = true;
7601 // TODO-CQ: enable calls with struct arguments passed in registers.
7602 noway_assert(!varTypeIsStruct(arg->TypeGet()));
7604 if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl())
7606 // The argument is already assigned to a temp or is a const.
7609 else if (arg->OperGet() == GT_LCL_VAR)
7611 unsigned lclNum = arg->AsLclVar()->gtLclNum;
7612 LclVarDsc* varDsc = &lvaTable[lclNum];
7613 if (!varDsc->lvIsParam)
7615 // The argument is a non-parameter local so it doesn't need to be assigned to a temp.
7618 else if (lclNum == originalArgNum)
7620 // The argument is the same parameter local that we were about to assign so
7621 // we can skip the assignment.
7622 needToAssignParameter = false;
7626 // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve
7627 // any caller parameters. Some common cases are handled above but we may be able to eliminate
7628 // more temp assignments.
7630 GenTreePtr paramAssignStmt = nullptr;
7631 if (needToAssignParameter)
7633 if (argInTemp == nullptr)
7635 // The argument is not assigned to a temp. We need to create a new temp and insert an assignment.
7636 // TODO: we can avoid a temp assignment if we can prove that the argument tree
7637 // doesn't involve any caller parameters.
7638 unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp"));
7639 GenTreePtr tempSrc = arg;
7640 GenTreePtr tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType);
7641 GenTreePtr tmpAssignNode = gtNewAssignNode(tempDest, tempSrc);
7642 GenTreePtr tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset);
7643 fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt);
7644 argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType);
7647 // Now assign the temp to the parameter.
7648 LclVarDsc* paramDsc = lvaTable + originalArgNum;
7649 assert(paramDsc->lvIsParam);
7650 GenTreePtr paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType);
7651 GenTreePtr paramAssignNode = gtNewAssignNode(paramDest, argInTemp);
7652 paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset);
7654 fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt);
7656 return paramAssignStmt;
7659 /*****************************************************************************
7661 * Transform the given GT_CALL tree for code generation.
7664 GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
7666 if (call->CanTailCall())
7668 // It should either be an explicit (i.e. tail prefixed) or an implicit tail call
7669 assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall());
7671 // It cannot be an inline candidate
7672 assert(!call->IsInlineCandidate());
7674 const char* szFailReason = nullptr;
7675 bool hasStructParam = false;
7676 if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
7678 szFailReason = "Might turn into an intrinsic";
7681 if (opts.compNeedSecurityCheck)
7683 szFailReason = "Needs security check";
7685 else if (compLocallocUsed)
7687 szFailReason = "Localloc used";
7689 #ifdef _TARGET_AMD64_
7690 // Needed for Jit64 compat.
7691 // In future, enabling tail calls from methods that need GS cookie check
7692 // would require codegen side work to emit GS cookie check before a tail call.
7694 else if (getNeedsGSSecurityCookie())
7696 szFailReason = "GS Security cookie check";
7700 // DDB 99324: Just disable tailcall under compGcChecks stress mode.
7701 else if (opts.compGcChecks)
7703 szFailReason = "GcChecks";
7706 #if FEATURE_TAILCALL_OPT
7709 // We are still not sure whether it can be a tail call. Because, when converting
7710 // a call to an implicit tail call, we must check that there are no locals with
7711 // their address taken. If this is the case, we have to assume that the address
7712 // has been leaked and the current stack frame must live until after the final call.
7715 // Verify that none of vars has lvHasLdAddrOp or lvAddrExposed bit set. Note
7716 // that lvHasLdAddrOp is much more conservative. We cannot just base it on
7717 // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs
7718 // during morph stage. The reason for also checking lvAddrExposed is that in case
7719 // of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp.
7720 // The combination of lvHasLdAddrOp and lvAddrExposed, though conservative, ensures
7721 // that we are never incorrect.
7723 // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose
7724 // address is taken. Such a flag could be set whenever lvHasLdAddrOp or LvAddrExposed
7725 // is set. This avoids the need for iterating through all lcl vars of the current
7726 // method. Right now throughout the code base we are not consistently using 'set'
7727 // method to set lvHasLdAddrOp and lvAddrExposed flags.
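// Illustrative example (hypothetical C#-like source):
//   int x = 0; Use(&x); return Helper(a, b);   // Helper(...) is an implicit tail call candidate
// Since the address of x may have escaped into Use, the current frame must stay live
// across the call, so the implicit tail call is abandoned here.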
7730 bool hasAddrExposedVars = false;
7731 bool hasStructPromotedParam = false;
7732 bool hasPinnedVars = false;
7734 for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
7736 // If the method is marked as an explicit tail call we will skip the
7737 // following three hazard checks.
7738 // We still must check for any struct parameters and set 'hasStructParam'
7739 // so that we won't transform the recursive tail call into a loop.
7741 if (call->IsImplicitTailCall())
7743 if (varDsc->lvHasLdAddrOp)
7745 hasAddrExposedVars = true;
7748 if (varDsc->lvAddrExposed)
7750 if (lvaIsImplicitByRefLocal(varNum))
7752 // The address of the implicit-byref is a non-address use of the pointer parameter.
7754 else if (varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl))
7756 // The address of the implicit-byref's field is likewise a non-address use of the pointer parameter.
7759 else if (varDsc->lvPromoted && (lvaTable[varDsc->lvFieldLclStart].lvParentLcl != varNum))
7761 // This temp was used for struct promotion bookkeeping. It will not be used, and will have
7762 // its ref count and address-taken flag reset in fgMarkDemotedImplicitByRefArgs.
7763 assert(lvaIsImplicitByRefLocal(lvaTable[varDsc->lvFieldLclStart].lvParentLcl));
7764 assert(fgGlobalMorph);
7768 hasAddrExposedVars = true;
7772 if (varDsc->lvPromoted && varDsc->lvIsParam && !lvaIsImplicitByRefLocal(varNum))
7774 hasStructPromotedParam = true;
7777 if (varDsc->lvPinned)
7779 // A tail call removes the method from the stack, which means the pinning
7780 // goes away for the callee. We can't allow that.
7781 hasPinnedVars = true;
7785 if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam)
7787 hasStructParam = true;
7788 // This prevents transforming a recursive tail call into a loop
7789 // but doesn't prevent tail call optimization so we need to
7790 // look at the rest of the parameters.
7795 if (hasAddrExposedVars)
7797 szFailReason = "Local address taken";
7799 if (hasStructPromotedParam)
7801 szFailReason = "Has Struct Promoted Param";
7805 szFailReason = "Has Pinned Vars";
7808 #endif // FEATURE_TAILCALL_OPT
7810 if (varTypeIsStruct(call))
7812 fgFixupStructReturn(call);
7815 var_types callType = call->TypeGet();
7817 // We have to ensure that we pass the incoming retValBuf as the
7818 // outgoing one. Using a temp will not do as this function will
7819 // not regain control to do the copy.
7821 if (info.compRetBuffArg != BAD_VAR_NUM)
7823 noway_assert(callType == TYP_VOID);
7824 GenTreePtr retValBuf = call->gtCallArgs->gtOp.gtOp1;
7825 if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg)
7827 szFailReason = "Need to copy return buffer";
7831 // If this is an opportunistic tail call and cannot be dispatched as
7832 // a fast tail call, go the non-tail call route. This is done for perf reasons.
7835 // Avoid the cost of determining whether it can be dispatched as a fast tail
7836 // call if we already know that the tail call cannot be honored for other reasons.
7838 bool canFastTailCall = false;
7839 if (szFailReason == nullptr)
7841 canFastTailCall = fgCanFastTailCall(call);
7842 if (!canFastTailCall)
7844 // Implicit or opportunistic tail calls are always dispatched via the fast tail call
7845 // mechanism and never via the tail call helper, for perf reasons.
7846 if (call->IsImplicitTailCall())
7848 szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp";
7850 #ifndef LEGACY_BACKEND
7851 else if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this))
7853 // If we are here, it means that the call is an explicitly ".tail" prefixed and cannot be
7854 // dispatched as a fast tail call.
7856 // Methods with non-standard args will have indirection cell or cookie param passed
7857 // in callee trash register (e.g. R11). Tail call helper doesn't preserve it before
7858 // tail calling the target method and hence the ".tail" prefix on such calls needs to be ignored.
7861 // Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require
7862 // extra stub param (e.g. in R11 on Amd64), they can still be called via tail call helper.
7863 // This is done by adding stubAddr as an additional arg before the original list of
7864 // args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
7865 // in Stublinkerx86.cpp.
7866 szFailReason = "Method with non-standard args passed in callee trash register cannot be tail "
7867 "called via helper";
7869 #ifdef _TARGET_ARM64_
7872 // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER.
7873 // So, bail out if we can't make fast tail call.
7874 szFailReason = "Non-qualified fast tail call";
7877 #endif // LEGACY_BACKEND
7881 // Clear these flags before calling fgMorphCall() to avoid recursion.
7882 bool isTailPrefixed = call->IsTailPrefixedCall();
7883 call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
7885 #if FEATURE_TAILCALL_OPT
7886 call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
7890 if (!canFastTailCall && szFailReason == nullptr)
7892 szFailReason = "Non fast tail calls disabled for PAL based systems.";
7894 #endif // FEATURE_PAL
7896 if (szFailReason != nullptr)
7901 printf("\nRejecting tail call late for call ");
7903 printf(": %s\n", szFailReason);
7907 // for non user funcs, we have no handles to report
7908 info.compCompHnd->reportTailCallDecision(nullptr,
7909 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
7910 isTailPrefixed, TAILCALL_FAIL, szFailReason);
7915 #if !FEATURE_TAILCALL_OPT_SHARED_RETURN
7916 // We enable shared-ret tail call optimization for recursive calls even if
7917 // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined.
7918 if (gtIsRecursiveCall(call))
7921 // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN,
7922 // but if the call falls through to a ret, and we are doing a tailcall, change it here.
7923 if (compCurBB->bbJumpKind != BBJ_RETURN)
7925 compCurBB->bbJumpKind = BBJ_RETURN;
7929 // Set this flag before calling fgMorphCall() to prevent inlining this call.
7930 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
7932 bool fastTailCallToLoop = false;
7933 #if FEATURE_TAILCALL_OPT
7934 // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register
7935 // or the return type is a struct that can be passed in a register.
7937 // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through
7938 // hidden generic context param or through keep alive thisptr), then while transforming a recursive
7939 // call to such a method requires that the generic context stored on stack slot be updated. Right now,
7940 // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming
7941 // a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the
7942 // generic type parameters of both caller and callee generic method are the same.
7943 if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() &&
7944 !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && !varTypeIsStruct(call->TypeGet()))
7946 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
7947 fastTailCallToLoop = true;
7951 // Do some target-specific transformations (before we process the args, etc.)
7952 // This is needed only for tail prefixed calls that cannot be dispatched as fast tail calls.
7954 if (!canFastTailCall)
7956 fgMorphTailCall(call);
7959 // Implementation note: If we optimize the tailcall to do a direct jump
7960 // to the target function (after stomping on the return address, etc),
7961 // without using CORINFO_HELP_TAILCALL, we have to make certain that
7962 // we don't starve the hijacking logic (by stomping on the hijacked
7963 // return address etc).
7965 // At this point, we are committed to do the tailcall.
7966 compTailCallUsed = true;
7968 CorInfoTailCall tailCallResult;
7970 if (fastTailCallToLoop)
7972 tailCallResult = TAILCALL_RECURSIVE;
7974 else if (canFastTailCall)
7976 tailCallResult = TAILCALL_OPTIMIZED;
7980 tailCallResult = TAILCALL_HELPER;
7983 // for non user funcs, we have no handles to report
7984 info.compCompHnd->reportTailCallDecision(nullptr,
7985 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
7986 isTailPrefixed, tailCallResult, nullptr);
7988 // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID
7989 // to avoid doing any extra work for the return value.
7990 call->gtType = TYP_VOID;
7995 printf("\nGTF_CALL_M_TAILCALL bit set for call ");
7998 if (fastTailCallToLoop)
8000 printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
8007 GenTreePtr stmtExpr = fgMorphStmt->gtStmtExpr;
8010 // Tail call needs to be in one of the following IR forms
8011 // Either a call stmt or
8012 // GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..)))
8013 // var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..)))
8014 // GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP)
8016 // GT_CASTs may be nested.
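// For example (illustrative): "return Foo(x);" where Foo returns a small int type while the
// current method returns int can appear as GT_RETURN(GT_CAST<int>(GT_CALL Foo)), and further
// conversions can nest the casts: GT_RETURN(GT_CAST(GT_CAST(GT_CALL Foo))).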
8017 genTreeOps stmtOper = stmtExpr->gtOper;
8018 if (stmtOper == GT_CALL)
8020 noway_assert(stmtExpr == call);
8024 noway_assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA);
8025 GenTreePtr treeWithCall;
8026 if (stmtOper == GT_RETURN)
8028 treeWithCall = stmtExpr->gtGetOp1();
8030 else if (stmtOper == GT_COMMA)
8032 // Second operation must be nop.
8033 noway_assert(stmtExpr->gtGetOp2()->IsNothingNode());
8034 treeWithCall = stmtExpr->gtGetOp1();
8038 treeWithCall = stmtExpr->gtGetOp2();
8042 while (treeWithCall->gtOper == GT_CAST)
8044 noway_assert(!treeWithCall->gtOverflow());
8045 treeWithCall = treeWithCall->gtGetOp1();
8048 noway_assert(treeWithCall == call);
8052 // For void calls, we would have created a GT_CALL in the stmt list.
8053 // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)).
8054 // For calls returning structs, we would have a void call, followed by a void return.
8055 // For debuggable code, it would be an assignment of the call to a temp.
8056 // We want to get rid of any of these extra trees and just leave the call.
8058 GenTreeStmt* nextMorphStmt = fgMorphStmt->gtNextStmt;
8060 #if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_)
8061 // Legacy Jit64 Compat:
8062 // There could be any number of GT_NOPs between tail call and GT_RETURN.
8063 // That is tail call pattern could be one of the following:
8064 // 1) tail.call, nop*, ret
8065 // 2) tail.call, nop*, pop, nop*, ret
8066 // 3) var=tail.call, nop*, ret(var)
8067 // 4) var=tail.call, nop*, pop, ret
8068 // 5) comma(tail.call, nop), nop*, ret
8070 // See impIsTailCallILPattern() for details on tail call IL patterns
8071 // that are supported.
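// A sketch of pattern 2 in IL (illustrative):
//   tail. call int32 Foo::Bar()
//   nop
//   pop          // the ignored return value
//   nop
//   ret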
8072 if (stmtExpr->gtOper != GT_RETURN)
8074 // First delete all GT_NOPs after the call
8075 GenTreeStmt* morphStmtToRemove = nullptr;
8076 while (nextMorphStmt != nullptr)
8078 GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr;
8079 if (!nextStmtExpr->IsNothingNode())
8084 morphStmtToRemove = nextMorphStmt;
8085 nextMorphStmt = morphStmtToRemove->gtNextStmt;
8086 fgRemoveStmt(compCurBB, morphStmtToRemove);
8089 // Check to see if there is a pop.
8090 // Since tail call is honored, we can get rid of the stmt corresponding to pop.
8091 if (nextMorphStmt != nullptr && nextMorphStmt->gtStmtExpr->gtOper != GT_RETURN)
8093 // Note that pop opcode may or may not result in a new stmt (for details see
8094 // impImportBlockCode()). Hence, it is not possible to assert about the IR
8095 // form generated by pop but pop tree must be side-effect free so that we can
8096 // delete it safely.
8097 GenTreeStmt* popStmt = nextMorphStmt;
8098 nextMorphStmt = nextMorphStmt->gtNextStmt;
8100 // Side effect flags on a GT_COMMA may be overly pessimistic, so examine
8101 // the constituent nodes.
8102 GenTreePtr popExpr = popStmt->gtStmtExpr;
8103 bool isSideEffectFree = (popExpr->gtFlags & GTF_ALL_EFFECT) == 0;
8104 if (!isSideEffectFree && (popExpr->OperGet() == GT_COMMA))
8106 isSideEffectFree = ((popExpr->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) == 0) &&
8107 ((popExpr->gtGetOp2()->gtFlags & GTF_ALL_EFFECT) == 0);
8109 noway_assert(isSideEffectFree);
8110 fgRemoveStmt(compCurBB, popStmt);
8113 // Next delete any GT_NOP nodes after pop
8114 while (nextMorphStmt != nullptr)
8116 GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr;
8117 if (!nextStmtExpr->IsNothingNode())
8122 morphStmtToRemove = nextMorphStmt;
8123 nextMorphStmt = morphStmtToRemove->gtNextStmt;
8124 fgRemoveStmt(compCurBB, morphStmtToRemove);
8127 #endif // !FEATURE_CORECLR && _TARGET_AMD64_
8129 // Delete GT_RETURN if any
8130 if (nextMorphStmt != nullptr)
8132 GenTreePtr retExpr = nextMorphStmt->gtStmtExpr;
8133 noway_assert(retExpr->gtOper == GT_RETURN);
8135 // If var=call, then the next stmt must be a GT_RETURN(TYP_VOID) or GT_RETURN(var).
8136 // This can occur if impSpillStackEnsure() has introduced an assignment to a temp.
8137 if (stmtExpr->gtOper == GT_ASG && info.compRetType != TYP_VOID)
8139 noway_assert(stmtExpr->gtGetOp1()->OperIsLocal());
8140 noway_assert(stmtExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum ==
8141 retExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum);
8144 fgRemoveStmt(compCurBB, nextMorphStmt);
8147 fgMorphStmt->gtStmtExpr = call;
8149 // Tail call via helper: The VM can't use return address hijacking if we're
8150 // not going to return and the helper doesn't have enough info to safely poll,
8151 // so we poll before the tail call, if the block isn't already safe. Since
8152 // tail call via helper is a slow mechanism, it doesn't matter whether we emit a
8153 // GC poll. This is done to be in parity with Jit64. Also this avoids GC info
8154 // size increase if almost all methods are expected to be tail calls (e.g. F#).
8156 // Note that we can avoid emitting GC-poll if we know that the current BB is
8157 // dominated by a Gc-SafePoint block. But we don't have dominator info at this
8158 // point. One option is to just add a placeholder node for GC-poll (e.g. GT_GCPOLL)
8159 // here and remove it in lowering if the block is dominated by a GC-SafePoint. For
8160 // now it is not clear whether optimizing slow tail calls is worth the effort. As a
8161 // low cost check, we check whether the first and current basic blocks are GC-SafePoints.
8164 // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder()
8165 // is going to mark the method as fully interruptible if the block containing this tail
8166 // call is reachable without executing any call.
8167 if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
8168 !fgCreateGCPoll(GCPOLL_INLINE, compCurBB))
8170 // We didn't insert a poll block, so we need to morph the call now
8171 // (Normally it will get morphed when we get to the split poll block)
8172 GenTreePtr temp = fgMorphCall(call);
8173 noway_assert(temp == call);
8176 // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to
8177 // the target. So we don't need an epilog - just like CORINFO_HELP_THROW.
8179 // Fast tail call: in case of fast tail calls, we need a jmp epilog and
8180 // hence mark it as BBJ_RETURN with BBF_JMP flag set.
8181 noway_assert(compCurBB->bbJumpKind == BBJ_RETURN);
8183 if (canFastTailCall)
8185 compCurBB->bbFlags |= BBF_HAS_JMP;
8189 compCurBB->bbJumpKind = BBJ_THROW;
8192 // For non-void calls, we return a placeholder which will be
8193 // used by the parent GT_RETURN node of this call.
8195 GenTree* result = call;
8196 if (callType != TYP_VOID && info.compRetType != TYP_VOID)
8199 // Return a dummy node, as the return is already removed.
8200 if (callType == TYP_STRUCT)
8202 // This is a HFA, use float 0.
8203 callType = TYP_FLOAT;
8205 #elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
8206 // Return a dummy node, as the return is already removed.
8207 if (varTypeIsStruct(callType))
8209 // This is a register-returned struct. Return a 0.
8210 // The actual return registers are hacked in lower and the register allocator.
8215 // Return a dummy node, as the return is already removed.
8216 if (varTypeIsSIMD(callType))
8218 callType = TYP_DOUBLE;
8221 result = gtNewZeroConNode(genActualType(callType));
8222 result = fgMorphTree(result);
8230 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 &&
8231 (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR)
8232 #ifdef FEATURE_READYTORUN_COMPILER
8233 || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
8236 (call == fgMorphStmt->gtStmtExpr))
8238 // This is a call to CORINFO_HELP_VIRTUAL_FUNC_PTR with an ignored result.
8239 // Transform it into a null check.
8241 GenTreePtr thisPtr = call->gtCallArgs->gtOp.gtOp1;
8243 GenTreePtr nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr);
8244 nullCheck->gtFlags |= GTF_EXCEPT;
8246 return fgMorphTree(nullCheck);
8249 noway_assert(call->gtOper == GT_CALL);
8252 // Only count calls once (only in the global morph phase)
8256 if (call->gtCallType == CT_INDIRECT)
8259 optIndirectCallCount++;
8261 else if (call->gtCallType == CT_USER_FUNC)
8264 if (call->IsVirtual())
8266 optIndirectCallCount++;
8271 // Couldn't inline - remember that this BB contains method calls
8273 // If this is a 'regular' call, mark the basic block as
8274 // having a call (for computing full interruptibility).
8275 CLANG_FORMAT_COMMENT_ANCHOR;
8277 #ifdef _TARGET_AMD64_
8278 // Amd64 note: If this is a fast tail call then don't count it as a call
8279 // since we don't insert GC-polls but instead make the method fully GC interruptible.
8281 if (!call->IsFastTailCall())
8284 if (call->gtCallType == CT_INDIRECT)
8286 compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
8288 else if (call->gtCallType == CT_USER_FUNC)
8290 if ((call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK) == 0)
8292 compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
8295 // otherwise we have a CT_HELPER
8298 // Morph Type.op_Equality and Type.op_Inequality
8299 // We need to do this before the arguments are morphed
8300 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
8302 CorInfoIntrinsics methodID = info.compCompHnd->getIntrinsicID(call->gtCallMethHnd);
8304 genTreeOps simpleOp = GT_CALL;
8305 if (methodID == CORINFO_INTRINSIC_TypeEQ)
8309 else if (methodID == CORINFO_INTRINSIC_TypeNEQ)
8314 if (simpleOp == GT_EQ || simpleOp == GT_NE)
8316 noway_assert(call->TypeGet() == TYP_INT);
8318 // Check for GetClassFromHandle(handle) and obj.GetType() both of which will only return RuntimeType
8319 // objects. Then if either operand is one of these two calls we can simplify op_Equality/op_Inequality to
8320 // GT_EQ/GT_NE: One important invariant that should never change is that type equivalency is always
8321 // equivalent to object identity equality for runtime type objects in reflection. This is also reflected
8322 // in RuntimeTypeHandle::TypeEquals. If this invariant is ever broken, we need to remove the
8323 // optimization below.
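// For example (illustrative): 'typeof(T1) == typeof(T2)' is imported as a call to
// op_Equality whose two arguments evaluate to RuntimeType objects; here it becomes
// GT_EQ(op1, op2), and fgMorphSmpOp may fold it further when both sides are typeof(...)
// of known handles.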
8325 GenTreePtr op1 = call->gtCallArgs->gtOp.gtOp1;
8326 GenTreePtr op2 = call->gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
8328 if (gtCanOptimizeTypeEquality(op1) || gtCanOptimizeTypeEquality(op2))
8330 GenTreePtr compare = gtNewOperNode(simpleOp, TYP_INT, op1, op2);
8332 // fgMorphSmpOp will further optimize the following patterns:
8333 // 1. typeof(...) == typeof(...)
8334 // 2. typeof(...) == obj.GetType()
8335 return fgMorphTree(compare);
8340 // Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack.
8341 GenTreePtr origDest = nullptr; // Will only become non-null if we do the transformation (and thus require a copy-back).
8343 unsigned retValTmpNum = BAD_VAR_NUM;
8344 CORINFO_CLASS_HANDLE structHnd = nullptr;
8345 if (call->HasRetBufArg() &&
8346 call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null).
8348 // We're enforcing the invariant that return buffers pointers (at least for
8349 // struct return types containing GC pointers) are never pointers into the heap.
8350 // The large majority of cases are address of local variables, which are OK.
8351 // Otherwise, allocate a local of the given struct type, pass its address,
8352 // then assign from that into the proper destination. (We don't need to do this
8353 // if we're passing the caller's ret buff arg to the callee, since the caller's caller
8354 // will maintain the same invariant.)
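// Sketch of the rewrite (illustrative): for 'someObj.field = MakeStruct();' where the
// ret buff pointer would point into the GC heap, we produce, in effect:
//   tmp = stack-allocated local of the struct type;
//   call(&tmp, ...);        // callee writes into the temp
//   someObj.field = tmp;    // copy-back, emitted below as a cpobj with the proper barriers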
8356 GenTreePtr dest = call->gtCallArgs->gtOp.gtOp1;
8357 assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above.
8358 if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR))
8360 // We'll exempt helper calls from this, assuming that the helper implementation
8361 // follows the old convention, and does whatever barrier is required.
8362 if (call->gtCallType != CT_HELPER)
8364 structHnd = call->gtRetClsHnd;
8365 if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) &&
8366 !((dest->OperGet() == GT_LCL_VAR || dest->OperGet() == GT_REG_VAR) &&
8367 dest->gtLclVar.gtLclNum == info.compRetBuffArg))
8371 retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
8372 lvaSetStruct(retValTmpNum, structHnd, true);
8373 dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
8378 call->gtCallArgs->gtOp.gtOp1 = dest;
8381 /* Process the "normal" argument list */
8382 call = fgMorphArgs(call);
8383 noway_assert(call->gtOper == GT_CALL);
8385 // Morph stelem.ref helper call to store a null value, into a store into an array without the helper.
8386 // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
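// For example (illustrative): 'arr[i] = null;' imports as the helper call
//   CORINFO_HELP_ARRADDR_ST(arr, i, null)
// and, since storing null can never fail the array covariance check, it is rewritten
// here into a plain GT_ASG(GT_INDEX(arr, i), null).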
8387 if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
8389 GenTree* value = gtArgEntryByArgNum(call, 2)->node;
8390 if (value->IsIntegralConst(0))
8392 assert(value->OperGet() == GT_CNS_INT);
8394 GenTree* arr = gtArgEntryByArgNum(call, 0)->node;
8395 GenTree* index = gtArgEntryByArgNum(call, 1)->node;
8397 // Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy
8398 // the spill trees as well if necessary.
8399 GenTreeOp* argSetup = nullptr;
8400 for (GenTreeArgList* earlyArgs = call->gtCallArgs; earlyArgs != nullptr; earlyArgs = earlyArgs->Rest())
8402 GenTree* const arg = earlyArgs->Current();
8403 if (arg->OperGet() != GT_ASG)
8409 assert(arg != index);
8411 arg->gtFlags &= ~GTF_LATE_ARG;
8413 GenTree* op1 = argSetup;
8416 op1 = gtNewNothingNode();
8418 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8422 argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg);
8425 argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8430 auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult {
8431 (*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
8432 return WALK_CONTINUE;
8435 fgWalkTreePost(&arr, resetMorphedFlag);
8436 fgWalkTreePost(&index, resetMorphedFlag);
8437 fgWalkTreePost(&value, resetMorphedFlag);
8440 GenTree* const nullCheckedArr = impCheckForNullPointer(arr);
8441 GenTree* const arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
8442 GenTree* const arrStore = gtNewAssignNode(arrIndexNode, value);
8443 arrStore->gtFlags |= GTF_ASG;
8445 GenTree* result = fgMorphTree(arrStore);
8446 if (argSetup != nullptr)
8448 result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result);
8450 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8458 // Optimize get_ManagedThreadId(get_CurrentThread)
8459 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
8460 info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId)
8462 noway_assert(origDest == nullptr);
8463 noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != nullptr);
8465 GenTreePtr innerCall = call->gtCallLateArgs->gtOp.gtOp1;
8467 if (innerCall->gtOper == GT_CALL && (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
8468 info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) ==
8469 CORINFO_INTRINSIC_GetCurrentManagedThread)
8471 // substitute expression with call to helper
8472 GenTreePtr newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT, 0);
8473 JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n");
8474 return fgMorphTree(newCall);
8478 if (origDest != nullptr)
8480 GenTreePtr retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
8481 // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused
8482 // var, which would allow the whole assignment to be optimized away to a NOP. So in that case, make the
8483 // origDest into a comma that uses the var. Note that the var doesn't have to be a temp for this to be correct.
8485 if (origDest->OperGet() == GT_ASG)
8487 if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
8489 GenTreePtr var = origDest->gtOp.gtOp1;
8490 origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest,
8491 gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet()));
8494 GenTreePtr copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false);
8495 copyBlk = fgMorphTree(copyBlk);
8496 GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk);
8498 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8503 if (call->IsNoReturn())
8506 // If we know that the call does not return then we can set fgRemoveRestOfBlock
8507 // to remove all subsequent statements and change the call's basic block to BBJ_THROW.
8508 // As a result the compiler won't need to preserve live registers across the call.
8510 // This isn't needed for tail calls as there shouldn't be any code after the call anyway.
8511 // Besides, the tail call code is part of the epilog and converting the block to
8512 // BBJ_THROW would result in the tail call being dropped as the epilog is generated
8513 // only for BBJ_RETURN blocks.
8515 // Currently this doesn't work for non-void callees. Some of the code that handles
8516 // fgRemoveRestOfBlock expects the tree to have GTF_EXCEPT flag set but call nodes
8517 // do not have this flag by default. We could add the flag here but the proper solution
8518 // would be to replace the return expression with a local var node during inlining
8519 // so the rest of the call tree stays in a separate statement. That statement can then
8520 // be removed by fgRemoveRestOfBlock without needing to add GTF_EXCEPT anywhere.
8523 if (!call->IsTailCall() && call->TypeGet() == TYP_VOID)
8525 fgRemoveRestOfBlock = true;
8532 /*****************************************************************************
8534 * Transform the given GTK_CONST tree for code generation.
8537 GenTreePtr Compiler::fgMorphConst(GenTreePtr tree)
8539 noway_assert(tree->OperKind() & GTK_CONST);
8541 /* Clear any exception flags or other unnecessary flags
8542 * that may have been set before folding this node to a constant */
8544 tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
8546 if (tree->OperGet() != GT_CNS_STR)
8551 // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
8552 // guarantee slow performance for that block. Instead cache the return value
8553 // of CORINFO_HELP_STRCNS and go to the cache first, giving reasonable perf.
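// For example (illustrative): for 'throw new Exception("msg")' in a BBJ_THROW block, the
// literal "msg" is fetched at run time via CORINFO_HELP_STRCNS (passing the RID of the
// string token, plus the module handle for the cross-module variant) rather than being
// constructed eagerly at jit time.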
8555 if (compCurBB->bbJumpKind == BBJ_THROW)
8557 CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd);
8558 if (helper != CORINFO_HELP_UNDEF)
8560 // For unimportant blocks, we want to construct the string lazily
8562 GenTreeArgList* args;
8563 if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE)
8565 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT));
8569 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT),
8570 gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd));
8573 tree = gtNewHelperCallNode(helper, TYP_REF, 0, args);
8574 return fgMorphTree(tree);
8578 assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd));
8581 InfoAccessType iat =
8582 info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd, tree->gtStrCon.gtSconCPX, &pValue);
8584 tree = gtNewStringLiteralNode(iat, pValue);
8586 return fgMorphTree(tree);
8589 /*****************************************************************************
8591 * Transform the given GTK_LEAF tree for code generation.
8594 GenTreePtr Compiler::fgMorphLeaf(GenTreePtr tree)
8596 noway_assert(tree->OperKind() & GTK_LEAF);
8598 if (tree->gtOper == GT_LCL_VAR)
8600 const bool forceRemorph = false;
8601 return fgMorphLocalVar(tree, forceRemorph);
8604 else if (tree->gtOper == GT_LCL_FLD)
8606 if (info.compIsVarArgs)
8608 GenTreePtr newTree =
8609 fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
8610 if (newTree != nullptr)
8612 if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
8614 fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
8620 #endif // _TARGET_X86_
8621 else if (tree->gtOper == GT_FTN_ADDR)
8623 CORINFO_CONST_LOOKUP addrInfo;
8625 #ifdef FEATURE_READYTORUN_COMPILER
8626 if (tree->gtFptrVal.gtEntryPoint.addr != nullptr)
8628 addrInfo = tree->gtFptrVal.gtEntryPoint;
8633 info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo);
8636 // Refer to gtNewIconHandleNode() as the template for constructing a constant handle
8638 tree->SetOper(GT_CNS_INT);
8639 tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle));
8640 tree->gtFlags |= GTF_ICON_FTN_ADDR;
8642 switch (addrInfo.accessType)
8645 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
8646 tree->gtFlags |= GTF_IND_INVARIANT;
8651 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
8655 tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding
8659 noway_assert(!"Unknown addrInfo.accessType");
8662 return fgMorphTree(tree);
8668 void Compiler::fgAssignSetVarDef(GenTreePtr tree)
8670 GenTreeLclVarCommon* lclVarCmnTree;
8671 bool isEntire = false;
8672 if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire))
8676 lclVarCmnTree->gtFlags |= GTF_VAR_DEF;
8680 // We consider partial definitions to be modeled as uses followed by definitions.
8681 // This captures the idea that preceding defs are not necessarily made redundant
8682 // by this definition.
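// For example (illustrative): a GT_LCL_FLD store that writes only the first 4 bytes of an
// 8-byte local is not an entire definition, so it is flagged GTF_VAR_DEF | GTF_VAR_USEASG;
// the untouched bytes behave as if the old value were read and merged back.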
8683 lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
8688 //------------------------------------------------------------------------
8689 // fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment
8692 // tree - The block assignment to be possibly morphed
8695 // The modified tree if successful, nullptr otherwise.
8698 // 'tree' must be a block assignment.
8701 // If successful, this method always returns the incoming tree, modifying only its arguments.
8704 GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
8706 // This must be a block assignment.
8707 noway_assert(tree->OperIsBlkOp());
8708 var_types asgType = tree->TypeGet();
8710 GenTreePtr asg = tree;
8711 GenTreePtr dest = asg->gtGetOp1();
8712 GenTreePtr src = asg->gtGetOp2();
8713 unsigned destVarNum = BAD_VAR_NUM;
8714 LclVarDsc* destVarDsc = nullptr;
8715 GenTreePtr lclVarTree = nullptr;
8716 bool isCopyBlock = asg->OperIsCopyBlkOp();
8717 bool isInitBlock = !isCopyBlock;
8720 CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
8722 // importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD)
8723 // The SIMD type in question could be Vector2f which is 8-bytes in size.
8724 // The below check is to make sure that we don't turn that copyblk
8725 // into an assignment, since rationalizer logic will transform the
8726 // copyblk appropriately. Otherwise, the transformation made in this
8727 // routine will prevent rationalizer logic and we might end up with
8728 // GT_ADDR(GT_SIMD) node post rationalization, leading to a noway assert
8730 // TODO-1stClassStructs: This is here to preserve old behavior.
8731 // It should be eliminated.
8732 if (src->OperGet() == GT_SIMD)
8738 if (dest->gtEffectiveVal()->OperIsBlk())
8740 GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk();
8741 size = lhsBlk->Size();
8742 if (impIsAddressInLocal(lhsBlk->Addr(), &lclVarTree))
8744 destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
8745 destVarDsc = &(lvaTable[destVarNum]);
8747 if (lhsBlk->OperGet() == GT_OBJ)
8749 clsHnd = lhsBlk->AsObj()->gtClass;
8754 // Is this an enregisterable struct that is already a simple assignment?
8755 // This can happen if we are re-morphing.
8756 if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock)
8760 noway_assert(dest->OperIsLocal());
8762 destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
8763 destVarDsc = &(lvaTable[destVarNum]);
8766 clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
8767 size = info.compCompHnd->getClassSize(clsHnd);
8771 size = destVarDsc->lvExactSize;
8776 // See if we can do a simple transformation:
//
8778 //          GT_ASG <TYP_size>
//          /   \
8780 //      GT_IND GT_IND or CNS_INT
//      /        /
//    [dest]   [src]
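// For example (illustrative): on a 64-bit target an 8-byte struct copy
//   *(S8*)pDst = *(S8*)pSrc
// can be retyped as a single scalar assignment GT_ASG<long>(GT_IND<long>, GT_IND<long>),
// or GT_ASG<ref> when the class layout says the single slot holds a GC pointer.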
8785 if (size == REGSIZE_BYTES)
8787 if (clsHnd == NO_CLASS_HANDLE)
8789 // A register-sized cpblk can be treated as an integer assignment.
8790 asgType = TYP_I_IMPL;
8795 info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
8796 asgType = getJitGCType(gcPtr);
8807 asgType = TYP_SHORT;
8810 #ifdef _TARGET_64BIT_
8814 #endif // _TARGET_64BIT_
8818 // TODO-1stClassStructs: Change this to asgType != TYP_STRUCT.
8819 if (!varTypeIsStruct(asgType))
8821 // For initBlk, a non-constant source is not going to allow us to fiddle
8822 // with the bits to create a single assignment.
8823 noway_assert(size <= REGSIZE_BYTES);
8825 if (isInitBlock && !src->IsConstInitVal())
8830 if (destVarDsc != nullptr)
8832 #if LOCAL_ASSERTION_PROP
8833 // Kill everything about dest
8834 if (optLocalAssertionProp)
8836 if (optAssertionCount > 0)
8838 fgKillDependentAssertions(destVarNum DEBUGARG(tree));
8841 #endif // LOCAL_ASSERTION_PROP
8843 // A previous incarnation of this code also required the local not to be
8844 // address-exposed(=taken). That seems orthogonal to the decision of whether
8845 // to do field-wise assignments: being address-exposed will cause it to be
8846 // "dependently" promoted, so it will be in the right memory location. One possible
8847 // further reason for avoiding field-wise stores is that the struct might have alignment-induced
8848 // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid
8849 // concern, then we could compromise, and say that being address-exposed, plus having fields that do not
8850 // completely cover the memory of the struct, prevents field-wise assignments. The same situation exists for the "src" decision.
8851 if (varTypeIsStruct(lclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
8853 // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.)
8856 else if (!varTypeIsFloating(lclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc)))
8858 // Use the dest local var directly, as well as its type.
8860 asgType = destVarDsc->lvType;
8862 // If the block operation had been a write to a local var of a small int type,
8863 // of the exact size of the small int type, and the var is NormalizeOnStore,
8864 // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
8865 // have done that normalization. If we're now making it into an assignment,
8866 // the NormalizeOnStore will work, and it can be a full def.
8867 if (destVarDsc->lvNormalizeOnStore())
8869 dest->gtFlags &= (~GTF_VAR_USEASG);
8874 // Could be a non-promoted struct, or a floating point type local, or
8875 // an int subject to a partial write. Don't enregister.
8876 lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField));
8878 // Mark the local var tree as a definition point of the local.
8879 lclVarTree->gtFlags |= GTF_VAR_DEF;
8880 if (size < destVarDsc->lvExactSize)
8881 { // If it's not a full-width assignment....
8882 lclVarTree->gtFlags |= GTF_VAR_USEASG;
8885 if (dest == lclVarTree)
8887 dest = gtNewOperNode(GT_IND, asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
8892 // Check to ensure we don't have a reducible *(& ... )
8893 if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR)
8895 GenTreePtr addrOp = dest->AsIndir()->Addr()->gtGetOp1();
8896 // Ignore reinterpret casts between int/gc
8897 if ((addrOp->TypeGet() == asgType) || (varTypeIsIntegralOrI(addrOp) && (genTypeSize(asgType) == size)))
8900 asgType = addrOp->TypeGet();
8904 if (dest->gtEffectiveVal()->OperIsIndir())
8906 // If we have no information about the destination, we have to assume it could
8907 // live anywhere (not just in the GC heap).
8908 // Mark the GT_IND node so that we use the correct write barrier helper in case
8909 // the field is a GC ref.
8911 if (!fgIsIndirOfAddrOfLocal(dest))
8913 dest->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
8914 tree->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
8918 LclVarDsc* srcVarDsc = nullptr;
8921 if (src->OperGet() == GT_LCL_VAR)
8924 srcVarDsc = &(lvaTable[src->AsLclVarCommon()->gtLclNum]);
8926 else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &lclVarTree))
8928 srcVarDsc = &(lvaTable[lclVarTree->AsLclVarCommon()->gtLclNum]);
8930 if (srcVarDsc != nullptr)
8932 if (varTypeIsStruct(lclVarTree) && (srcVarDsc->lvPromoted || srcVarDsc->lvIsSIMDType()))
8934 // Let fgMorphCopyBlock handle it.
8937 else if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
8938 size == genTypeSize(genActualType(lclVarTree->TypeGet())))
8940 // Use the src local var directly.
8945 #ifndef LEGACY_BACKEND
8947 // The source argument of the copyblk can potentially
8948 // be accessed only through indir(addr(lclVar))
8949 // or indir(lclVarAddr) in rational form and liveness
8950 // won't account for these uses. That said,
8951 // we have to mark this local as address exposed so
8952 // we don't delete it as a dead store later on.
8953 unsigned lclVarNum = lclVarTree->gtLclVarCommon.gtLclNum;
8954 lvaTable[lclVarNum].lvAddrExposed = true;
8955 lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
8957 #else // LEGACY_BACKEND
8958 lvaSetVarDoNotEnregister(lclVarTree->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
8959 #endif // LEGACY_BACKEND
8961 if (src == lclVarTree)
8963 srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
8964 src = gtNewOperNode(GT_IND, asgType, srcAddr);
8968 assert(src->OperIsIndir());
8972 // If we have no information about the src, we have to assume it could
8973 // live anywhere (not just in the GC heap).
8974 // Mark the GT_IND node so that we use the correct write barrier helper in case
8975 // the field is a GC ref.
8977 if (!fgIsIndirOfAddrOfLocal(src))
8979 src->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
8986 if (varTypeIsSIMD(asgType))
8988 assert(!isCopyBlock); // Else we would have returned the tree above.
8989 noway_assert(src->IsIntegralConst(0));
8990 noway_assert(destVarDsc != nullptr);
8992 src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size);
8993 tree->gtOp.gtOp2 = src;
8999 if (src->OperIsInitVal())
9001 src = src->gtGetOp1();
9003 assert(src->IsCnsIntOrI());
9004 // This will mutate the integer constant, in place, to be the correct
9005 // value for the type we are using in the assignment.
9006 src->AsIntCon()->FixupInitBlkValue(asgType);
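// For example (illustrative): a fill byte of 0x3F used to initialize a TYP_INT destination
// is mutated here into the constant 0x3F3F3F3F, i.e. the low byte replicated across the
// width of asgType.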
9010 // Ensure that the dest is setup appropriately.
9011 if (dest->gtEffectiveVal()->OperIsIndir())
9013 dest = fgMorphBlockOperand(dest, asgType, size, true /*isDest*/);
9016 // Ensure that the rhs is setup appropriately.
9019 src = fgMorphBlockOperand(src, asgType, size, false /*isDest*/);
9022 // Set the lhs and rhs on the assignment.
9023 if (dest != tree->gtOp.gtOp1)
9025 asg->gtOp.gtOp1 = dest;
9027 if (src != asg->gtOp.gtOp2)
9029 asg->gtOp.gtOp2 = src;
9032 asg->ChangeType(asgType);
9033 dest->gtFlags |= GTF_DONT_CSE;
9034 asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT);
9035 // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate.
9036 asg->gtFlags &= ~GTF_REVERSE_OPS;
9041 printf("fgMorphOneAsgBlock (after):\n");
9051 //------------------------------------------------------------------------
9052 // fgMorphInitBlock: Perform the Morphing of a GT_INITBLK node
9055 // tree - a tree node with a gtOper of GT_INITBLK
9056 // the child nodes for tree have already been Morphed
9059 // We can return the original GT_INITBLK unmodified (least desirable, but always correct)
9060 // We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable)
9061 // If we have performed struct promotion of the Dest() then we will try to
9062 // perform a field by field assignment for each of the promoted struct fields
9065 // If we leave it as a GT_INITBLK we will call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp.
9066 // If the Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
9067 // cannot use a field by field assignment and must leave the original GT_INITBLK unmodified.
9069 GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
9071 // We must have the GT_ASG form of InitBlkOp.
9072 noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp());
9074 bool morphed = false;
9077 GenTree* asg = tree;
9078 GenTree* src = tree->gtGetOp2();
9079 GenTree* origDest = tree->gtGetOp1();
9081 GenTree* dest = fgMorphBlkNode(origDest, true);
9082 if (dest != origDest)
9084 tree->gtOp.gtOp1 = dest;
9086 tree->gtType = dest->TypeGet();
9087 // (Constant propagation may cause a TYP_STRUCT lclVar to be changed to GT_CNS_INT, and its
9088 // type will be the type of the original lclVar, in which case we will change it to TYP_INT).
9089 if ((src->OperGet() == GT_CNS_INT) && varTypeIsStruct(src))
9091 src->gtType = TYP_INT;
9093 JITDUMP("\nfgMorphInitBlock:");
9095 GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
9098 JITDUMP(" using oneAsgTree.\n");
9103 GenTree* destAddr = nullptr;
9104 GenTree* initVal = src->OperIsInitVal() ? src->gtGetOp1() : src;
9105 GenTree* blockSize = nullptr;
9106 unsigned blockWidth = 0;
9107 FieldSeqNode* destFldSeq = nullptr;
9108 LclVarDsc* destLclVar = nullptr;
9109 bool destDoFldAsg = false;
9110 unsigned destLclNum = BAD_VAR_NUM;
9111 bool blockWidthIsConst = false;
9112 GenTreeLclVarCommon* lclVarTree = nullptr;
9113 if (dest->IsLocal())
9115 lclVarTree = dest->AsLclVarCommon();
9119 if (dest->OperIsBlk())
9121 destAddr = dest->AsBlk()->Addr();
9122 blockWidth = dest->AsBlk()->gtBlkSize;
9126 assert((dest->gtOper == GT_IND) && (dest->TypeGet() != TYP_STRUCT));
9127 destAddr = dest->gtGetOp1();
9128 blockWidth = genTypeSize(dest->TypeGet());
9131 if (lclVarTree != nullptr)
9133 destLclNum = lclVarTree->gtLclNum;
9134 destLclVar = &lvaTable[destLclNum];
9135 blockWidth = varTypeIsStruct(destLclVar) ? destLclVar->lvExactSize : genTypeSize(destLclVar);
9136 blockWidthIsConst = true;
9140 if (dest->gtOper == GT_DYN_BLK)
9142 // The size must be an integer type
9143 blockSize = dest->AsBlk()->gtDynBlk.gtDynamicSize;
9144 assert(varTypeIsIntegral(blockSize->gtType));
9148 assert(blockWidth != 0);
9149 blockWidthIsConst = true;
9152 if ((destAddr != nullptr) && destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
9154 destLclNum = lclVarTree->gtLclNum;
9155 destLclVar = &lvaTable[destLclNum];
9158 if (destLclNum != BAD_VAR_NUM)
9160 #if LOCAL_ASSERTION_PROP
9161 // Kill everything about destLclNum (and its field locals)
9162 if (optLocalAssertionProp)
9164 if (optAssertionCount > 0)
9166 fgKillDependentAssertions(destLclNum DEBUGARG(tree));
9169 #endif // LOCAL_ASSERTION_PROP
9171 if (destLclVar->lvPromoted && blockWidthIsConst)
9173 assert(initVal->OperGet() == GT_CNS_INT);
9174 noway_assert(varTypeIsStruct(destLclVar));
9175 noway_assert(!opts.MinOpts());
9176 if (destLclVar->lvAddrExposed & destLclVar->lvContainsHoles)
9178 JITDUMP(" dest is address exposed");
9182 if (blockWidth == destLclVar->lvExactSize)
9184 JITDUMP(" (destDoFldAsg=true)");
9185 // We may decide later that a copyblk is required when this struct has holes
9186 destDoFldAsg = true;
9190 JITDUMP(" with mismatched size");
9196 // Can we use field by field assignment for the dest?
9197 if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
9199 JITDUMP(" dest contains holes");
9200 destDoFldAsg = false;
9203 JITDUMP(destDoFldAsg ? " using field by field initialization.\n" : " this requires an InitBlock.\n");
9205 // If we're doing an InitBlock and we've transformed the dest to a non-Blk
9206 // we need to change it back.
9207 if (!destDoFldAsg && !dest->OperIsBlk())
9209 noway_assert(blockWidth != 0);
9210 tree->gtOp.gtOp1 = origDest;
9211 tree->gtType = origDest->gtType;
9214 if (!destDoFldAsg && (destLclVar != nullptr))
9216 // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister.
9217 if (!destLclVar->lvRegStruct)
9219 // Mark it as DoNotEnregister.
9220 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
9224 // Mark the dest struct as DoNotEnreg
9225 // when they are LclVar structs and we are using a CopyBlock
9226 // or the struct is not promoted
9230 #if CPU_USES_BLOCK_MOVE
9231 compBlkOpUsed = true;
9233 dest = fgMorphBlockOperand(dest, dest->TypeGet(), blockWidth, true);
9234 tree->gtOp.gtOp1 = dest;
9235 tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
9239 // The initVal must be a constant of TYP_INT
9240 noway_assert(initVal->OperGet() == GT_CNS_INT);
9241 noway_assert(genActualType(initVal->gtType) == TYP_INT);
9243 // The dest must be of a struct type.
9244 noway_assert(varTypeIsStruct(destLclVar));
9247 // Now, convert InitBlock to individual assignments
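// Sketch (illustrative): for a promoted local 'struct S { int a; long b; } s' and a
// zero init-block of s, the loop below produces, in effect:
//   GT_COMMA(s.a = 0 (TYP_INT), s.b = 0 (TYP_LONG))
// with each promoted field receiving a constant of its own type, built from the fill byte.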
9251 INDEBUG(morphed = true);
9255 unsigned fieldLclNum;
9256 unsigned fieldCnt = destLclVar->lvFieldCnt;
9258 for (unsigned i = 0; i < fieldCnt; ++i)
9260 fieldLclNum = destLclVar->lvFieldLclStart + i;
9261 dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
9263 noway_assert(lclVarTree->gtOper == GT_LCL_VAR);
9264 // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
9265 dest->gtFlags |= (lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG));
9267 srcCopy = gtCloneExpr(initVal);
9268 noway_assert(srcCopy != nullptr);
9270 // The type of srcCopy needs to match the type of the destination field.
9271 if (dest->gtType == TYP_LONG)
9273 srcCopy->ChangeOperConst(GT_CNS_NATIVELONG);
9274 // copy and extend the value
9275 srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue());
9276 /* Change the type of srcCopy to TYP_LONG */
9277 srcCopy->gtType = TYP_LONG;
9279 else if (varTypeIsFloating(dest->gtType))
9281 srcCopy->ChangeOperConst(GT_CNS_DBL);
9282 // setup the bit pattern
9283 memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal,
9284 sizeof(srcCopy->gtDblCon.gtDconVal));
9285 /* Change the type of srcCopy to TYP_DOUBLE */
9286 srcCopy->gtType = TYP_DOUBLE;
9290 noway_assert(srcCopy->gtOper == GT_CNS_INT);
9291 noway_assert(srcCopy->TypeGet() == TYP_INT);
9292 // setup the bit pattern
9293 memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal,
9294 sizeof(srcCopy->gtIntCon.gtIconVal));
9297 srcCopy->gtType = dest->TypeGet();
9299 asg = gtNewAssignNode(dest, srcCopy);
9301 #if LOCAL_ASSERTION_PROP
9302 if (optLocalAssertionProp)
9304 optAssertionGen(asg);
9306 #endif // LOCAL_ASSERTION_PROP
9310 tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
9323 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9327 printf("fgMorphInitBlock (after):\n");
9336 //------------------------------------------------------------------------
9337 // fgMorphBlkToInd: Change a blk node into a GT_IND of the specified type
9340 // tree - the node to be modified.
9341 // type - the type of indirection to change it to.
9344 // Returns the node, modified in place.
9347 // This doesn't really warrant a separate method, but is here to abstract
9348 // the fact that these nodes can be modified in-place.
9350 GenTreePtr Compiler::fgMorphBlkToInd(GenTreeBlk* tree, var_types type)
9352 tree->SetOper(GT_IND);
9353 tree->gtType = type;
9357 //------------------------------------------------------------------------
9358 // fgMorphGetStructAddr: Gets the address of a struct object
9361 // pTree - the parent's pointer to the struct object node
9362 // clsHnd - the class handle for the struct type
9363 // isRValue - true if this is a source (not dest)
9366 // Returns the address of the struct value, possibly modifying the existing tree to
9367 // sink the address below any comma nodes (this is to canonicalize for value numbering).
9368 // If this is a source, it will morph it to a GT_IND before taking its address,
9369 // since it may not be remorphed (and we don't want blk nodes as rvalues).
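// For example (illustrative): asked for the address of COMMA(sideEffect, structVal), this
// sinks the ADDR below the comma, yielding COMMA(sideEffect, ADDR(structVal)) retyped as
// TYP_BYREF, so that value numbering sees a canonical address expression.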
9371 GenTreePtr Compiler::fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue)
9374 GenTree* tree = *pTree;
9375 // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we
9376 // need to hang onto that for the purposes of value numbering.
9377 if (tree->OperIsIndir())
9379 if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
9381 addr = tree->gtOp.gtOp1;
9385 if (isRValue && tree->OperIsBlk())
9387 tree->ChangeOper(GT_IND);
9389 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
9392 else if (tree->gtOper == GT_COMMA)
9394 // If this is a comma, we're going to "sink" the GT_ADDR below it.
9395 (void)fgMorphGetStructAddr(&(tree->gtOp.gtOp2), clsHnd, isRValue);
9396 tree->gtType = TYP_BYREF;
9401 switch (tree->gtOper)
9408 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
9412 // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're
9413 // not going to use "temp"
9414 GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd);
9415 addr = fgMorphGetStructAddr(pTree, clsHnd, isRValue);
9424 //------------------------------------------------------------------------
9425 // fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment
9428 // tree - The struct type node
9429 // isDest - True if this is the destination of the assignment
9432 // Returns the possibly-morphed node. The caller is responsible for updating
9433 // the parent of this node.
9435 GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest)
9437 if (tree->gtOper == GT_COMMA)
9439 GenTree* effectiveVal = tree->gtEffectiveVal();
9440 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9442 addr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9444 // In order to CSE and value number array index expressions and bounds checks,
9445 // the commas in which they are contained need to match.
9446 // The pattern is that the COMMA should be the address expression.
9447 // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind.
9448 // TODO-1stClassStructs: Consider whether this can be improved.
9449 // Also consider whether some of this can be included in gtNewBlockVal (though note
9450 // that doing so may cause us to query the type system before we otherwise would).
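// For example (illustrative): a block-typed COMMA(rangeCheck, arrElem) used as a block
// assignment operand becomes OBJ(COMMA(rangeCheck, ADDR(arrElem))) (or BLK/IND when no
// class handle is available), keeping the comma as the address expression so it can match
// other occurrences for CSE and value numbering.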
9451 GenTree* lastComma = nullptr;
9452 for (GenTree* next = tree; next != nullptr && next->gtOper == GT_COMMA; next = next->gtGetOp2())
9454 next->gtType = TYP_BYREF;
9457 if (lastComma != nullptr)
9459 noway_assert(lastComma->gtGetOp2() == effectiveVal);
9460 lastComma->gtOp.gtOp2 = addr;
9463 var_types structType = effectiveVal->TypeGet();
9464 if (structType == TYP_STRUCT)
9466 CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(effectiveVal);
9467 if (structHnd == NO_CLASS_HANDLE)
9469 tree = gtNewOperNode(GT_IND, effectiveVal->TypeGet(), addr);
9473 tree = gtNewObjNode(structHnd, addr);
9474 if (tree->OperGet() == GT_OBJ)
9476 gtSetObjGcInfo(tree->AsObj());
9482 tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, genTypeSize(structType));
9485 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9489 if (!tree->OperIsBlk())
9493 GenTreeBlk* blkNode = tree->AsBlk();
9494 if (blkNode->OperGet() == GT_DYN_BLK)
9496 if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI())
9498 unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue();
9499 // A GT_BLK with size of zero is not supported,
9500 // so if we encounter such a thing we just leave it as a GT_DYN_BLK
9503 blkNode->AsDynBlk()->gtDynamicSize = nullptr;
9504 blkNode->ChangeOper(GT_BLK);
9505 blkNode->gtBlkSize = size;
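// e.g. (a sketch): GT_DYN_BLK(addr, GT_CNS_INT 32) becomes GT_BLK<32>(addr);
// a constant size of zero is left as a GT_DYN_BLK per the note above.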
9517 if ((blkNode->TypeGet() != TYP_STRUCT) && (blkNode->Addr()->OperGet() == GT_ADDR) &&
9518 (blkNode->Addr()->gtGetOp1()->OperGet() == GT_LCL_VAR))
9520 GenTreeLclVarCommon* lclVarNode = blkNode->Addr()->gtGetOp1()->AsLclVarCommon();
9521 if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode)))
9523 lvaSetVarDoNotEnregister(lclVarNode->gtLclNum DEBUGARG(DNER_VMNeedsStackAddr));
9530 //------------------------------------------------------------------------
9531 // fgMorphBlockOperand: Canonicalize an operand of a block assignment
9534 // tree - The block operand
9535 // asgType - The type of the assignment
9536 // blockWidth - The size of the block
9537 // isDest - true iff this is the destination of the assignment
9540 // Returns the morphed block operand
9543 // This does the following:
9544 // - Ensures that a struct operand is a block node or (for non-LEGACY_BACKEND) lclVar.
9545 // - Ensures that any COMMAs are above ADDR nodes.
9546 // Although 'tree' WAS an operand of a block assignment, the assignment
9547 // may have been retyped to be a scalar assignment.
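// For example (a sketch; shapes are illustrative only):
// - struct asgType: a lclVar whose lvExactSize matches blockWidth is used directly
//   (non-LEGACY_BACKEND); anything else is wrapped as OBJ/BLK(ADDR(operand)).
// - scalar asgType: a mismatched operand becomes IND(asgType, ADDR(operand)).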
9549 GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isDest)
9551 GenTree* effectiveVal = tree->gtEffectiveVal();
9553 if (!varTypeIsStruct(asgType))
9555 if (effectiveVal->OperIsIndir())
9557 GenTree* addr = effectiveVal->AsIndir()->Addr();
9558 if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType))
9560 effectiveVal = addr->gtGetOp1();
9562 else if (effectiveVal->OperIsBlk())
9564 effectiveVal = fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
9568 effectiveVal->gtType = asgType;
9571 else if (effectiveVal->TypeGet() != asgType)
9573 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9574 effectiveVal = gtNewOperNode(GT_IND, asgType, addr);
9579 GenTreeIndir* indirTree = nullptr;
9580 GenTreeLclVarCommon* lclNode = nullptr;
9581 bool needsIndirection = true;
9583 if (effectiveVal->OperIsIndir())
9585 indirTree = effectiveVal->AsIndir();
9586 GenTree* addr = effectiveVal->AsIndir()->Addr();
9587 if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
9589 lclNode = addr->gtGetOp1()->AsLclVarCommon();
9592 else if (effectiveVal->OperGet() == GT_LCL_VAR)
9594 lclNode = effectiveVal->AsLclVarCommon();
9597 if (varTypeIsSIMD(asgType))
9599 if ((indirTree != nullptr) && (lclNode == nullptr) && (indirTree->Addr()->OperGet() == GT_ADDR) &&
9600 (indirTree->Addr()->gtGetOp1()->gtOper == GT_SIMD))
9603 needsIndirection = false;
9604 effectiveVal = indirTree->Addr()->gtGetOp1();
9606 if (effectiveVal->OperIsSIMD())
9608 needsIndirection = false;
9611 #endif // FEATURE_SIMD
9612 if (lclNode != nullptr)
9614 LclVarDsc* varDsc = &(lvaTable[lclNode->gtLclNum]);
9615 if (varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth))
9617 #ifndef LEGACY_BACKEND
9618 effectiveVal = lclNode;
9619 needsIndirection = false;
9620 #endif // !LEGACY_BACKEND
9624 // This may be a lclVar that was determined to be address-exposed.
9625 effectiveVal->gtFlags |= (lclNode->gtFlags & GTF_ALL_EFFECT);
9628 if (needsIndirection)
9630 if (indirTree != nullptr)
9632 // We should never find a struct indirection on the lhs of an assignment.
9633 assert(!isDest || indirTree->OperIsBlk());
9634 if (!isDest && indirTree->OperIsBlk())
9636 (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
9642 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9645 CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal);
9646 if (clsHnd == NO_CLASS_HANDLE)
9648 newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth);
9652 newTree = gtNewObjNode(clsHnd, addr);
9653 if (isDest && (newTree->OperGet() == GT_OBJ))
9655 gtSetObjGcInfo(newTree->AsObj());
9657 if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0))
9659 // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
9660 // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
9661 // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
9662 // separately now to avoid excess diffs.
9663 newTree->gtFlags &= ~(GTF_GLOB_EFFECT);
9669 newTree = new (this, GT_IND) GenTreeIndir(GT_IND, asgType, addr, nullptr);
9671 effectiveVal = newTree;
9675 tree = effectiveVal;
9679 //------------------------------------------------------------------------
9680 // fgMorphUnsafeBlk: Convert a CopyObj with a dest on the stack to a GC Unsafe CopyBlk
9683 // dest - the GT_OBJ or GT_STORE_OBJ
9686 // The destination must be known (by the caller) to be on the stack.
9689 // If we have a CopyObj with a dest on the stack, and its size is small enough
9690 // to be completely unrolled (i.e. between [16..64] bytes), we will convert it into a
9691 // GC Unsafe CopyBlk that is non-interruptible.
9692 // This is not supported for the JIT32_GCENCODER, in which case this method is a no-op.
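// For example (a sketch): a 32-byte CopyObj whose destination is a stack-allocated
// struct becomes a GT_BLK/GT_STORE_BLK with gtBlkOpGcUnsafe set, letting codegen emit
// an unrolled copy inside a non-interruptible region instead of a barriered CopyObj.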
9694 void Compiler::fgMorphUnsafeBlk(GenTreeObj* dest)
9696 #if defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
9697 assert(dest->gtGcPtrCount != 0);
9698 unsigned blockWidth = dest->AsBlk()->gtBlkSize;
9700 bool destOnStack = false;
9701 GenTree* destAddr = dest->Addr();
9702 assert(destAddr->IsLocalAddrExpr() != nullptr);
9704 if ((blockWidth >= (2 * TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT))
9706 genTreeOps newOper = (dest->gtOper == GT_OBJ) ? GT_BLK : GT_STORE_BLK;
9707 dest->SetOper(newOper);
9708 dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
9710 #endif // defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
9713 //------------------------------------------------------------------------
10714 // fgMorphCopyBlock: Perform the morphing of a block copy
9717 // tree - a block copy (i.e. an assignment with a block op on the lhs).
9720 // We can return the original block copy unmodified (least desirable, but always correct)
9721 // We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable).
9722 // If we have performed struct promotion of the Source() or the Dest() then we will try to
9723 // perform a field by field assignment for each of the promoted struct fields.
9726 // The child nodes for tree have already been Morphed.
9729 // If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest().
9730 // When performing a field by field assignment we can have one of Source() or Dest() treated as a blob of bytes
9731 // and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
9732 // If the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
9733 // cannot use a field by field assignment and must leave the original block copy unmodified.
9735 GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
9737 noway_assert(tree->OperIsCopyBlkOp());
9739 JITDUMP("\nfgMorphCopyBlock:");
9741 bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0;
9743 GenTree* asg = tree;
9744 GenTree* rhs = asg->gtGetOp2();
9745 GenTree* dest = asg->gtGetOp1();
9747 #if FEATURE_MULTIREG_RET
9748 // If this is a multi-reg return, we will not do any morphing of this node.
9749 if (rhs->IsMultiRegCall())
9751 assert(dest->OperGet() == GT_LCL_VAR);
9752 JITDUMP(" not morphing a multireg call return\n");
9755 #endif // FEATURE_MULTIREG_RET
9757 // If we have an array index on the lhs, we need to create an obj node.
9759 dest = fgMorphBlkNode(dest, true);
9760 if (dest != asg->gtGetOp1())
9762 asg->gtOp.gtOp1 = dest;
9763 if (dest->IsLocal())
9765 dest->gtFlags |= GTF_VAR_DEF;
9768 asg->gtType = dest->TypeGet();
9769 rhs = fgMorphBlkNode(rhs, false);
9771 asg->gtOp.gtOp2 = rhs;
9773 GenTreePtr oldTree = tree;
9774 GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
9778 JITDUMP(" using oneAsgTree.\n");
9783 unsigned blockWidth;
9784 bool blockWidthIsConst = false;
9785 GenTreeLclVarCommon* lclVarTree = nullptr;
9786 GenTreeLclVarCommon* srcLclVarTree = nullptr;
9787 unsigned destLclNum = BAD_VAR_NUM;
9788 LclVarDsc* destLclVar = nullptr;
9789 FieldSeqNode* destFldSeq = nullptr;
9790 bool destDoFldAsg = false;
9791 GenTreePtr destAddr = nullptr;
9792 GenTreePtr srcAddr = nullptr;
9793 bool destOnStack = false;
9794 bool hasGCPtrs = false;
9796 JITDUMP("block assignment to morph:\n");
9799 if (dest->IsLocal())
9801 blockWidthIsConst = true;
9803 if (dest->gtOper == GT_LCL_VAR)
9805 lclVarTree = dest->AsLclVarCommon();
9806 destLclNum = lclVarTree->gtLclNum;
9807 destLclVar = &lvaTable[destLclNum];
9808 if (destLclVar->lvType == TYP_STRUCT)
9810 // It would be nice if lvExactSize always corresponded to the size of the struct,
9811 // but it doesn't always for the temps that the importer creates when it spills side effects.
9813 // TODO-Cleanup: Determine when this happens, and whether it can be changed.
9814 blockWidth = info.compCompHnd->getClassSize(destLclVar->lvVerTypeInfo.GetClassHandle());
9818 blockWidth = genTypeSize(destLclVar->lvType);
9820 hasGCPtrs = destLclVar->lvStructGcCount != 0;
9824 assert(dest->TypeGet() != TYP_STRUCT);
9825 assert(dest->gtOper == GT_LCL_FLD);
9826 blockWidth = genTypeSize(dest->TypeGet());
9827 destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
9828 destFldSeq = dest->AsLclFld()->gtFieldSeq;
9833 GenTree* effectiveDest = dest->gtEffectiveVal();
9834 if (effectiveDest->OperGet() == GT_IND)
9836 assert(dest->TypeGet() != TYP_STRUCT);
9837 blockWidth = genTypeSize(effectiveDest->TypeGet());
9838 blockWidthIsConst = true;
9839 if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
9841 destAddr = dest->gtGetOp1();
9846 assert(effectiveDest->OperIsBlk());
9847 GenTreeBlk* blk = effectiveDest->AsBlk();
9849 blockWidth = blk->gtBlkSize;
9850 blockWidthIsConst = (blk->gtOper != GT_DYN_BLK);
9851 if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
9853 destAddr = blk->Addr();
9856 if (destAddr != nullptr)
9858 noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL);
9859 if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
9862 destLclNum = lclVarTree->gtLclNum;
9863 destLclVar = &lvaTable[destLclNum];
9868 if (destLclVar != nullptr)
9870 #if LOCAL_ASSERTION_PROP
9871 // Kill everything about destLclNum (and its field locals)
9872 if (optLocalAssertionProp)
9874 if (optAssertionCount > 0)
9876 fgKillDependentAssertions(destLclNum DEBUGARG(tree));
9879 #endif // LOCAL_ASSERTION_PROP
9881 if (destLclVar->lvPromoted && blockWidthIsConst)
9883 noway_assert(varTypeIsStruct(destLclVar));
9884 noway_assert(!opts.MinOpts());
9886 if (blockWidth == destLclVar->lvExactSize)
9888 JITDUMP(" (destDoFldAsg=true)");
9889 // We may decide later that a copyblk is required when this struct has holes
9890 destDoFldAsg = true;
9894 JITDUMP(" with mismatched dest size");
9899 FieldSeqNode* srcFldSeq = nullptr;
9900 unsigned srcLclNum = BAD_VAR_NUM;
9901 LclVarDsc* srcLclVar = nullptr;
9902 bool srcDoFldAsg = false;
9906 srcLclVarTree = rhs->AsLclVarCommon();
9907 srcLclNum = srcLclVarTree->gtLclNum;
9908 if (rhs->OperGet() == GT_LCL_FLD)
9910 srcFldSeq = rhs->AsLclFld()->gtFieldSeq;
9913 else if (rhs->OperIsIndir())
9915 if (rhs->gtOp.gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq))
9917 srcLclNum = srcLclVarTree->gtLclNum;
9921 srcAddr = rhs->gtOp.gtOp1;
9925 if (srcLclNum != BAD_VAR_NUM)
9927 srcLclVar = &lvaTable[srcLclNum];
9929 if (srcLclVar->lvPromoted && blockWidthIsConst)
9931 noway_assert(varTypeIsStruct(srcLclVar));
9932 noway_assert(!opts.MinOpts());
9934 if (blockWidth == srcLclVar->lvExactSize)
9936 JITDUMP(" (srcDoFldAsg=true)");
9937 // We may decide later that a copyblk is required when this struct has holes
9942 JITDUMP(" with mismatched src size");
9947 // Check to see if we are required to do a copy block because the struct contains holes
9948 // and either the src or dest is externally visible
9950 bool requiresCopyBlock = false;
9951 bool srcSingleLclVarAsg = false;
9952 bool destSingleLclVarAsg = false;
9954 if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq))
9956 // Self-assign; no effect.
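// e.g. (a sketch): "s = s" where both sides resolve to the same local and the
// same field sequence; the whole assignment reduces to a GT_NOP.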
9957 GenTree* nop = gtNewNothingNode();
9958 INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
9962 // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
9963 if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct))
9965 requiresCopyBlock = true;
9968 // Can we use field by field assignment for the dest?
9969 if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
9971 JITDUMP(" dest contains custom layout and contains holes");
9972 // C++ style CopyBlock with holes
9973 requiresCopyBlock = true;
9976 // Can we use field by field assignment for the src?
9977 if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles)
9979 JITDUMP(" src contains custom layout and contains holes");
9980 // C++ style CopyBlock with holes
9981 requiresCopyBlock = true;
9984 #if defined(_TARGET_ARM_)
9985 if ((rhs->OperIsIndir()) && (rhs->gtFlags & GTF_IND_UNALIGNED))
9987 JITDUMP(" rhs is unaligned");
9988 requiresCopyBlock = true;
9991 if (asg->gtFlags & GTF_BLK_UNALIGNED)
9993 JITDUMP(" asg is unaligned");
9994 requiresCopyBlock = true;
9996 #endif // _TARGET_ARM_
9998 if (dest->OperGet() == GT_OBJ && dest->AsBlk()->gtBlkOpGcUnsafe)
10000 requiresCopyBlock = true;
10003 // Can't use field by field assignment if the src is a call.
10004 if (rhs->OperGet() == GT_CALL)
10006 JITDUMP(" src is a call");
10007 // C++ style CopyBlock with holes
10008 requiresCopyBlock = true;
10011 // If we passed the above checks, then we will check these two
10012 if (!requiresCopyBlock)
10014 // Are both dest and src promoted structs?
10015 if (destDoFldAsg && srcDoFldAsg)
10017 // Both structs should be of the same type, or each have a single field of the same type.
10018 // If not we will use a copy block.
10019 if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() !=
10020 lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
10022 unsigned destFieldNum = lvaTable[destLclNum].lvFieldLclStart;
10023 unsigned srcFieldNum = lvaTable[srcLclNum].lvFieldLclStart;
10024 if ((lvaTable[destLclNum].lvFieldCnt != 1) || (lvaTable[srcLclNum].lvFieldCnt != 1) ||
10025 (lvaTable[destFieldNum].lvType != lvaTable[srcFieldNum].lvType))
10027 requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
10028 JITDUMP(" with mismatched types");
10032 // Are neither dest nor src promoted structs?
10033 else if (!destDoFldAsg && !srcDoFldAsg)
10035 requiresCopyBlock = true; // Leave as a CopyBlock
10036 JITDUMP(" with no promoted structs");
10038 else if (destDoFldAsg)
10040 // Match the following kinds of trees:
10041 // fgMorphTree BB01, stmt 9 (before)
10042 // [000052] ------------ const int 8
10043 // [000053] -A--G------- copyBlk void
10044 // [000051] ------------ addr byref
10045 // [000050] ------------ lclVar long V07 loc5
10046 // [000054] --------R--- <list> void
10047 // [000049] ------------ addr byref
10048 // [000048] ------------ lclVar struct(P) V06 loc4
10049 // long V06.h (offs=0x00) -> V17 tmp9
10050 // Yields this transformation
10051 // fgMorphCopyBlock (after):
10052 // [000050] ------------ lclVar long V07 loc5
10053 // [000085] -A---------- = long
10054 // [000083] D------N---- lclVar long V17 tmp9
10056 if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) &&
10057 (blockWidth == genTypeSize(srcLclVar->TypeGet())))
10059 // Reject the following tree:
10060 // - seen on x86chk jit\jit64\hfa\main\hfa_sf3E_r.exe
10062 // fgMorphTree BB01, stmt 6 (before)
10063 // [000038] ------------- const int 4
10064 // [000039] -A--G-------- copyBlk void
10065 // [000037] ------------- addr byref
10066 // [000036] ------------- lclVar int V05 loc3
10067 // [000040] --------R---- <list> void
10068 // [000035] ------------- addr byref
10069 // [000034] ------------- lclVar struct(P) V04 loc2
10070 // float V04.f1 (offs=0x00) -> V13 tmp6
10071 // As this would transform into
10072 // float V13 = int V05
10074 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart;
10075 var_types destType = lvaTable[fieldLclNum].TypeGet();
10076 if (srcLclVar->TypeGet() == destType)
10078 srcSingleLclVarAsg = true;
10084 assert(srcDoFldAsg);
10085 // Check for the symmetric case (which happens for the _pointer field of promoted spans):
10087 // [000240] -----+------ /--* lclVar struct(P) V18 tmp9
10088 // /--* byref V18._value (offs=0x00) -> V30 tmp21
10089 // [000245] -A------R--- * = struct (copy)
10090 // [000244] -----+------ \--* obj(8) struct
10091 // [000243] -----+------ \--* addr byref
10092 // [000242] D----+-N---- \--* lclVar byref V28 tmp19
10094 if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) &&
10095 (blockWidth == genTypeSize(destLclVar->TypeGet())))
10097 // Check for type agreement
10098 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart;
10099 var_types srcType = lvaTable[fieldLclNum].TypeGet();
10100 if (destLclVar->TypeGet() == srcType)
10102 destSingleLclVarAsg = true;
10108 // If we require a copy block, then set both of the field assign bools to false
10109 if (requiresCopyBlock)
10111 // If a copy block is required then we won't do field by field assignments
10112 destDoFldAsg = false;
10113 srcDoFldAsg = false;
10116 JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");
10118 // Mark the dest/src structs as DoNotEnreg
10119 // when they are not reg-sized non-field-addressed structs and we are using a CopyBlock
10120 // or the struct is not promoted
10122 if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg)
10124 if (!destLclVar->lvRegStruct)
10126 // Mark it as DoNotEnregister.
10127 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
10131 if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg)
10133 if (!srcLclVar->lvRegStruct)
10135 lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp));
10139 if (requiresCopyBlock)
10141 #if CPU_USES_BLOCK_MOVE
10142 compBlkOpUsed = true;
10144 var_types asgType = dest->TypeGet();
10145 dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
10146 asg->gtOp.gtOp1 = dest;
10147 asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
10149 // Note that the unrolling of CopyBlk is only implemented on some platforms.
10150 // Currently that includes x64 and ARM but not x86: the code generation for this
10151 // construct requires the ability to mark certain regions of the generated code
10152 // as non-interruptible, and the GC encoding for the latter platform does not
10153 // have this capability.
10155 // If we have a CopyObj with a dest on the stack
10156 // we will convert it into a GC Unsafe CopyBlk that is non-interruptible
10157 // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes).
10158 // (This is not supported for the JIT32_GCENCODER, for which fgMorphUnsafeBlk is a no-op.)
10160 if (destOnStack && (dest->OperGet() == GT_OBJ))
10162 fgMorphUnsafeBlk(dest->AsObj());
10165 // Eliminate the "OBJ or BLK" node on the rhs.
10166 rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/);
10167 asg->gtOp.gtOp2 = rhs;
10169 #ifdef LEGACY_BACKEND
10170 if (!rhs->OperIsIndir())
10172 noway_assert(rhs->gtOper == GT_LCL_VAR);
10173 GenTree* rhsAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, rhs);
10174 rhs = gtNewOperNode(GT_IND, TYP_STRUCT, rhsAddr);
10176 #endif // LEGACY_BACKEND
10177 // Formerly, liveness did not consider copyblk arguments of simple types as being
10178 // a use or def, so these variables were marked as address-exposed.
10179 // TODO-1stClassStructs: This should no longer be needed.
10180 if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar))
10182 JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
10183 lvaTable[srcLclNum].lvAddrExposed = true;
10186 if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar))
10188 JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
10189 lvaTable[destLclNum].lvAddrExposed = true;
10196 // Otherwise we convert this CopyBlock into individual field by field assignments
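// For example (a sketch; locals and offsets are illustrative only): for
//   struct S { int a; int b; }  with V01 promoted as V01.a -> V02, V01.b -> V03,
//     V01 = *(S*)addr    (copyBlk 8)
// becomes
//     COMMA(V02 = IND(int, ADD(addr, 0)), V03 = IND(int, ADD(addr, 4)))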
10201 GenTreePtr addrSpill = nullptr;
10202 unsigned addrSpillTemp = BAD_VAR_NUM;
10203 bool addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame
10205 unsigned fieldCnt = DUMMY_INIT(0);
10207 if (destDoFldAsg && srcDoFldAsg)
10209 // To do fieldwise assignments for both sides, they'd better be the same struct type!
10210 // All of these conditions were checked above...
10211 assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
10212 assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);
10214 fieldCnt = destLclVar->lvFieldCnt;
10215 goto _AssignFields; // No need to spill the address to a temp. Go ahead and morph it into field assignments.
10218 else if (destDoFldAsg)
10220 fieldCnt = destLclVar->lvFieldCnt;
10221 rhs = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/);
10222 if (srcAddr == nullptr)
10224 srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */);
10229 assert(srcDoFldAsg);
10230 fieldCnt = srcLclVar->lvFieldCnt;
10231 dest = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/);
10232 if (dest->OperIsBlk())
10234 (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT);
10236 destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
10241 noway_assert(!srcDoFldAsg);
10242 if (gtClone(srcAddr))
10244 // srcAddr is a simple expression. No need to spill.
10245 noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
10249 // srcAddr is a complex expression. Clone and spill it (unless the destination is
10250 // a struct local that only has one field, in which case we'd only use the
10251 // address value once...)
10252 if (destLclVar->lvFieldCnt > 1)
10254 addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr'
10255 noway_assert(addrSpill != nullptr);
10262 noway_assert(!destDoFldAsg);
10264 // If we're doing field-wise stores, to an address within a local, and we copy
10265 // the address into "addrSpill", do *not* declare the original local var node in the
10266 // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the
10267 // field-wise assignments as an "indirect" assignment to the local.
10268 // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before
10270 if (lclVarTree != nullptr)
10272 lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
10275 if (gtClone(destAddr))
10277 // destAddr is a simple expression. No need to spill.
10278 noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
10282 // destAddr is a complex expression. Clone and spill it (unless
10283 // the source is a struct local that only has one field, in which case we'd only
10284 // use the address value once...)
10285 if (srcLclVar->lvFieldCnt > 1)
10287 addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr'
10288 noway_assert(addrSpill != nullptr);
10291 // TODO-CQ: this should be based on a more general
10292 // "BaseAddress" method, that handles fields of structs, before or after
10294 if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR)
10296 if (addrSpill->gtOp.gtOp1->IsLocal())
10298 // We will *not* consider this to define the local, but rather have each individual field assign
10299 // be a definition.
10300 addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
10301 assert(lvaGetPromotionType(addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum) !=
10302 PROMOTION_TYPE_INDEPENDENT);
10303 addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our
10304 // local stack frame
10310 if (addrSpill != nullptr)
10312 // Spill the (complex) address to a BYREF temp.
10313 // Note, at most one address may need to be spilled.
10314 addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
10316 lvaTable[addrSpillTemp].lvType = TYP_BYREF;
10318 if (addrSpillIsStackDest)
10320 lvaTable[addrSpillTemp].lvStackByref = true;
10323 tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill);
10325 #ifndef LEGACY_BACKEND
10326 // If we are assigning the address of a LclVar here,
10327 // liveness does not account for this kind of address-taken use.
10329 // We have to mark this local as address exposed so
10330 // that we don't delete the definition for this LclVar
10331 // as a dead store later on.
10333 if (addrSpill->OperGet() == GT_ADDR)
10335 GenTreePtr addrOp = addrSpill->gtOp.gtOp1;
10336 if (addrOp->IsLocal())
10338 unsigned lclVarNum = addrOp->gtLclVarCommon.gtLclNum;
10339 lvaTable[lclVarNum].lvAddrExposed = true;
10340 lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
10343 #endif // !LEGACY_BACKEND
10348 for (unsigned i = 0; i < fieldCnt; ++i)
10350 FieldSeqNode* curFieldSeq = nullptr;
10353 noway_assert(destLclNum != BAD_VAR_NUM);
10354 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
10355 dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
10356 // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
10357 if (destAddr != nullptr)
10359 noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
10360 dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
10364 noway_assert(lclVarTree != nullptr);
10365 dest->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
10367 // Don't CSE the lhs of an assignment.
10368 dest->gtFlags |= GTF_DONT_CSE;
10372 noway_assert(srcDoFldAsg);
10373 noway_assert(srcLclNum != BAD_VAR_NUM);
10374 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
10376 if (destSingleLclVarAsg)
10378 noway_assert(fieldCnt == 1);
10379 noway_assert(destLclVar != nullptr);
10380 noway_assert(addrSpill == nullptr);
10382 dest = gtNewLclvNode(destLclNum, destLclVar->TypeGet());
10388 assert(addrSpillTemp != BAD_VAR_NUM);
10389 dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
10393 dest = gtCloneExpr(destAddr);
10394 noway_assert(dest != nullptr);
10396 // Is the address of a local?
10397 GenTreeLclVarCommon* lclVarTree = nullptr;
10398 bool isEntire = false;
10399 bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr);
10400 if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
10402 lclVarTree->gtFlags |= GTF_VAR_DEF;
10405 lclVarTree->gtFlags |= GTF_VAR_USEASG;
10410 GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
10411 // Have to set the field sequence -- which means we need the field handle.
10412 CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
10413 CORINFO_FIELD_HANDLE fieldHnd =
10414 info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
10415 curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
10416 fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
10418 dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
10420 dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);
10422 // !!! The destination could be on stack. !!!
10423 // This flag will let us choose the correct write barrier.
10424 dest->gtFlags |= GTF_IND_TGTANYWHERE;
10430 noway_assert(srcLclNum != BAD_VAR_NUM);
10431 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
10432 src = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
10434 noway_assert(srcLclVarTree != nullptr);
10435 src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK;
10436 // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE,
10437 // but they are when they are under a GT_ADDR.
10438 src->gtFlags |= GTF_DONT_CSE;
10442 noway_assert(destDoFldAsg);
10443 noway_assert(destLclNum != BAD_VAR_NUM);
10444 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
10446 if (srcSingleLclVarAsg)
10448 noway_assert(fieldCnt == 1);
10449 noway_assert(srcLclVar != nullptr);
10450 noway_assert(addrSpill == nullptr);
10452 src = gtNewLclvNode(srcLclNum, srcLclVar->TypeGet());
10458 assert(addrSpillTemp != BAD_VAR_NUM);
10459 src = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
10463 src = gtCloneExpr(srcAddr);
10464 noway_assert(src != nullptr);
10467 CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle();
10468 CORINFO_FIELD_HANDLE fieldHnd =
10469 info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
10470 curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
10472 src = gtNewOperNode(GT_ADD, TYP_BYREF, src,
10473 new (this, GT_CNS_INT)
10474 GenTreeIntCon(TYP_I_IMPL, lvaTable[fieldLclNum].lvFldOffset, curFieldSeq));
10476 src = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), src);
10480 noway_assert(dest->TypeGet() == src->TypeGet());
10482 asg = gtNewAssignNode(dest, src);
10484 // If we spilled the address, and we didn't do individual field assignments to promoted fields,
10485 // and it was of a local, record the assignment as an indirect update of a local.
10486 if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM)
10488 curFieldSeq = GetFieldSeqStore()->Append(destFldSeq, curFieldSeq);
10489 bool isEntire = (genTypeSize(var_types(lvaTable[destLclNum].lvType)) == genTypeSize(dest->TypeGet()));
10490 IndirectAssignmentAnnotation* pIndirAnnot =
10491 new (this, CMK_Unknown) IndirectAssignmentAnnotation(destLclNum, curFieldSeq, isEntire);
10492 GetIndirAssignMap()->Set(asg, pIndirAnnot);
10495 #if LOCAL_ASSERTION_PROP
10496 if (optLocalAssertionProp)
10498 optAssertionGen(asg);
10500 #endif // LOCAL_ASSERTION_PROP
10504 tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
10515 tree->gtFlags |= GTF_LATE_ARG;
10519 if (tree != oldTree)
10521 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10526 printf("\nfgMorphCopyBlock (after):\n");
10535 // Insert conversions and normalize the tree to make it amenable to
10536 // register-based FP architectures.
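// e.g. (a sketch): with "float f; double d;", GT_ADD(double, f, d) becomes
// GT_ADD(double, GT_CAST(double <- float, f), d); likewise a float/double compare
// gets its TYP_FLOAT operand cast up to TYP_DOUBLE.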
10537 GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree)
10539 if (tree->OperIsArithmetic())
10541 if (varTypeIsFloating(tree))
10543 GenTreePtr op1 = tree->gtOp.gtOp1;
10544 GenTreePtr op2 = tree->gtGetOp2();
10546 if (op1->TypeGet() != tree->TypeGet())
10548 tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), op1, tree->TypeGet());
10550 if (op2->TypeGet() != tree->TypeGet())
10552 tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), op2, tree->TypeGet());
10556 else if (tree->OperIsCompare())
10558 GenTreePtr op1 = tree->gtOp.gtOp1;
10560 if (varTypeIsFloating(op1))
10562 GenTreePtr op2 = tree->gtGetOp2();
10563 assert(varTypeIsFloating(op2));
10565 if (op1->TypeGet() != op2->TypeGet())
10567 // both had better be floating; just one is bigger than the other
10568 if (op1->TypeGet() == TYP_FLOAT)
10570 assert(op2->TypeGet() == TYP_DOUBLE);
10571 tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
10573 else if (op2->TypeGet() == TYP_FLOAT)
10575 assert(op1->TypeGet() == TYP_DOUBLE);
10576 tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
10585 GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
10587 GenTree* op1 = compare->gtOp.gtOp1;
10588 GenTree* op2 = compare->gtOp.gtOp2;
10590 GenTreeCall* opCall;
10592 // recognize this pattern:
10594 // stmtExpr void (IL 0x000... ???)
10598 // call help ref HELPER.CORINFO_HELP_BOX_NULLABLE
10599 // const(h) long 0x7fed96836c8 class
10601 // ld.lclVar struct V00 arg0
10604 // which comes from this code (reported by a customer as being slow):
10606 // private static bool IsNull<T>(T arg)
10608 // return arg==null;
10612 if (op1->IsCnsIntOrI() && op2->IsHelperCall())
10615 opCall = op2->AsCall();
10617 else if (op1->IsHelperCall() && op2->IsCnsIntOrI())
10620 opCall = op1->AsCall();
10627 if (!opCns->IsIntegralConst(0))
10632 if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE)
10637 // replace the box with an access of the nullable 'hasValue' field which is at the zero offset
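// e.g. (a sketch): "CORINFO_HELP_BOX_NULLABLE(hnd, &nullableLcl) == null" is rewritten
// to "IND(bool, &nullableLcl) == 0", i.e. a direct test of that field.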
10638 GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1);
10642 compare->gtOp.gtOp1 = newOp;
10646 compare->gtOp.gtOp2 = newOp;
10652 #ifdef FEATURE_SIMD
10654 //--------------------------------------------------------------------------------------------------------------
10655 // getSIMDStructFromField:
10656 // Check whether the field belongs to a simd struct. If it does, return the GenTreePtr for
10657 // the struct node, along with the base type, field index and simd size. If it does not, return nullptr.
10658 // Usually, if the tree node is from a simd lclvar that is not used in any SIMD intrinsic, we
10659 // should return nullptr, since in that case we should treat the SIMD struct as a regular struct.
10660 // However, if you want the simd struct node regardless, you can set ignoreUsedInSIMDIntrinsic
10661 // to true. Then there will be no IsUsedInSIMDIntrinsic check, and the SIMD struct node will be
10662 // returned whenever the struct is a SIMD struct.
10665 // tree - GenTreePtr. This node will be checked to see whether it is a field that belongs to a simd
10666 // struct used in a simd intrinsic.
10667 // pBaseTypeOut - var_types pointer; if the tree node is the one we want, we set *pBaseTypeOut
10668 // to the simd lclvar's base type.
10669 // indexOut - unsigned pointer; if the tree is used in a simd intrinsic, we set *indexOut
10670 // to the index number of this field.
10671 // simdSizeOut - unsigned pointer; if the tree is used in a simd intrinsic, we set *simdSizeOut
10672 // to the size of the simd struct this tree belongs to.
10673 // ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will ignore
10674 // the UsedInSIMDIntrinsic check.
10677 // A GenTreePtr pointing to the simd lclvar tree the field belongs to. If the tree is not a
10678 // simd-intrinsic-related field, return nullptr.
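// e.g. (a sketch): for a GT_FIELD reading "v.Y" of a Vector3 local 'v' that is used in
// SIMD intrinsics, this returns the lclVar tree for 'v' and sets *pBaseTypeOut = TYP_FLOAT,
// *indexOut = 1, *simdSizeOut = 12.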
10681 GenTreePtr Compiler::getSIMDStructFromField(GenTreePtr tree,
10682 var_types* pBaseTypeOut,
10683 unsigned* indexOut,
10684 unsigned* simdSizeOut,
10685 bool ignoreUsedInSIMDIntrinsic /*false*/)
10687 GenTreePtr ret = nullptr;
10688 if (tree->OperGet() == GT_FIELD)
10690 GenTreePtr objRef = tree->gtField.gtFldObj;
10691 if (objRef != nullptr)
10693 GenTreePtr obj = nullptr;
10694 if (objRef->gtOper == GT_ADDR)
10696 obj = objRef->gtOp.gtOp1;
10698 else if (ignoreUsedInSIMDIntrinsic)
10707 if (isSIMDTypeLocal(obj))
10709 unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
10710 LclVarDsc* varDsc = &lvaTable[lclNum];
10711 if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic)
10713 *simdSizeOut = varDsc->lvExactSize;
10714 *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj);
10718 else if (obj->OperGet() == GT_SIMD)
10721 GenTreeSIMD* simdNode = obj->AsSIMD();
10722 *simdSizeOut = simdNode->gtSIMDSize;
10723 *pBaseTypeOut = simdNode->gtSIMDBaseType;
10727 if (ret != nullptr)
10729 unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut);
10730 *indexOut = tree->gtField.gtFldOffset / BaseTypeSize;
10735 /*****************************************************************************
10736 * If a read operation tries to access a simd struct field, then transform the
10737 * operation to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
10738 * Otherwise, return the old tree.
10740 * tree - GenTreePtr. If this pointer points to a simd struct that is used in a simd
10741 * intrinsic, we will morph it into the simd intrinsic SIMDIntrinsicGetItem.
10743 * A GenTreePtr which points to the new tree; if the tree is not for a simd intrinsic, the old tree is returned.
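 * e.g. (a sketch): a read of "v.Y" on a SIMD Vector3 local becomes
 * GT_SIMD(float, SIMDIntrinsicGetItem, v, 1).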
10747 GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree)
10749 unsigned index = 0;
10750 var_types baseType = TYP_UNKNOWN;
10751 unsigned simdSize = 0;
10752 GenTreePtr simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
10753 if (simdStructNode != nullptr)
10755 assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
10756 GenTree* op2 = gtNewIconNode(index);
10757 tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
10759 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10765 /*****************************************************************************
10766 * Transform an assignment of a SIMD struct field to SIMD intrinsic
10767 * SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment,
10768 * then return the old tree.
10770 * tree - GenTreePtr. If this pointer points to a simd struct that is used in a simd
10771 * intrinsic, we will morph it into a simd intrinsic set.
10773 * A GenTreePtr which points to the new tree; if the tree is not for a simd intrinsic, the old tree is returned.
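 * e.g. (a sketch): "v.Y = x" on a SIMD Vector3 local becomes
 * "v = GT_SIMD(SIMDIntrinsicSetY, v, x)".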
10777 GenTreePtr Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree)
10779 assert(tree->OperGet() == GT_ASG);
10780 GenTreePtr op1 = tree->gtGetOp1();
10781 GenTreePtr op2 = tree->gtGetOp2();
10783 unsigned index = 0;
10784 var_types baseType = TYP_UNKNOWN;
10785 unsigned simdSize = 0;
10786 GenTreePtr simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize);
10787 if (simdOp1Struct != nullptr)
10789 // Generate the simd set intrinsic
10790 assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
10792 SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid;
10796 simdIntrinsicID = SIMDIntrinsicSetX;
10799 simdIntrinsicID = SIMDIntrinsicSetY;
10802 simdIntrinsicID = SIMDIntrinsicSetZ;
10805 simdIntrinsicID = SIMDIntrinsicSetW;
10808 noway_assert(!"There is no set intrinsic for index bigger than 3");
10811 GenTreePtr target = gtClone(simdOp1Struct);
10812 assert(target != nullptr);
10813 GenTreePtr simdTree = gtNewSIMDNode(target->gtType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize);
10814 tree->gtOp.gtOp1 = target;
10815 tree->gtOp.gtOp2 = simdTree;
10817 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10824 #endif // FEATURE_SIMD
10826 /*****************************************************************************
10828 * Transform the given GTK_SMPOP tree for code generation.
10832 #pragma warning(push)
10833 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
10835 GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
10837 // This extra scope is a workaround for a gcc bug:
10838 // the inline destructor for ALLOCA_CHECK confuses the control
10839 // flow, and gcc thinks that the function never returns.
10842 noway_assert(tree->OperKind() & GTK_SMPOP);
10844 /* The steps in this function are :
10845 o Perform required preorder processing
10846 o Process the first, then second operand, if any
10847 o Perform required postorder morphing
10848 o Perform optional postorder morphing if optimizing
10851 bool isQmarkColon = false;
10853 #if LOCAL_ASSERTION_PROP
10854 AssertionIndex origAssertionCount = DUMMY_INIT(0);
10855 AssertionDsc* origAssertionTab = DUMMY_INIT(NULL);
10857 AssertionIndex thenAssertionCount = DUMMY_INIT(0);
10858 AssertionDsc* thenAssertionTab = DUMMY_INIT(NULL);
10863 #if !FEATURE_STACK_FP_X87
10864 tree = fgMorphForRegisterFP(tree);
10868 genTreeOps oper = tree->OperGet();
10869 var_types typ = tree->TypeGet();
10870 GenTreePtr op1 = tree->gtOp.gtOp1;
10871 GenTreePtr op2 = tree->gtGetOp2IfPresent();
10873 /*-------------------------------------------------------------------------
10874 * First do any PRE-ORDER processing
10879 // Some arithmetic operators need to use a helper call to the EE
10883 tree = fgDoNormalizeOnStore(tree);
10884 /* fgDoNormalizeOnStore can change op2 */
10885 noway_assert(op1 == tree->gtOp.gtOp1);
10886 op2 = tree->gtOp.gtOp2;
10888 #ifdef FEATURE_SIMD
10890 // We should check whether op2 should be assigned to a SIMD field or not.
10891 // If it is, we should translate the tree to a SIMD intrinsic.
10892 assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0));
10893 GenTreePtr newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree);
10894 typ = tree->TypeGet();
10895 op1 = tree->gtGetOp1();
10896 op2 = tree->gtGetOp2();
10898 assert((tree == newTree) && (tree->OperGet() == oper));
10899 if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
10901 tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
10924 // We can't CSE the LHS of an assignment. Only r-values can be CSEed.
10925 // Previously, the "lhs" (addr) of a block op was CSE'd. So, to duplicate the former
10926 // behavior, allow CSE'ing if is a struct type (or a TYP_REF transformed from a struct type)
10927 // TODO-1stClassStructs: improve this.
10928 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
10930 op1->gtFlags |= GTF_DONT_CSE;
10936 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
10937 op1->gtFlags |= GTF_DONT_CSE;
10945 if (op1->OperKind() & GTK_RELOP)
10947 noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK));
10948 /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does
10949 not need to materialize the result as a 0 or 1. */
10951 /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */
10952 op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
10954 // Request that the codegen for op1 sets the condition flags
10955 // when it generates the code for op1.
10957 // Codegen for op1 must set the condition flags if
10958 // this method returns true.
10960 op1->gtRequestSetFlags();
10964 GenTreePtr effOp1 = op1->gtEffectiveVal();
10965 noway_assert((effOp1->gtOper == GT_CNS_INT) &&
10966 (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1)));
10971 #if LOCAL_ASSERTION_PROP
10972 if (optLocalAssertionProp)
10975 isQmarkColon = true;
10980 return fgMorphArrayIndex(tree);
10983 return fgMorphCast(tree);
10987 #ifndef _TARGET_64BIT_
10988 if (typ == TYP_LONG)
10990 /* For (long)int1 * (long)int2, we don't actually do the
10991 casts, and just multiply the 32 bit values, which will
10992 give us the 64 bit result in edx:eax */
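/* e.g. (a sketch): GT_MUL(long, CAST(long <- int, x), CAST(long <- int, y)) is
   flagged GTF_MUL_64RSLT below, and codegen emits a single 32x32->64 multiply
   instead of a CORINFO_HELP_LMUL call. */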
10995 if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST &&
10996 genActualType(op1->CastFromType()) == TYP_INT &&
10997 genActualType(op2->CastFromType()) == TYP_INT) &&
10998 !op1->gtOverflow() && !op2->gtOverflow())
11000 // The casts have to be of the same signedness.
11001 if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED))
11003 // We see if we can force an int constant to change its signedness
11004 GenTreePtr constOp;
11005 if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT)
11007 else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT)
11010 goto NO_MUL_64RSLT;
11012 if (((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000)))
11013 constOp->gtFlags ^= GTF_UNSIGNED;
11015 goto NO_MUL_64RSLT;
11018 // The only combination that can overflow
11019 if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED))
11020 goto NO_MUL_64RSLT;
11022 /* Remaining combinations can never overflow during long mul. */
11024 tree->gtFlags &= ~GTF_OVERFLOW;
11026 /* Do unsigned mul only if the casts were unsigned */
11028 tree->gtFlags &= ~GTF_UNSIGNED;
11029 tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED;
11031 /* Since we are committing to GTF_MUL_64RSLT, we don't want
11032 the casts to be folded away. So morph the castees directly */
11034 op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1);
11035 op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1);
11037 // Propagate side effect flags up the tree
11038 op1->gtFlags &= ~GTF_ALL_EFFECT;
11039 op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11040 op2->gtFlags &= ~GTF_ALL_EFFECT;
11041 op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11043 // If the GT_MUL can be altogether folded away, we should do that.
11045 if ((op1->gtCast.CastOp()->OperKind() & op2->gtCast.CastOp()->OperKind() & GTK_CONST) &&
11046 opts.OptEnabled(CLFLG_CONSTANTFOLD))
11048 tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1);
11049 tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2);
11050 noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST);
11051 tree = gtFoldExprConst(tree);
11052 noway_assert(tree->OperIsConst());
11056 tree->gtFlags |= GTF_MUL_64RSLT;
11058 // If op1 and op2 are unsigned casts, we need to do an unsigned mult
11059 tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED);
11061 // Insert GT_NOP nodes for the cast operands so that they do not get folded
11062 // And propagate the new flags. We don't want to CSE the casts because
11063 // codegen expects GTF_MUL_64RSLT muls to have a certain layout.
11065 if (op1->gtCast.CastOp()->OperGet() != GT_NOP)
11067 op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
11068 op1->gtFlags &= ~GTF_ALL_EFFECT;
11069 op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
11072 if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
11074 op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
11075 op2->gtFlags &= ~GTF_ALL_EFFECT;
11076 op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
11079 op1->gtFlags |= GTF_DONT_CSE;
11080 op2->gtFlags |= GTF_DONT_CSE;
11082 tree->gtFlags &= ~GTF_ALL_EFFECT;
11083 tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);
11085 goto DONE_MORPHING_CHILDREN;
11087 else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0)
11090 if (tree->gtOverflow())
11091 helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF;
11093 helper = CORINFO_HELP_LMUL;
11095 goto USE_HELPER_FOR_ARITH;
11099 /* We are seeing this node again. We have decided to use
11100 GTF_MUL_64RSLT, so leave it alone. */
11102 assert(tree->gtIsValid64RsltMul());
11105 #endif // !_TARGET_64BIT_
11110 #ifndef _TARGET_64BIT_
11111 if (typ == TYP_LONG)
11113 helper = CORINFO_HELP_LDIV;
11114 goto USE_HELPER_FOR_ARITH;
11117 #if USE_HELPERS_FOR_INT_DIV
11118 if (typ == TYP_INT && !fgIsSignedDivOptimizable(op2))
11120 helper = CORINFO_HELP_DIV;
11121 goto USE_HELPER_FOR_ARITH;
11124 #endif // !_TARGET_64BIT_
11126 #ifndef LEGACY_BACKEND
11127 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
11129 op2 = gtFoldExprConst(op2);
11131 #endif // !LEGACY_BACKEND
11136 #ifndef _TARGET_64BIT_
11137 if (typ == TYP_LONG)
11139 helper = CORINFO_HELP_ULDIV;
11140 goto USE_HELPER_FOR_ARITH;
11142 #if USE_HELPERS_FOR_INT_DIV
11143 if (typ == TYP_INT && !fgIsUnsignedDivOptimizable(op2))
11145 helper = CORINFO_HELP_UDIV;
11146 goto USE_HELPER_FOR_ARITH;
11149 #endif // _TARGET_64BIT_
11154 if (varTypeIsFloating(typ))
11156 helper = CORINFO_HELP_DBLREM;
11158 if (op1->TypeGet() == TYP_FLOAT)
11160 if (op2->TypeGet() == TYP_FLOAT)
11162 helper = CORINFO_HELP_FLTREM;
11166 tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
11169 else if (op2->TypeGet() == TYP_FLOAT)
11171 tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
11173 goto USE_HELPER_FOR_ARITH;
11176 // Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod.
11177 // A similar optimization for signed mod will not work for a negative perfectly divisible
11178 // HI-word. To make it correct, we would need to divide without the sign and then flip the
11179 // result sign after mod. This requires 18 opcodes + flow making it not worthy to inline.
11180 goto ASSIGN_HELPER_FOR_MOD;
11184 #ifdef _TARGET_ARMARCH_
11186 // Note for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization
11188 #else // _TARGET_XARCH
11189 /* If this is an unsigned long mod with an op2 that is a cast to long from a
11190 constant int, then don't morph to a call to the helper. This can be done
11191 faster inline using idiv.
11195 if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
11196 ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
11197 ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
11199 if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
11200 op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
11201 op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
11202 (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
11204 tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
11205 noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
11208 if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 &&
11209 op2->gtIntConCommon.LngValue() <= 0x3fffffff)
11211 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
11212 noway_assert(op1->TypeGet() == TYP_LONG);
11214 // Update flags for op1 morph
11215 tree->gtFlags &= ~GTF_ALL_EFFECT;
11217 tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant
11219 // If op1 is a constant, then do constant folding of the division operator
11220 if (op1->gtOper == GT_CNS_NATIVELONG)
11222 tree = gtFoldExpr(tree);
11227 #endif // _TARGET_XARCH
11229 ASSIGN_HELPER_FOR_MOD:
11231 // For "val % 1", return 0 if op1 doesn't have any side effects
11232 // and we are not in the CSE phase; in the CSE phase we cannot discard 'tree'
11233 // because it may contain CSE expressions that we haven't yet examined.
11235 if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase)
11237 if (op2->IsIntegralConst(1))
11239 GenTreePtr zeroNode = gtNewZeroConNode(typ);
11241 zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
11243 DEBUG_DESTROY_NODE(tree);
11248 #ifndef _TARGET_64BIT_
11249 if (typ == TYP_LONG)
11251 helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD;
11252 goto USE_HELPER_FOR_ARITH;
11255 #if USE_HELPERS_FOR_INT_DIV
11256 if (typ == TYP_INT)
11258 if (oper == GT_UMOD && !fgIsUnsignedModOptimizable(op2))
11260 helper = CORINFO_HELP_UMOD;
11261 goto USE_HELPER_FOR_ARITH;
11263 else if (oper == GT_MOD && !fgIsSignedModOptimizable(op2))
11265 helper = CORINFO_HELP_MOD;
11266 goto USE_HELPER_FOR_ARITH;
11270 #endif // !_TARGET_64BIT_
11272 #ifndef LEGACY_BACKEND
11273 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
11275 op2 = gtFoldExprConst(op2);
11278 #ifdef _TARGET_ARM64_
11280 // For ARM64 we don't have a remainder instruction,
11281 // The architecture manual suggests the following transformation to
11282 // generate code for such operator:
11284 // a % b = a - (a / b) * b;
11286 // NOTE: we should never need to perform this transformation when remorphing, since global morphing
11287 // should already have done so and we do not introduce new modulus nodes in later phases.
11288 assert(!optValnumCSE_phase);
11289 tree = fgMorphModToSubMulDiv(tree->AsOp());
11290 op1 = tree->gtOp.gtOp1;
11291 op2 = tree->gtOp.gtOp2;
11292 #else //_TARGET_ARM64_
11293 // If b is not a power of 2 constant then lowering replaces a % b
11294 // with a - (a / b) * b and applies magic division optimization to
11295 // a / b. The code may already contain an a / b expression (e.g.
11296 // x = a / 10; y = a % 10;) and then we end up with redundant code.
11297 // If we convert % to / here we give CSE the opportunity to eliminate
11298 // the redundant division. If there's no redundant division then
11299 // nothing is lost, lowering would have done this transform anyway.
11301 if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst()))
11303 ssize_t divisorValue = op2->AsIntCon()->IconValue();
11304 size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue)
11305 : static_cast<size_t>(abs(divisorValue));
11307 if (!isPow2(absDivisorValue))
11309 tree = fgMorphModToSubMulDiv(tree->AsOp());
11310 op1 = tree->gtOp.gtOp1;
11311 op2 = tree->gtOp.gtOp2;
11314 #endif //_TARGET_ARM64_
11315 #endif // !LEGACY_BACKEND
11318 USE_HELPER_FOR_ARITH:
11320 /* We have to morph these arithmetic operations into helper calls
11321 before morphing the arguments (preorder), else the arguments
11322 won't get correct values of fgPtrArgCntCur.
11323 However, try to fold the tree first in case we end up with a
11324 simple node which won't need a helper call at all */
11326 noway_assert(tree->OperIsBinary());
11328 GenTreePtr oldTree = tree;
11330 tree = gtFoldExpr(tree);
11332 // Were we able to fold it ?
11333 // Note that gtFoldExpr may return a non-leaf even if successful
11334 // e.g. for something like "expr / 1" - see also bug #290853
11335 if (tree->OperIsLeaf() || (oldTree != tree))
11338 return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree);
11341 // Did we fold it into a comma node with throw?
11342 if (tree->gtOper == GT_COMMA)
11344 noway_assert(fgIsCommaThrow(tree));
11345 return fgMorphTree(tree);
11348 return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2));
11351 // normalize small integer return values
11352 if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) &&
11353 (op1->TypeGet() != TYP_VOID) && fgCastNeeded(op1, info.compRetType))
11355 // Small-typed return values are normalized by the callee
11356 op1 = gtNewCastNode(TYP_INT, op1, info.compRetType);
11358 // Propagate GTF_COLON_COND
11359 op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
11361 tree->gtOp.gtOp1 = fgMorphCast(op1);
11363 // Propagate side effect flags
11364 tree->gtFlags &= ~GTF_ALL_EFFECT;
11365 tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11374 // Check for typeof(...) == obj.GetType()
11375 // Also check for typeof(...) == typeof(...)
11376 // IMPORTANT NOTE: this optimization relies on a one-to-one mapping between
11377 // type handles and instances of System.Type
11378 // If this invariant is ever broken, the optimization will need updating
11379 CLANG_FORMAT_COMMENT_ANCHOR;
11381 #ifdef LEGACY_BACKEND
11382 if (op1->gtOper == GT_CALL && op2->gtOper == GT_CALL &&
11383 ((op1->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
11384 (op1->gtCall.gtCallType == CT_HELPER)) &&
11385 ((op2->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
11386 (op2->gtCall.gtCallType == CT_HELPER)))
11388 if ((((op1->gtOper == GT_INTRINSIC) &&
11389 (op1->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
11390 ((op1->gtOper == GT_CALL) && (op1->gtCall.gtCallType == CT_HELPER))) &&
11391 (((op2->gtOper == GT_INTRINSIC) &&
11392 (op2->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
11393 ((op2->gtOper == GT_CALL) && (op2->gtCall.gtCallType == CT_HELPER))))
11396 GenTreePtr pGetClassFromHandle;
11397 GenTreePtr pGetType;
11399 #ifdef LEGACY_BACKEND
11400 bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall());
11401 bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall());
11403 bool bOp1ClassFromHandle =
11404 op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall()) : false;
11405 bool bOp2ClassFromHandle =
11406 op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall()) : false;
11409 // Optimize typeof(...) == typeof(...)
11410 // Typically this occurs in generic code that attempts a type switch
11411 // e.g. typeof(T) == typeof(int)
11413 if (bOp1ClassFromHandle && bOp2ClassFromHandle)
11415 GenTreePtr classFromHandleArg1 = tree->gtOp.gtOp1->gtCall.gtCallArgs->gtOp.gtOp1;
11416 GenTreePtr classFromHandleArg2 = tree->gtOp.gtOp2->gtCall.gtCallArgs->gtOp.gtOp1;
11418 GenTreePtr compare = gtNewOperNode(oper, TYP_INT, classFromHandleArg1, classFromHandleArg2);
11420 compare->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
11422 // Morph and return
11423 return fgMorphTree(compare);
11425 else if (bOp1ClassFromHandle || bOp2ClassFromHandle)
11428 // Now check for GetClassFromHandle(handle) == obj.GetType()
11431 if (bOp1ClassFromHandle)
11433 pGetClassFromHandle = tree->gtOp.gtOp1;
11438 pGetClassFromHandle = tree->gtOp.gtOp2;
11442 GenTreePtr pGetClassFromHandleArgument = pGetClassFromHandle->gtCall.gtCallArgs->gtOp.gtOp1;
11443 GenTreePtr pConstLiteral = pGetClassFromHandleArgument;
11445 // Unwrap GT_NOP node used to prevent constant folding
11446 if (pConstLiteral->gtOper == GT_NOP && pConstLiteral->gtType == TYP_I_IMPL)
11448 pConstLiteral = pConstLiteral->gtOp.gtOp1;
11451 // In the ngen case, we have to go through an indirection to get the right handle.
11452 if (pConstLiteral->gtOper == GT_IND)
11454 pConstLiteral = pConstLiteral->gtOp.gtOp1;
11456 #ifdef LEGACY_BACKEND
11458 if (pGetType->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC &&
11459 info.compCompHnd->getIntrinsicID(pGetType->gtCall.gtCallMethHnd) ==
11460 CORINFO_INTRINSIC_Object_GetType &&
11461 #else
11462 if ((pGetType->gtOper == GT_INTRINSIC) &&
11463 (pGetType->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType) &&
11464 #endif
11465 pConstLiteral->gtOper == GT_CNS_INT && pConstLiteral->gtType == TYP_I_IMPL)
11467 CORINFO_CLASS_HANDLE clsHnd =
11468 CORINFO_CLASS_HANDLE(pConstLiteral->gtIntCon.gtCompileTimeHandle);
11470 if (info.compCompHnd->canInlineTypeCheckWithObjectVTable(clsHnd))
11472 // Method Table tree
11473 CLANG_FORMAT_COMMENT_ANCHOR;
11474 #ifdef LEGACY_BACKEND
11475 GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp);
11477 GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtUnOp.gtOp1);
11479 objMT->gtFlags |= GTF_EXCEPT; // Null ref exception if object is null
11480 compCurBB->bbFlags |= BBF_HAS_VTABREF;
11481 optMethodFlags |= OMF_HAS_VTABLEREF;
11483 // Method table constant
11484 GenTreePtr cnsMT = pGetClassFromHandleArgument;
11486 GenTreePtr compare = gtNewOperNode(oper, TYP_INT, objMT, cnsMT);
11488 compare->gtFlags |=
11489 tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
11491 // Morph and return
11492 return fgMorphTree(compare);
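// For example (illustrative): "obj.GetType() == typeof(string)" morphs into a
// direct method-table comparison:
//   before: GT_EQ(CALL Object.GetType(obj), CALL TypeHandleToRuntimeType(hString))
//   after:  GT_EQ(GT_IND(obj), hString)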
11497 fgMorphRecognizeBoxNullable(tree);
11498 op1 = tree->gtOp.gtOp1;
11499 op2 = tree->gtGetOp2IfPresent();
11503 #ifdef _TARGET_ARM_
11504 case GT_INTRINSIC:
11505 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)
11507 switch (tree->TypeGet())
11509 case TYP_DOUBLE:
11510 return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1));
11511 case TYP_FLOAT:
11512 return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1));
11524 #if !CPU_HAS_FP_SUPPORT
11525 tree = fgMorphToEmulatedFP(tree);
11528 /* Could this operator throw an exception? */
11529 if (fgGlobalMorph && tree->OperMayThrow())
11531 if (((tree->OperGet() != GT_IND) && !tree->OperIsBlk()) || fgAddrCouldBeNull(tree->gtOp.gtOp1))
11533 /* Mark the tree node as potentially throwing an exception */
11534 tree->gtFlags |= GTF_EXCEPT;
11538 /*-------------------------------------------------------------------------
11539 * Process the first operand, if any
11545 #if LOCAL_ASSERTION_PROP
11546 // If we are entering the "then" part of a Qmark-Colon we must
11547 // save the state of the current copy assignment table
11548 // so that we can restore this state when entering the "else" part
11549 if (isQmarkColon)
11551 noway_assert(optLocalAssertionProp);
11552 if (optAssertionCount)
11554 noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
11555 unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
11556 origAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
11557 origAssertionCount = optAssertionCount;
11558 memcpy(origAssertionTab, optAssertionTabPrivate, tabSize);
11562 origAssertionCount = 0;
11563 origAssertionTab = nullptr;
11566 #endif // LOCAL_ASSERTION_PROP
11568 // We might need a new MorphAddressContext context. (These are used to convey
11569 // parent context about how addresses being calculated will be used; see the
11570 // specification comment for MorphAddrContext for full details.)
11571 // Assume it's an Ind context to start.
11572 MorphAddrContext subIndMac1(MACK_Ind);
11573 MorphAddrContext* subMac1 = mac;
11574 if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind)
11576 switch (tree->gtOper)
11579 if (subMac1 == nullptr)
11581 subMac1 = &subIndMac1;
11582 subMac1->m_kind = MACK_Addr;
11586 // In a comma, the incoming context only applies to the rightmost arg of the
11587 // comma list. The left arg (op1) gets a fresh context.
11594 subMac1 = &subIndMac1;
11601 // For additions, if we're in an IND context keep track of whether
11602 // all offsets added to the address are constant, and their sum.
11603 if (tree->gtOper == GT_ADD && subMac1 != nullptr)
11605 assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock.
11606 GenTreePtr otherOp = tree->gtOp.gtOp2;
11608 // Is the other operand a constant?
11608 if (otherOp->IsCnsIntOrI())
11610 ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset);
11611 totalOffset += otherOp->gtIntConCommon.IconValue();
11612 if (totalOffset.IsOverflow())
11614 // We will consider an offset so large as to overflow as "not a constant" --
11615 // we will do a null check.
11616 subMac1->m_allConstantOffsets = false;
11620 subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue();
11625 subMac1->m_allConstantOffsets = false;
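// For example (illustrative): morphing the address "p + 4 + 8" in an IND
// context accumulates m_totalOffset = 12; that total (and whether every
// added offset was a constant) later decides whether an explicit null
// check is still required for the indirection.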
11629 // If gtOp1 is a GT_FIELD, we need to pass down the mac if
11630 // its parent is GT_ADDR, since the address of the field
11631 // is part of an ongoing address computation. Otherwise
11632 // op1 represents the value of the field and so any address
11633 // calculations it does are in a new context.
11634 if ((op1->gtOper == GT_FIELD) && (tree->gtOper != GT_ADDR))
11638 // The impact of this field's value to any ongoing
11639 // address computation is handled below when looking
11640 // at op2.
11643 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1);
11645 #if LOCAL_ASSERTION_PROP
11646 // If we are exiting the "then" part of a Qmark-Colon we must
11647 // save the state of the current copy assignment table
11648 // so that we can merge this state with the "else" part exit
11649 if (isQmarkColon)
11651 noway_assert(optLocalAssertionProp);
11652 if (optAssertionCount)
11654 noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
11655 unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
11656 thenAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
11657 thenAssertionCount = optAssertionCount;
11658 memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize);
11662 thenAssertionCount = 0;
11663 thenAssertionTab = nullptr;
11666 #endif // LOCAL_ASSERTION_PROP
11668 /* Morphing along with folding and inlining may have changed the
11669 * side effect flags, so we have to reset them
11671 * NOTE: Don't reset the exception flags on nodes that may throw */
11673 noway_assert(tree->gtOper != GT_CALL);
11675 if ((tree->gtOper != GT_INTRINSIC) || !IsIntrinsicImplementedByUserCall(tree->gtIntrinsic.gtIntrinsicId))
11677 tree->gtFlags &= ~GTF_CALL;
11680 if (!tree->OperMayThrow())
11682 tree->gtFlags &= ~GTF_EXCEPT;
11685 /* Propagate the new flags */
11686 tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
11688 // &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar does
11689 // Similarly for clsVar
11690 if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR))
11692 tree->gtFlags &= ~GTF_GLOB_REF;
11696 /*-------------------------------------------------------------------------
11697 * Process the second operand, if any
11703 #if LOCAL_ASSERTION_PROP
11704 // If we are entering the "else" part of a Qmark-Colon we must
11705 // reset the state of the current copy assignment table
11706 if (isQmarkColon)
11708 noway_assert(optLocalAssertionProp);
11709 optAssertionReset(0);
11710 if (origAssertionCount)
11712 size_t tabSize = origAssertionCount * sizeof(AssertionDsc);
11713 memcpy(optAssertionTabPrivate, origAssertionTab, tabSize);
11714 optAssertionReset(origAssertionCount);
11717 #endif // LOCAL_ASSERTION_PROP
11719 // We might need a new MorphAddressContext context to use in evaluating op2.
11720 // (These are used to convey parent context about how addresses being calculated
11721 // will be used; see the specification comment for MorphAddrContext for full details.)
11722 // Assume it's an Ind context to start.
11723 switch (tree->gtOper)
11726 if (mac != nullptr && mac->m_kind == MACK_Ind)
11728 GenTreePtr otherOp = tree->gtOp.gtOp1;
11729 // Is the other operand a constant?
11730 if (otherOp->IsCnsIntOrI())
11732 mac->m_totalOffset += otherOp->gtIntConCommon.IconValue();
11736 mac->m_allConstantOffsets = false;
11744 // If gtOp2 is a GT_FIELD, we must be taking its value,
11745 // so it should evaluate its address in a new context.
11746 if (op2->gtOper == GT_FIELD)
11748 // The impact of this field's value to any ongoing
11749 // address computation is handled above when looking
11750 // at op1.
11754 tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac);
11756 /* Propagate the side effect flags from op2 */
11758 tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
11760 #if LOCAL_ASSERTION_PROP
11761 // If we are exiting the "else" part of a Qmark-Colon we must
11762 // merge the state of the current copy assignment table with
11763 // that of the exit of the "then" part.
11764 if (isQmarkColon)
11766 noway_assert(optLocalAssertionProp);
11767 // If either exit table has zero entries then
11768 // the merged table also has zero entries
11769 if (optAssertionCount == 0 || thenAssertionCount == 0)
11771 optAssertionReset(0);
11775 size_t tabSize = optAssertionCount * sizeof(AssertionDsc);
11776 if ((optAssertionCount != thenAssertionCount) ||
11777 (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0))
11779 // Yes they are different so we have to find the merged set
11780 // Iterate over the copy asgn table removing any entries
11781 // that do not have an exact match in the thenAssertionTab
11782 AssertionIndex index = 1;
11783 while (index <= optAssertionCount)
11785 AssertionDsc* curAssertion = optGetAssertion(index);
11787 for (unsigned j = 0; j < thenAssertionCount; j++)
11789 AssertionDsc* thenAssertion = &thenAssertionTab[j];
11791 // Do the left sides match?
11792 if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) &&
11793 (curAssertion->assertionKind == thenAssertion->assertionKind))
11795 // Do the right sides match?
11796 if ((curAssertion->op2.kind == thenAssertion->op2.kind) &&
11797 (curAssertion->op2.lconVal == thenAssertion->op2.lconVal))
11808 // If we fall out of the loop above, we didn't find a matching
11809 // entry in the thenAssertionTab, so the assertion must have been
11810 // killed on that path; remove it here.
11813 // The data at optAssertionTabPrivate[i] is to be removed
11814 CLANG_FORMAT_COMMENT_ANCHOR;
11818 printf("The QMARK-COLON ");
11820 printf(" removes assertion candidate #%d\n", index);
11823 optAssertionRemove(index);
11826 // The data at optAssertionTabPrivate[i] is to be kept
11832 #endif // LOCAL_ASSERTION_PROP
11835 DONE_MORPHING_CHILDREN:
11837 /*-------------------------------------------------------------------------
11838 * Now do POST-ORDER processing
11841 #if FEATURE_FIXED_OUT_ARGS && !defined(_TARGET_64BIT_)
11842 // Variable shifts of a long end up being helper calls, so mark the tree as such. This
11843 // is potentially too conservative, since they'll get treated as having side effects.
11844 // It is important to mark them as calls so if they are part of an argument list,
11845 // they will get sorted and processed properly (for example, it is important to handle
11846 // all nested calls before putting struct arguments in the argument registers). We
11847 // could mark the trees just before argument processing, but it would require a full
11848 // tree walk of the argument tree, so we just do it here, instead, even though we'll
11849 // mark non-argument trees (that will still get converted to calls, anyway).
11850 if (GenTree::OperIsShift(oper) && (tree->TypeGet() == TYP_LONG) && (op2->OperGet() != GT_CNS_INT))
11852 tree->gtFlags |= GTF_CALL;
11854 #endif // FEATURE_FIXED_OUT_ARGS && !_TARGET_64BIT_
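// For example (illustrative): on a 32-bit target, "longVal << n" with a
// non-constant 'n' is ultimately emitted as a shift helper call, so marking
// the tree with GTF_CALL here keeps argument sorting and ordering correct.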
11856 if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) &&
11857 (op2 && !varTypeIsGC(op2->TypeGet())))
11859 // The tree is really not GC but was marked as such. Now that the
11860 // children have been unmarked, unmark the tree too.
11862 // Remember that GT_COMMA inherits its type only from op2
11863 if (tree->gtOper == GT_COMMA)
11865 tree->gtType = genActualType(op2->TypeGet());
11869 tree->gtType = genActualType(op1->TypeGet());
11873 GenTreePtr oldTree = tree;
11875 GenTreePtr qmarkOp1 = nullptr;
11876 GenTreePtr qmarkOp2 = nullptr;
11878 if ((tree->OperGet() == GT_QMARK) && (tree->gtOp.gtOp2->OperGet() == GT_COLON))
11880 qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1;
11881 qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2;
11884 // Try to fold it; maybe we get lucky.
11885 tree = gtFoldExpr(tree);
11887 if (oldTree != tree)
11889 /* if gtFoldExpr returned op1 or op2 then we are done */
11890 if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
11895 /* If we created a comma-throw tree then we need to morph op1 */
11896 if (fgIsCommaThrow(tree))
11898 tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
11899 fgMorphTreeDone(tree);
11905 else if (tree->OperKind() & GTK_CONST)
11910 /* gtFoldExpr could have used setOper to change the oper */
11911 oper = tree->OperGet();
11912 typ = tree->TypeGet();
11914 /* gtFoldExpr could have changed op1 and op2 */
11915 op1 = tree->gtOp.gtOp1;
11916 op2 = tree->gtGetOp2IfPresent();
11918 // Do we have an integer compare operation?
11920 if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet()))
11922 // Are we comparing against zero?
11924 if (op2->IsIntegralConst(0))
11926 // Request that the codegen for op1 sets the condition flags
11927 // when it generates the code for op1.
11929 // Codegen for op1 must set the condition flags if
11930 // this method returns true.
11932 op1->gtRequestSetFlags();
11935 /*-------------------------------------------------------------------------
11936 * Perform the required oper-specific postorder morphing
11940 GenTreePtr cns1, cns2;
11941 GenTreePtr thenNode;
11942 GenTreePtr elseNode;
11943 size_t ival1, ival2;
11944 GenTreePtr lclVarTree;
11945 GenTreeLclVarCommon* lclVarCmnTree;
11946 FieldSeqNode* fieldSeq = nullptr;
11952 lclVarTree = fgIsIndirOfAddrOfLocal(op1);
11953 if (lclVarTree != nullptr)
11955 lclVarTree->gtFlags |= GTF_VAR_DEF;
11958 if (op1->gtEffectiveVal()->OperIsConst())
11960 op1 = gtNewOperNode(GT_IND, tree->TypeGet(), op1);
11961 tree->gtOp.gtOp1 = op1;
11964 /* If we are storing a small type, we might be able to omit a cast */
11965 if ((op1->gtOper == GT_IND) && varTypeIsSmall(op1->TypeGet()))
11967 if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow())
11969 var_types castType = op2->CastToType();
11971 // If we are performing a narrowing cast and
11972 // castType is larger or the same as op1's type
11973 // then we can discard the cast.
11975 if (varTypeIsSmall(castType) && (castType >= op1->TypeGet()))
11977 tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp();
11980 else if (op2->OperIsCompare() && varTypeIsByte(op1->TypeGet()))
11982 /* We don't need to zero extend the setcc instruction */
11983 op2->gtType = TYP_BYTE;
11986 // If we introduced a CSE we may need to undo the optimization above
11987 // (i.e. " op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type)
11988 // When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
11989 else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR))
11991 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
11992 LclVarDsc* varDsc = &lvaTable[varNum];
11994 /* We again need to zero extend the setcc instruction */
11995 op2->gtType = varDsc->TypeGet();
11997 fgAssignSetVarDef(tree);
12015 /* We can't CSE the LHS of an assignment */
12016 /* We must also set this in the pre-morphing phase, otherwise assertionProp doesn't see it */
12017 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
12019 op1->gtFlags |= GTF_DONT_CSE;
12026 /* Make sure we're allowed to do this */
12028 if (optValnumCSE_phase)
12030 // It is not safe to reorder/delete CSE's
12036 /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */
12038 if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0)
12040 op1 = tree->gtOp.gtOp1;
12042 /* Since this can occur repeatedly we use a while loop */
12044 while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) &&
12045 (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) && (op1->gtType == TYP_INT) &&
12046 (op1->gtOverflow() == false))
12048 /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */
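// For example (illustrative): "(x + 5) == 2" becomes "x == -3" and
// "(x - 5) == 2" becomes "x == 7"; the enclosing while loop peels nested
// cases such as "((x + 5) + 3) == 2" one constant at a time.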
12050 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
12051 ival2 = cns2->gtIntCon.gtIconVal;
12053 if (op1->gtOper == GT_ADD)
12055 ival2 -= ival1;
12057 else
12059 ival2 += ival1;
12061 cns2->gtIntCon.gtIconVal = ival2;
12063 #ifdef _TARGET_64BIT_
12064 // we need to properly re-sign-extend or truncate as needed.
12065 cns2->AsIntCon()->TruncateOrSignExtend32();
12066 #endif // _TARGET_64BIT_
12068 op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
12073 // Here we look for the following tree
12075 //        EQ/NE
12076 //        /  \
12077 //      op1   CNS 0/1
12079 ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1
12081 // cast to unsigned allows test for both 0 and 1
12082 if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->gtIntConCommon.IconValue()) <= 1U))
12084 ival2 = (size_t)cns2->gtIntConCommon.IconValue();
12086 else // cast to UINT64 allows test for both 0 and 1
12087 if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->gtIntConCommon.LngValue()) <= 1ULL))
12089 ival2 = (size_t)cns2->gtIntConCommon.LngValue();
12092 if (ival2 != INT_MAX)
12094 // If we don't have a comma and relop, we can't do this optimization
12096 if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare()))
12098 // Here we look for the following transformation
12100 //             EQ/NE                      Possible REVERSE(RELOP)
12101 //             /  \                           /      \
12102 //          COMMA  CNS 0/1     ->          COMMA    relop_op2
12103 //          /   \                          /    \
12104 //         x    RELOP                    x      relop_op1
12105 //              /   \
12106 //       relop_op1   relop_op2
12110 GenTreePtr comma = op1;
12111 GenTreePtr relop = comma->gtOp.gtOp2;
12113 GenTreePtr relop_op1 = relop->gtOp.gtOp1;
12115 bool reverse = ((ival2 == 0) == (oper == GT_EQ));
12117 if (reverse)
12119 gtReverseCond(relop);
12122 relop->gtOp.gtOp1 = comma;
12123 comma->gtOp.gtOp2 = relop_op1;
12125 // Comma now has fewer nodes underneath it, so we need to regenerate its flags
12126 comma->gtFlags &= ~GTF_ALL_EFFECT;
12127 comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT;
12128 comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
12130 noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0);
12131 noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0);
12132 relop->gtFlags |=
12133 tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT);
12138 if (op1->gtOper == GT_COMMA)
12140 // Here we look for the following tree
12141 // and when the LCL_VAR is a temp we can fold the tree:
12143 //      EQ/NE                      EQ/NE
12144 //      /  \                       /  \
12145 //   COMMA  CNS 0/1    ->      RELOP  CNS 0/1
12146 //   /   \                      / \
12147 //  ASG  LCL_VAR
12148 //  /  \
12149 // LCL_VAR  RELOP
12150 //           / \
12153 GenTreePtr asg = op1->gtOp.gtOp1;
12154 GenTreePtr lcl = op1->gtOp.gtOp2;
12156 /* Make sure that the left side of the comma is the assignment of the LCL_VAR */
12157 if (asg->gtOper != GT_ASG)
12162 /* The right side of the comma must be a LCL_VAR temp */
12163 if (lcl->gtOper != GT_LCL_VAR)
12168 unsigned lclNum = lcl->gtLclVarCommon.gtLclNum;
12169 noway_assert(lclNum < lvaCount);
12171 /* If the LCL_VAR is not a temp then bail, a temp has a single def */
12172 if (!lvaTable[lclNum].lvIsTemp)
12178 /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */
12179 // Fix 383856 X86/ARM ILGEN
12180 if (lclNumIsCSE(lclNum))
12186 /* We also must be assigning the result of a RELOP */
12187 if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR)
12192 /* Both of the LCL_VAR must match */
12193 if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum)
12198 /* If right side of asg is not a RELOP then skip */
12199 if (!asg->gtOp.gtOp2->OperIsCompare())
12204 LclVarDsc* varDsc = lvaTable + lclNum;
12206 /* Set op1 to the right side of asg, (i.e. the RELOP) */
12207 op1 = asg->gtOp.gtOp2;
12209 DEBUG_DESTROY_NODE(asg->gtOp.gtOp1);
12210 DEBUG_DESTROY_NODE(lcl);
12212 /* This local variable should never be used again */
12214 // VSW 184221: Set lvRefCnt to zero to indicate that this local var
12215 // is not used any more. (Keep the lvType as is.)
12216 // Otherwise lvOnFrame will be set to true in Compiler::raMarkStkVars
12217 // And then emitter::emitEndCodeGen will assert in the following line:
12218 // noway_assert( dsc->lvTracked);
12220 noway_assert(varDsc->lvRefCnt == 0 || // lvRefCnt may not have been set yet.
12221 varDsc->lvRefCnt == 2 // Or, we assume this tmp should only be used here,
12222 // and it only shows up twice.
12224 lvaTable[lclNum].lvRefCnt = 0;
12225 lvaTable[lclNum].lvaResetSortAgainFlag(this);
12228 if (op1->OperIsCompare())
12230 // Here we look for the following tree
12232 //   EQ/NE       ->      RELOP/!RELOP
12233 //   /  \                   /    \
12234 // RELOP  CNS 0/1
12235 //  / \
12237 // Note that we will remove/destroy the EQ/NE node and move
12238 // the RELOP up into its location.
12240 /* Here we reverse the RELOP if necessary */
12242 bool reverse = ((ival2 == 0) == (oper == GT_EQ));
12244 if (reverse)
12246 gtReverseCond(op1);
12249 /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */
12250 op1->gtType = tree->gtType;
12252 noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
12253 op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
12255 DEBUG_DESTROY_NODE(tree);
12260 // Now we check for a compare with the result of an '&' operator
12262 // Here we look for the following transformation:
12264 //       EQ/NE                     EQ/NE
12265 //       /  \                      /  \
12266 //      AND  CNS 0/1    ->        AND  CNS 0
12267 //     /   \                     /   \
12268 // RSZ/RSH  CNS 1               x    CNS (1 << y)
12269 //   /  \
12270 //  x   CNS_INT +y
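// For example (illustrative): "((x >> 5) & 1) != 0" becomes
// "(x & 0x20) != 0", testing the bit in place without any shift.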
12272 if (op1->gtOper == GT_AND)
12274 GenTreePtr andOp = op1;
12275 GenTreePtr rshiftOp = andOp->gtOp.gtOp1;
12277 if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH))
12282 if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI())
12287 ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal;
12289 if (shiftAmount < 0)
12294 if (!andOp->gtOp.gtOp2->IsIntegralConst(1))
12299 if (andOp->gtType == TYP_INT)
12301 if (shiftAmount > 31)
12306 UINT32 newAndOperand = ((UINT32)1) << shiftAmount;
12308 andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand;
12310 // Reverse the cond if necessary
12311 if (ival2 == 1)
12313 gtReverseCond(tree);
12314 cns2->gtIntCon.gtIconVal = 0;
12315 oper = tree->gtOper;
12318 else if (andOp->gtType == TYP_LONG)
12320 if (shiftAmount > 63)
12325 UINT64 newAndOperand = ((UINT64)1) << shiftAmount;
12327 andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand);
12329 // Reverse the cond if necessary
12330 if (ival2 == 1)
12332 gtReverseCond(tree);
12333 cns2->gtIntConCommon.SetLngValue(0);
12334 oper = tree->gtOper;
12338 andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1;
12340 DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2);
12341 DEBUG_DESTROY_NODE(rshiftOp);
12343 } // END if (ival2 != INT_MAX)
12346 /* Now check for compares with small constant longs that can be cast to int */
12348 if (!cns2->OperIsConst())
12353 if (cns2->TypeGet() != TYP_LONG)
12358 /* Is the constant 31 bits or smaller? */
12360 if ((cns2->gtIntConCommon.LngValue() >> 31) != 0)
12365 /* Is the first comparand mask operation of type long ? */
12367 if (op1->gtOper != GT_AND)
12369 /* Another interesting case: cast from int */
12371 if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT &&
12372 !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate
12373 !op1->gtOverflow()) // cannot be an overflow checking cast
12375 /* Simply make this into an integer comparison */
12377 tree->gtOp.gtOp1 = op1->gtCast.CastOp();
12378 tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT);
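// For example (illustrative): "(long)intVal == 3L" becomes the 32-bit
// comparison "intVal == 3" once the non-overflow widening cast is dropped.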
12384 noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND);
12386 /* Is the result of the mask effectively an INT ? */
12388 GenTreePtr andMask;
12389 andMask = op1->gtOp.gtOp2;
12390 if (andMask->gtOper != GT_CNS_NATIVELONG)
12394 if ((andMask->gtIntConCommon.LngValue() >> 32) != 0)
12399 /* Now we know that we can cast gtOp.gtOp1 of AND to int */
12401 op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, op1->gtOp.gtOp1, TYP_INT);
12403 /* now replace the mask node (gtOp.gtOp2 of AND node) */
12405 noway_assert(andMask == op1->gtOp.gtOp2);
12407 ival1 = (int)andMask->gtIntConCommon.LngValue();
12408 andMask->SetOper(GT_CNS_INT);
12409 andMask->gtType = TYP_INT;
12410 andMask->gtIntCon.gtIconVal = ival1;
12412 /* now change the type of the AND node */
12414 op1->gtType = TYP_INT;
12416 /* finally we replace the comparand */
12418 ival2 = (int)cns2->gtIntConCommon.LngValue();
12419 cns2->SetOper(GT_CNS_INT);
12420 cns2->gtType = TYP_INT;
12422 noway_assert(cns2 == op2);
12423 cns2->gtIntCon.gtIconVal = ival2;
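// For example (illustrative): for a long 'x', "(x & 0xFFL) == 0x12L"
// becomes "((int)x & 0xFF) == 0x12", an ordinary 32-bit compare.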
12432 if ((tree->gtFlags & GTF_UNSIGNED) == 0)
12434 if (op2->gtOper == GT_CNS_INT)
12437 /* Check for "expr relop 1" */
12438 if (cns2->IsIntegralConst(1))
12440 /* Check for "expr >= 1" */
12443 /* Change to "expr > 0" */
12447 /* Check for "expr < 1" */
12448 else if (oper == GT_LT)
12450 /* Change to "expr <= 0" */
12455 /* Check for "expr relop -1" */
12456 else if (cns2->IsIntegralConst(-1) && ((oper == GT_LE) || (oper == GT_GT)))
12458 /* Check for "expr <= -1" */
12461 /* Change to "expr < 0" */
12465 /* Check for "expr > -1" */
12466 else if (oper == GT_GT)
12468 /* Change to "expr >= 0" */
12472 // IF we get here we should be changing 'oper'
12473 assert(tree->OperGet() != oper);
12475 // Keep the old ValueNumber for 'tree' as the new expr
12476 // will still compute the same value as before
12477 tree->SetOper(oper, GenTree::PRESERVE_VN);
12478 cns2->gtIntCon.gtIconVal = 0;
12480 // vnStore is null before the ValueNumber phase has run
12481 if (vnStore != nullptr)
12483 // Update the ValueNumber for 'cns2', as we just changed it to 0
12484 fgValueNumberTreeConst(cns2);
12487 op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2);
12492 else // we have an unsigned comparison
12494 if (op2->IsIntegralConst(0))
12496 if ((oper == GT_GT) || (oper == GT_LE))
12498 // IL doesn't have a cne instruction so compilers use cgt.un instead. The JIT
12499 // recognizes certain patterns that involve GT_NE (e.g. (x & 4) != 0) and fails
12500 // if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
12501 // and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it sometimes
12502 // occurs as a result of branch inversion.
12503 oper = (oper == GT_LE) ? GT_EQ : GT_NE;
12504 tree->SetOper(oper, GenTree::PRESERVE_VN);
12505 tree->gtFlags &= ~GTF_UNSIGNED;
12512 noway_assert(tree->OperKind() & GTK_RELOP);
12514 /* Check if the result of the comparison is used for a jump.
12515 * If not then only the int (i.e. 32 bit) case is handled in
12516 * the code generator through the (x86) "set" instructions.
12517 * For the rest of the cases, the simplest way is to
12518 * "simulate" the comparison with ?:
12520 * On ARM, we previously used the IT instruction, but the IT instructions
12521 * have mostly been declared obsolete and off-limits, so all cases on ARM
12522 * get converted to ?: */
12524 if (!(tree->gtFlags & GTF_RELOP_JMP_USED) && fgMorphRelopToQmark(op1))
12526 /* We convert it to "(CMP_TRUE) ? (1):(0)" */
12529 op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
12530 op1->gtRequestSetFlags();
12532 op2 = new (this, GT_COLON) GenTreeColon(TYP_INT, gtNewIconNode(1), gtNewIconNode(0));
12533 op2 = fgMorphTree(op2);
12535 tree = gtNewQmarkNode(TYP_INT, op1, op2);
12537 fgMorphTreeDone(tree);
12545 /* If op1 is a comma throw node then we won't be keeping op2 */
12546 if (fgIsCommaThrow(op1))
12551 /* Get hold of the two branches */
12553 noway_assert(op2->OperGet() == GT_COLON);
12554 elseNode = op2->AsColon()->ElseNode();
12555 thenNode = op2->AsColon()->ThenNode();
12557 /* Try to hoist assignments out of qmark colon constructs.
12558 ie. replace (cond?(x=a):(x=b)) with (x=(cond?a:b)). */
12560 if (tree->TypeGet() == TYP_VOID && thenNode->OperGet() == GT_ASG && elseNode->OperGet() == GT_ASG &&
12561 thenNode->TypeGet() != TYP_LONG && GenTree::Compare(thenNode->gtOp.gtOp1, elseNode->gtOp.gtOp1) &&
12562 thenNode->gtOp.gtOp2->TypeGet() == elseNode->gtOp.gtOp2->TypeGet())
12564 noway_assert(thenNode->TypeGet() == elseNode->TypeGet());
12566 GenTreePtr asg = thenNode;
12567 GenTreePtr colon = op2;
12568 colon->gtOp.gtOp1 = thenNode->gtOp.gtOp2;
12569 colon->gtOp.gtOp2 = elseNode->gtOp.gtOp2;
12570 tree->gtType = colon->gtType = asg->gtOp.gtOp2->gtType;
12571 asg->gtOp.gtOp2 = tree;
12573 // Asg will have all the flags that the QMARK had
12574 asg->gtFlags |= (tree->gtFlags & GTF_ALL_EFFECT);
12576 // Colon flag won't have the flags that x had.
12577 colon->gtFlags &= ~GTF_ALL_EFFECT;
12578 colon->gtFlags |= (colon->gtOp.gtOp1->gtFlags | colon->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
12580 DEBUG_DESTROY_NODE(elseNode->gtOp.gtOp1);
12581 DEBUG_DESTROY_NODE(elseNode);
12586 /* If the 'else' branch is empty swap the two branches and reverse the condition */
12588 if (elseNode->IsNothingNode())
12590 /* This can only happen for VOID ?: */
12591 noway_assert(op2->gtType == TYP_VOID);
12593 /* If the thenNode and elseNode are both nop nodes then optimize away the QMARK */
12594 if (thenNode->IsNothingNode())
12596 // We may be able to throw away op1 (unless it has side-effects)
12598 if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
12600 /* Just return a Nop node */
12601 return gtNewNothingNode();
12605 /* Just return the relop, but clear the special flags. Note
12606 that we can't do that for longs and floats (see code under
12607 COMPARE label above) */
12609 if (!fgMorphRelopToQmark(op1->gtOp.gtOp1))
12611 op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
12612 return op1;
12618 GenTreePtr tmp = elseNode;
12620 op2->AsColon()->ElseNode() = elseNode = thenNode;
12621 op2->AsColon()->ThenNode() = thenNode = tmp;
12622 gtReverseCond(op1);
12626 #if !defined(_TARGET_ARM_)
12627 // If we have (cond)?0:1, then we just return "cond" for TYP_INTs
12629 // Don't do this optimization for ARM: we always require assignment
12630 // to boolean to remain ?:, since we don't have any way to generate
12631 // this with straight-line code, like x86 does using setcc (at least
12632 // after the IT instruction is deprecated).
12634 if (genActualType(op1->gtOp.gtOp1->gtType) == TYP_INT && genActualType(typ) == TYP_INT &&
12635 thenNode->gtOper == GT_CNS_INT && elseNode->gtOper == GT_CNS_INT)
12637 ival1 = thenNode->gtIntCon.gtIconVal;
12638 ival2 = elseNode->gtIntCon.gtIconVal;
12640 // Is one constant 0 and the other 1?
12641 if ((ival1 | ival2) == 1 && (ival1 & ival2) == 0)
12643 // If the constants are {1, 0}, reverse the condition
12644 if (ival1 == 1)
12646 gtReverseCond(op1);
12649 // Unmark GTF_RELOP_JMP_USED on the condition node so it knows that it
12650 // needs to materialize the result as a 0 or 1.
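// For example (illustrative): "(x < y) ? 1 : 0" reduces to the bare relop
// "x < y", and "(x < y) ? 0 : 1" reduces to the reversed relop "x >= y".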
12651 noway_assert(op1->gtFlags & (GTF_RELOP_QMARK | GTF_RELOP_JMP_USED));
12652 op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
12654 DEBUG_DESTROY_NODE(tree);
12655 DEBUG_DESTROY_NODE(op2);
12657 return op1;
12660 #endif // !_TARGET_ARM_
12662 break; // end case GT_QMARK
12666 #ifndef _TARGET_64BIT_
12667 if (typ == TYP_LONG)
12669 // This must be GTF_MUL_64RSLT
12670 assert(tree->gtIsValid64RsltMul());
12673 #endif // _TARGET_64BIT_
12678 if (tree->gtOverflow())
12683 // TODO #4104: there are a lot of other places where
12684 // this condition is not checked before transformations.
12687 /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */
12690 if (op2->IsCnsIntOrI())
12692 /* Negate the constant and change the node to be "+" */
12694 op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
12695 oper = GT_ADD;
12696 tree->ChangeOper(oper);
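// For example (illustrative): "x - 5" becomes "x + (-5)", which exposes the
// constant to the GT_ADD folding below.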
12700 /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */
12703 if (op1->IsCnsIntOrI())
12705 noway_assert(varTypeIsIntOrI(tree));
12707 tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, tree->gtType, op2); // The type of the new GT_NEG
12708 // node should be the same
12709 // as the type of the tree, i.e. tree->gtType.
12710 fgMorphTreeDone(op2);
12712 oper = GT_ADD;
12713 tree->ChangeOper(oper);
12717 /* No match - exit */
12721 #ifdef _TARGET_ARM64_
12723 if (!varTypeIsFloating(tree->gtType))
12725 // Codegen for this instruction needs to be able to throw two exceptions:
12726 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
12727 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
12731 // Codegen for this instruction needs to be able to throw one exception:
12732 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
12739 if (tree->gtOverflow())
12741 tree->gtRequestSetFlags();
12743 // Add the exception-throwing basic block to jump to on overflow
12745 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
12747 // We can't do any commutative morphing for overflow instructions
12758 /* Commute any non-REF constants to the right */
12761 if (op1->OperIsConst() && (op1->gtType != TYP_REF))
12763 // TODO-Review: We used to assert here that
12764 // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD));
12765 // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation,
12766 // and would sometimes hit this assertion. This may indicate a missed "remorph".
12767 // Task is to re-enable this assertion and investigate.
12769 /* Swap the operands */
12770 tree->gtOp.gtOp1 = op2;
12771 tree->gtOp.gtOp2 = op1;
12774 op2 = tree->gtOp.gtOp2;
12777 /* See if we can fold GT_ADD nodes. */
12779 if (oper == GT_ADD)
12781 /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */
12783 if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) &&
12784 op1->gtOp.gtOp2->gtOper == GT_CNS_INT && op2->gtOp.gtOp2->gtOper == GT_CNS_INT &&
12785 !op1->gtOverflow() && !op2->gtOverflow())
12787 cns1 = op1->gtOp.gtOp2;
12788 cns2 = op2->gtOp.gtOp2;
12789 cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal;
12790 #ifdef _TARGET_64BIT_
12791 if (cns1->TypeGet() == TYP_INT)
12793 // we need to properly re-sign-extend or truncate after adding two int constants above
12794 cns1->AsIntCon()->TruncateOrSignExtend32();
12796 #endif //_TARGET_64BIT_
12798 tree->gtOp.gtOp2 = cns1;
12799 DEBUG_DESTROY_NODE(cns2);
12801 op1->gtOp.gtOp2 = op2->gtOp.gtOp1;
12802 op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT);
12803 DEBUG_DESTROY_NODE(op2);
12804 op2 = tree->gtOp.gtOp2;
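// For example (illustrative): "(x + 3) + (y + 4)" becomes "(x + y) + 7",
// leaving a single constant at the root of the address expression.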
12807 if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))
12809 /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */
12811 if (op1->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op1) && op1->gtOp.gtOp2->IsCnsIntOrI() &&
12812 !op1->gtOverflow() && op1->gtOp.gtOp2->OperGet() == op2->OperGet())
12814 cns1 = op1->gtOp.gtOp2;
12815 op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() +
12816 op2->gtIntConCommon.IconValue());
12817 #ifdef _TARGET_64BIT_
12818 if (op2->TypeGet() == TYP_INT)
12820 // we need to properly re-sign-extend or truncate after adding two int constants above
12821 op2->AsIntCon()->TruncateOrSignExtend32();
12823 #endif //_TARGET_64BIT_
12825 if (cns1->OperGet() == GT_CNS_INT)
12827 op2->gtIntCon.gtFieldSeq =
12828 GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq);
12830 DEBUG_DESTROY_NODE(cns1);
12832 tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
12833 DEBUG_DESTROY_NODE(op1);
12834 op1 = tree->gtOp.gtOp1;
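// For example (illustrative): "(x + 3) + 4" becomes "x + 7"; any field
// sequences on the two constants are appended so field-sensitive analysis
// still sees the combined offset.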
12839 if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree))
12842 // If this addition is adding an offset to a null pointer,
12843 // avoid the work and yield the null pointer immediately.
12844 // Dereferencing the pointer in either case will have the
12845 // same effect.
12847 if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) &&
12848 ((op1->gtFlags & GTF_ALL_EFFECT) == 0))
12850 op2->gtType = tree->gtType;
12851 DEBUG_DESTROY_NODE(op1);
12852 DEBUG_DESTROY_NODE(tree);
12856 // Remove the addition iff it won't change the tree type
12859 if (!gtIsActiveCSE_Candidate(op2) &&
12860 ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF)))
12862 if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) &&
12863 (op2->gtIntCon.gtFieldSeq != nullptr) &&
12864 (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField()))
12866 fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq);
12869 DEBUG_DESTROY_NODE(op2);
12870 DEBUG_DESTROY_NODE(tree);
12877 /* See if we can fold GT_MUL by const nodes */
12878 else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase)
12880 #ifndef _TARGET_64BIT_
12881 noway_assert(typ <= TYP_UINT);
12882 #endif // _TARGET_64BIT_
12883 noway_assert(!tree->gtOverflow());
12885 ssize_t mult = op2->gtIntConCommon.IconValue();
12886 bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
12887 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq();
12889 assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr);
12891 if (mult == 0)
12893 // We may be able to throw away op1 (unless it has side-effects)
12895 if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
12897 DEBUG_DESTROY_NODE(op1);
12898 DEBUG_DESTROY_NODE(tree);
12899 return op2; // Just return the "0" node
12902 // We need to keep op1 for the side-effects. Hang it off
12903 // a GT_COMMA node.
12905 tree->ChangeOper(GT_COMMA);
12906 return tree;
12909 size_t abs_mult = (mult >= 0) ? mult : -mult;
12910 size_t lowestBit = genFindLowestBit(abs_mult);
12911 bool changeToShift = false;
12913 // is it a power of two? (positive or negative)
12914 if (abs_mult == lowestBit)
12916 // if negative negate (min-int does not need negation)
12917 if (mult < 0 && mult != SSIZE_T_MIN)
12919 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
12920 fgMorphTreeDone(op1);
12923 // If "op2" is a constant array index, the other multiplicand must be a constant.
12924 // Transfer the annotation to the other one.
12925 if (op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
12926 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
12928 assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr);
12929 GenTreePtr otherOp = op1;
12930 if (otherOp->OperGet() == GT_NEG)
12932 otherOp = otherOp->gtOp.gtOp1;
12934 assert(otherOp->OperGet() == GT_CNS_INT);
12935 assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
12936 otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq;
12941 DEBUG_DESTROY_NODE(op2);
12942 DEBUG_DESTROY_NODE(tree);
12946 /* Change the multiplication into a shift by log2(val) bits */
12947 op2->gtIntConCommon.SetIconValue(genLog2(abs_mult));
12948 changeToShift = true;
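// For example (illustrative): "x * 8" becomes "x << 3", and "x * -8"
// becomes "(-x) << 3" via the GT_NEG inserted above.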
12950 #if LEA_AVAILABLE
12951 else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
12953 int shift = genLog2(lowestBit);
12954 ssize_t factor = abs_mult >> shift;
12956 if (factor == 3 || factor == 5 || factor == 9)
12958 // if negative negate (min-int does not need negation)
12959 if (mult < 0 && mult != SSIZE_T_MIN)
12961 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
12962 fgMorphTreeDone(op1);
12965 GenTreePtr factorIcon = gtNewIconNode(factor, TYP_I_IMPL);
12966 if (op2IsConstIndex)
12968 factorIcon->AsIntCon()->gtFieldSeq =
12969 GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
12972 // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift
12973 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon);
12974 fgMorphTreeDone(op1);
12976 op2->gtIntConCommon.SetIconValue(shift);
12977 changeToShift = true;
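// For example (illustrative): "x * 24" becomes "(x * 3) << 3", where the
// remaining multiply by 3, 5 or 9 can map onto a scaled-index LEA.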
12980 #endif // LEA_AVAILABLE
12983 // vnStore is null before the ValueNumber phase has run
12984 if (vnStore != nullptr)
12986 // Update the ValueNumber for 'op2', as we just changed the constant
12987 fgValueNumberTreeConst(op2);
12990 // Keep the old ValueNumber for 'tree' as the new expr
12991 // will still compute the same value as before
12992 tree->ChangeOper(oper, GenTree::PRESERVE_VN);
12994 goto DONE_MORPHING_CHILDREN;
12997 else if (fgOperIsBitwiseRotationRoot(oper))
12999 tree = fgRecognizeAndMorphBitwiseRotation(tree);
13001 // fgRecognizeAndMorphBitwiseRotation may return a new tree
13002 oper = tree->OperGet();
13003 typ = tree->TypeGet();
13004 op1 = tree->gtOp.gtOp1;
13005 op2 = tree->gtOp.gtOp2;
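// For example (illustrative): for a 32-bit 'x', "(x << 3) | (x >> 29)" is
// recognized above and morphed into a single rotate node, GT_ROL(x, 3).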
13014 /* Any constant cases should have been folded earlier */
13015 noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase);
13020 noway_assert(varTypeIsFloating(op1->TypeGet()));
13022 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN, fgPtrArgCntCur);
13026 // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on
13027 // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X
13028 // is a local or clsVar, even if it has been address-exposed.
13029 if (op1->OperGet() == GT_ADDR)
13031 tree->gtFlags |= (op1->gtGetOp1()->gtFlags & GTF_GLOB_REF);
13037 // Can not remove a GT_IND if it is currently a CSE candidate.
13038 if (gtIsActiveCSE_Candidate(tree))
13043 bool foldAndReturnTemp;
13044 foldAndReturnTemp = false;
13048 /* Try to Fold *(&X) into X */
13049 if (op1->gtOper == GT_ADDR)
13051 // Can not remove a GT_ADDR if it is currently a CSE candidate.
13052 if (gtIsActiveCSE_Candidate(op1))
13057 temp = op1->gtOp.gtOp1; // X
13059 // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that
13060 // they are the *same* struct type. In fact, they almost certainly aren't. If the
13061 // address has an associated field sequence, that identifies this case; go through
13062 // the "lcl_fld" path rather than this one.
13063 FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below.
13064 if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq))
13066 foldAndReturnTemp = true;
13068 else if (temp->OperIsLocal())
13070 unsigned lclNum = temp->gtLclVarCommon.gtLclNum;
13071 LclVarDsc* varDsc = &lvaTable[lclNum];
13073 // We will try to optimize when we have a promoted struct promoted with a zero lvFldOffset
13074 if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
13076 noway_assert(varTypeIsStruct(varDsc));
13078 // We will try to optimize when we have a single field struct that is being struct promoted
13079 if (varDsc->lvFieldCnt == 1)
13081 unsigned lclNumFld = varDsc->lvFieldLclStart;
13082 // just grab the promoted field
13083 LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld];
13085 // Also make sure that the tree type matches the fieldVarType and that its lvFldOffset
13086 // is zero.
13087 if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0))
13089 // We can just use the existing promoted field LclNum
13090 temp->gtLclVarCommon.SetLclNum(lclNumFld);
13091 temp->gtType = fieldVarDsc->TypeGet();
13093 foldAndReturnTemp = true;
13097 // If the type of the IND (typ) is a "small int", and the type of the local has the
13098 // same width, then we can reduce to just the local variable -- it will be
13099 // correctly normalized, and signed/unsigned differences won't matter.
13101 // The below transformation cannot be applied if the local var needs to be normalized on load.
13102 else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
13103 !lvaTable[lclNum].lvNormalizeOnLoad())
13105 tree->gtType = typ = temp->TypeGet();
13106 foldAndReturnTemp = true;
13110 // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e.
13111 // nullptr)
13112 assert(fieldSeq == nullptr);
13113 bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq);
13114 assert(b || fieldSeq == nullptr);
13116 if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD))
13118 // Append the field sequence, change the type.
13119 temp->AsLclFld()->gtFieldSeq =
13120 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
13121 temp->gtType = typ;
13123 foldAndReturnTemp = true;
13126 // Otherwise we will fold this into a GT_LCL_FLD below
13127 // where we check (temp != nullptr)
13129 else // !temp->OperIsLocal()
13131 // We don't try to fold away the GT_IND/GT_ADDR for this case
13135 else if (op1->OperGet() == GT_ADD)
13137 /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */
13139 if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && op1->gtOp.gtOp2->OperGet() == GT_CNS_INT &&
13140 (!(opts.MinOpts() || opts.compDbgCode)))
13142 // No overflow arithmetic with pointers
13143 noway_assert(!op1->gtOverflow());
13145 temp = op1->gtOp.gtOp1->gtOp.gtOp1;
13146 if (!temp->OperIsLocal())
13152 // Can not remove the GT_ADDR if it is currently a CSE candidate.
13153 if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1))
13158 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
13159 fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
13161 // Does the address have an associated zero-offset field sequence?
13162 FieldSeqNode* addrFieldSeq = nullptr;
13163 if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq))
13165 fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq);
13168 if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT)
13170 noway_assert(!varTypeIsGC(temp->TypeGet()));
13171 foldAndReturnTemp = true;
13175 // The emitter can't handle large offsets
13176 if (ival1 != (unsigned short)ival1)
13181 // The emitter can get confused by invalid offsets
13182 if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum))
13187 #ifdef _TARGET_ARM_
13188 // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field
13190 if (varTypeIsFloating(typ))
13192 if ((ival1 % emitTypeSize(typ)) != 0)
13194 tree->gtFlags |= GTF_IND_UNALIGNED;
13200 // Now we can fold this into a GT_LCL_FLD below
13201 // where we check (temp != nullptr)
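// For example (illustrative): "*(int*)(&lclStruct + 6)" becomes a
// GT_LCL_FLD of lclStruct at offset 6 with type TYP_INT, so the local's
// address is never materialized.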
13205 // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging:
13206 // - We may have a load of a local where the load has a different type than the local
13207 // - We may have a load of a local plus an offset
13209 // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and
13210 // offset if doing so is legal. The only cases in which this transformation is illegal are if the load
13211 // begins before the local or if the load extends beyond the end of the local (i.e. if the load is
13212 // out-of-bounds w.r.t. the local).
13213 if ((temp != nullptr) && !foldAndReturnTemp)
13215 assert(temp->OperIsLocal());
13217 const unsigned lclNum = temp->AsLclVarCommon()->gtLclNum;
13218 LclVarDsc* const varDsc = &lvaTable[lclNum];
13220 const var_types tempTyp = temp->TypeGet();
13221 const bool useExactSize =
13222 varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK);
13223 const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp);
13225 // Make sure we do not enregister this lclVar.
13226 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
13228 // If the size of the load is greater than the size of the lclVar, we cannot fold this access into
13229 // a lclFld: the access represented by an lclFld node must begin at or after the start of the
13230 // lclVar and must not extend beyond the end of the lclVar.
13231 if ((ival1 >= 0) && ((ival1 + genTypeSize(typ)) <= varSize))
13233 // We will turn a GT_LCL_VAR into a GT_LCL_FLD with a gtLclOffs of 'ival1'
13234 // or if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival1'
13235 // Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
13237 if (temp->OperGet() == GT_LCL_FLD)
13239 temp->AsLclFld()->gtLclOffs += (unsigned short)ival1;
13240 temp->AsLclFld()->gtFieldSeq =
13241 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
13245 temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"...
13246 temp->AsLclFld()->gtLclOffs = (unsigned short)ival1;
13247 if (fieldSeq != nullptr)
13248 { // If it does represent a field, note that.
13249 temp->AsLclFld()->gtFieldSeq = fieldSeq;
13252 temp->gtType = tree->gtType;
13253 foldAndReturnTemp = true;
13257 if (foldAndReturnTemp)
13259 assert(temp != nullptr);
13260 assert(temp->TypeGet() == typ);
13261 assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR));
13263 // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for
13264 // 'temp' because a GT_ADDR always marks it for its operand.
13265 temp->gtFlags &= ~GTF_DONT_CSE;
13266 temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
13268 if (op1->OperGet() == GT_ADD)
13270 DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR
13271 DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT
13273 DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR
13274 DEBUG_DESTROY_NODE(tree); // GT_IND
13276 // If the result of the fold is a local var, we may need to perform further adjustments e.g. for
13277 // normalization.
13278 if (temp->OperIs(GT_LCL_VAR))
13281 // We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear
13282 // and the node in question must have this bit set (as it has already been morphed).
13283 temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
13285 const bool forceRemorph = true;
13286 temp = fgMorphLocalVar(temp, forceRemorph);
13288 // We then set this flag on `temp` because `fgMorphLocalVar` may not set it itself, and the
13289 // caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function
13290 // returns.
13291 temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13298 // Only do this optimization when we are in the global optimizer. Doing this after value numbering
13299 // could result in an invalid value number for the newly generated GT_IND node.
13300 if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph)
13302 // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)).
13303 // TBD: this transformation is currently necessary for correctness -- it might
13304 // be good to analyze the failures that result if we don't do this, and fix them
13305 // in other ways. Ideally, this should be optional.
13306 GenTreePtr commaNode = op1;
13307 unsigned treeFlags = tree->gtFlags;
13308 commaNode->gtType = typ;
13309 commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
13310 // dangerous, clear the GTF_REVERSE_OPS at
13311 // least.
13313 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13315 while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
13317 commaNode = commaNode->gtOp.gtOp2;
13318 commaNode->gtType = typ;
13319 commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
13320 // dangerous, clear the GTF_REVERSE_OPS at
13321 // least.
13323 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13326 bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0;
13327 ArrayInfo arrInfo;
13328 if (wasArrIndex)
13330 bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
13331 assert(b);
13332 GetArrayInfoMap()->Remove(tree);
13335 op1 = gtNewOperNode(GT_IND, typ, commaNode->gtOp.gtOp2);
13336 op1->gtFlags = treeFlags;
13337 if (wasArrIndex)
13339 GetArrayInfoMap()->Set(op1, arrInfo);
13342 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13344 commaNode->gtOp.gtOp2 = op1;
13352 // Can not remove op1 if it is currently a CSE candidate.
13353 if (gtIsActiveCSE_Candidate(op1))
13358 if (op1->OperGet() == GT_IND)
13360 if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
13362 // Can not remove a GT_ADDR if it is currently a CSE candidate.
13363 if (gtIsActiveCSE_Candidate(tree))
13368 // Perform the transform ADDR(IND(...)) == (...).
13369 GenTreePtr addr = op1->gtOp.gtOp1;
13371 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
13373 DEBUG_DESTROY_NODE(op1);
13374 DEBUG_DESTROY_NODE(tree);
13379 else if (op1->OperGet() == GT_OBJ)
13381 // Can not remove a GT_ADDR if it is currently a CSE candidate.
13382 if (gtIsActiveCSE_Candidate(tree))
13387 // Perform the transform ADDR(OBJ(...)) == (...).
13388 GenTreePtr addr = op1->AsObj()->Addr();
13390 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
13392 DEBUG_DESTROY_NODE(op1);
13393 DEBUG_DESTROY_NODE(tree);
13397 else if (op1->gtOper == GT_CAST)
13399 GenTreePtr casting = op1->gtCast.CastOp();
13400 if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR)
13402 DEBUG_DESTROY_NODE(op1);
13403 tree->gtOp.gtOp1 = op1 = casting;
13406 else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase)
13408 // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)).
13409 // (Be sure to mark "z" as an l-value...)
13410 GenTreePtr commaNode = op1;
13411 while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
13413 commaNode = commaNode->gtOp.gtOp2;
13415 // The top-level addr might be annotated with a zeroOffset field.
13416 FieldSeqNode* zeroFieldSeq = nullptr;
13417 bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq);
13419 commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE;
13421 // If the node we're about to put under a GT_ADDR is an indirection, it
13422 // doesn't need to be materialized, since we only want the addressing mode. Because
13423 // of this, this GT_IND is not a faulting indirection and we don't have to extract it
13424 // as a side effect.
13425 GenTree* commaOp2 = commaNode->gtOp.gtOp2;
13426 if (commaOp2->OperIsBlk())
13428 commaOp2 = fgMorphBlkToInd(commaOp2->AsBlk(), commaOp2->TypeGet());
13430 if (commaOp2->gtOper == GT_IND)
13432 commaOp2->gtFlags |= GTF_IND_NONFAULTING;
13435 op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2);
13437 if (isZeroOffset)
13439 // Transfer the annotation to the new GT_ADDR node.
13440 GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq);
13442 commaNode->gtOp.gtOp2 = op1;
13443 // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform
13444 // might give op1 a type different from byref (like, say, native int). So now go back and give
13445 // all the comma nodes the type of op1.
13446 // TODO: the comma flag update below is conservative and can be improved.
13447 // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to
13448 // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
13450 while (commaNode->gtOper == GT_COMMA)
13452 commaNode->gtType = op1->gtType;
13453 commaNode->gtFlags |= op1->gtFlags;
13455 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13457 commaNode = commaNode->gtOp.gtOp2;
13463 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
13464 op1->gtFlags |= GTF_DONT_CSE;
13470 /* Mark the nodes that are conditionally executed */
13471 fgWalkTreePre(&tree, gtMarkColonCond);
13473 /* Since we're doing this postorder we clear this if it got set by a child */
13474 fgRemoveRestOfBlock = false;
13479 /* Special case: trees that don't produce a value */
13480 if ((op2->OperKind() & GTK_ASGOP) || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) ||
13481 fgIsThrow(op2))
13483 typ = tree->gtType = TYP_VOID;
13486 // If we are in the Valuenum CSE phase then don't morph away anything as these
13487 // nodes may have CSE defs/uses in them.
13489 if (!optValnumCSE_phase)
13491 // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this
13492 // is all we need.
13494 GenTreePtr op1SideEffects = nullptr;
13495 // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
13496 // hoisted expressions in loops.
13497 gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE));
13498 if (op1SideEffects)
13500 // Replace the left hand side with the side effect list.
13501 tree->gtOp.gtOp1 = op1SideEffects;
13502 tree->gtFlags |= (op1SideEffects->gtFlags & GTF_ALL_EFFECT);
13506 /* The left operand is worthless, throw it away */
13507 if (lvaLocalVarRefCounted)
13509 lvaRecursiveDecRefCounts(op1);
13511 op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
13512 DEBUG_DESTROY_NODE(tree);
13513 DEBUG_DESTROY_NODE(op1);
13517 /* If the right operand is just a void nop node, throw it away */
13518 if (op2->IsNothingNode() && op1->gtType == TYP_VOID)
13520 op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
13521 DEBUG_DESTROY_NODE(tree);
13522 DEBUG_DESTROY_NODE(op2);
13531 /* Special case if fgRemoveRestOfBlock is set to true */
13532 if (fgRemoveRestOfBlock)
13534 if (fgIsCommaThrow(op1, true))
13536 GenTreePtr throwNode = op1->gtOp.gtOp1;
13537 noway_assert(throwNode->gtType == TYP_VOID);
13539 return throwNode;
13542 noway_assert(op1->OperKind() & GTK_RELOP);
13543 noway_assert(op1->gtFlags & GTF_EXCEPT);
13545 // We need to keep op1 for the side-effects. Hang it off
13546 // a GT_COMMA node.
13548 tree->ChangeOper(GT_COMMA);
13549 tree->gtOp.gtOp2 = op2 = gtNewNothingNode();
13551 // Additionally since we're eliminating the JTRUE
13552 // codegen won't like it if op1 is a RELOP of longs, floats or doubles.
13553 // So we change it into a GT_COMMA as well.
13554 op1->ChangeOper(GT_COMMA);
13555 op1->gtType = op1->gtOp.gtOp1->gtType;
13564 noway_assert(oper == tree->gtOper);
13566 // If we are in the Valuenum CSE phase then don't morph away anything as these
13567 // nodes may have CSE defs/uses in them.
13569 if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList())
13571 /* Check for op1 as a GT_COMMA with an unconditional throw node */
13572 if (op1 && fgIsCommaThrow(op1, true))
13574 if ((op1->gtFlags & GTF_COLON_COND) == 0)
13576 /* We can safely throw out the rest of the statements */
13577 fgRemoveRestOfBlock = true;
13580 GenTreePtr throwNode = op1->gtOp.gtOp1;
13581 noway_assert(throwNode->gtType == TYP_VOID);
13583 if (oper == GT_COMMA)
13585 /* Both tree and op1 are GT_COMMA nodes */
13586 /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */
13587 tree->gtOp.gtOp1 = throwNode;
13590 else if (oper != GT_NOP)
13592 if (genActualType(typ) == genActualType(op1->gtType))
13594 /* The types match, so return the comma throw node as the new tree */
13596 return op1;
13599 if (typ == TYP_VOID)
13601 // Return the throw node
13603 return throwNode;
13606 GenTreePtr commaOp2 = op1->gtOp.gtOp2;
13608 // The type of commaOp2 needs to match the type of the tree
13609 if (typ == TYP_LONG)
13611 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
13612 commaOp2->gtIntConCommon.SetLngValue(0);
13613 /* Change the types of oper and commaOp2 to TYP_LONG */
13614 op1->gtType = commaOp2->gtType = TYP_LONG;
13616 else if (varTypeIsFloating(typ))
13618 commaOp2->ChangeOperConst(GT_CNS_DBL);
13619 commaOp2->gtDblCon.gtDconVal = 0.0;
13620 /* Change the types of oper and commaOp2 to TYP_DOUBLE */
13621 op1->gtType = commaOp2->gtType = TYP_DOUBLE;
13625 commaOp2->ChangeOperConst(GT_CNS_INT);
13626 commaOp2->gtIntConCommon.SetIconValue(0);
13627 /* Change the types of oper and commaOp2 to TYP_INT */
13628 op1->gtType = commaOp2->gtType = TYP_INT;
13631 /* Return the GT_COMMA node as the new tree */
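// e.g. a TYP_LONG "MUL(COMMA(throw, x), y)" becomes "COMMA(throw, 0L)"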
/* Check for op2 as a GT_COMMA with an unconditional throw */
13640 if (op2 && fgIsCommaThrow(op2, true))
13642 if ((op2->gtFlags & GTF_COLON_COND) == 0)
13644 /* We can safely throw out the rest of the statements */
13645 fgRemoveRestOfBlock = true;
13648 // If op1 has no side-effects
13649 if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
13651 // If tree is an asg node
13652 if (tree->OperIsAssignment())
13654 /* Return the throw node as the new tree */
13655 return op2->gtOp.gtOp1;
13658 if (tree->OperGet() == GT_ARR_BOUNDS_CHECK)
13660 /* Return the throw node as the new tree */
13661 return op2->gtOp.gtOp1;
13664 // If tree is a comma node
13665 if (tree->OperGet() == GT_COMMA)
13667 /* Return the throw node as the new tree */
13668 return op2->gtOp.gtOp1;
13671 /* for the shift nodes the type of op2 can differ from the tree type */
13672 if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
13674 noway_assert(GenTree::OperIsShiftOrRotate(oper));
13676 GenTreePtr commaOp2 = op2->gtOp.gtOp2;
13678 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
13679 commaOp2->gtIntConCommon.SetLngValue(0);
13681 /* Change the types of oper and commaOp2 to TYP_LONG */
13682 op2->gtType = commaOp2->gtType = TYP_LONG;
13685 if ((genActualType(typ) == TYP_INT) &&
13686 (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet())))
13688 // An example case is comparison (say GT_GT) of two longs or floating point values.
13690 GenTreePtr commaOp2 = op2->gtOp.gtOp2;
13692 commaOp2->ChangeOperConst(GT_CNS_INT);
13693 commaOp2->gtIntCon.gtIconVal = 0;
13694 /* Change the types of oper and commaOp2 to TYP_INT */
13695 op2->gtType = commaOp2->gtType = TYP_INT;
13698 if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL))
13700 noway_assert(tree->OperGet() == GT_ADD);
13702 GenTreePtr commaOp2 = op2->gtOp.gtOp2;
13704 commaOp2->ChangeOperConst(GT_CNS_INT);
13705 commaOp2->gtIntCon.gtIconVal = 0;
13706 /* Change the types of oper and commaOp2 to TYP_BYREF */
13707 op2->gtType = commaOp2->gtType = TYP_BYREF;
13710 /* types should now match */
13711 noway_assert((genActualType(typ) == genActualType(op2->gtType)));
13713 /* Return the GT_COMMA node as the new tree */
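// e.g. for a TYP_BYREF "ADD(x, COMMA(throw, i))" where 'x' has no side
// effects, the result is "COMMA(throw, 0)" retyped to TYP_BYREF.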
13719 /*-------------------------------------------------------------------------
* Optional morphing is done if tree transformations are permitted
13723 if ((opts.compFlags & CLFLG_TREETRANS) == 0)
13728 tree = fgMorphSmpOpOptional(tree->AsOp());
13730 } // extra scope for gcc workaround
13734 #pragma warning(pop)
13737 GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
13739 genTreeOps oper = tree->gtOper;
13740 GenTree* op1 = tree->gtOp1;
13741 GenTree* op2 = tree->gtOp2;
13742 var_types typ = tree->TypeGet();
13744 if (fgGlobalMorph && GenTree::OperIsCommutative(oper))
13746 /* Swap the operands so that the more expensive one is 'op1' */
13748 if (tree->gtFlags & GTF_REVERSE_OPS)
13756 tree->gtFlags &= ~GTF_REVERSE_OPS;
13759 if (oper == op2->gtOper)
13761 /* Reorder nested operators at the same precedence level to be
13762 left-recursive. For example, change "(a+(b+c))" to the
13763 equivalent expression "((a+b)+c)".
13766 /* Things are handled differently for floating-point operators */
13768 if (!varTypeIsFloating(tree->TypeGet()))
13770 fgMoveOpsLeft(tree);
13779 /* Change "((x+icon)+y)" to "((x+y)+icon)"
13780 Don't reorder floating-point operations */
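// e.g. "(x + 16) + y" becomes "(x + y) + 16", moving the constant to the
// top level where it can fold with other constants.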
13782 if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
13783 varTypeIsIntegralOrI(typ))
13785 GenTreePtr ad2 = op1->gtOp.gtOp2;
13787 if (op2->OperIsConst() == 0 && ad2->OperIsConst() != 0)
13799 // And it swaps ad2 and op2. If (op2) is varTypeIsGC, then this implies that (tree) is
13800 // varTypeIsGC. If (op1) is not, then when we swap (ad2) and (op2), then we have a TYP_INT node
13801 // (op1) with a child that is varTypeIsGC. If we encounter that situation, make (op1) the same
13804 // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is
13807 if (varTypeIsGC(op2->TypeGet()))
13809 noway_assert(varTypeIsGC(typ));
13814 op1->gtOp.gtOp2 = op2;
13815 op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
13823 /*-------------------------------------------------------------------------
13824 * Perform optional oper-specific postorder morphing
13830 bool dstIsSafeLclVar;
13833 /* We'll convert "a = a <op> x" into "a <op>= x" */
/* and also "a = x <op> a" into "a <op>= x" for commutative ops */
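/* e.g. "a = a + 3" becomes the single GT_ASG_ADD node "a += 3" */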
13835 CLANG_FORMAT_COMMENT_ANCHOR;
13837 if (typ == TYP_LONG)
13842 if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
13844 if (tree->OperIsCopyBlkOp())
13846 return fgMorphCopyBlock(tree);
13850 return fgMorphInitBlock(tree);
13854 /* Make sure we're allowed to do this */
13856 if (optValnumCSE_phase)
13858 // It is not safe to reorder/delete CSE's
13862 /* Are we assigning to a GT_LCL_VAR ? */
13864 dstIsSafeLclVar = (op1->gtOper == GT_LCL_VAR);
13866 /* If we have a GT_LCL_VAR, then is the address taken? */
13867 if (dstIsSafeLclVar)
13869 unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
13870 LclVarDsc* varDsc = lvaTable + lclNum;
13872 noway_assert(lclNum < lvaCount);
13874 /* Is the address taken? */
13875 if (varDsc->lvAddrExposed)
13877 dstIsSafeLclVar = false;
13879 else if (op2->gtFlags & GTF_ASG)
13885 if (!dstIsSafeLclVar)
13887 if (op2->gtFlags & GTF_ASG)
13892 if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT))
13898 /* Special case: a cast that can be thrown away */
13900 if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow())
13906 srct = op2->gtCast.CastOp()->TypeGet();
13907 cast = (var_types)op2->CastToType();
13908 dstt = op1->TypeGet();
13910 /* Make sure these are all ints and precision is not lost */
13912 if (cast >= dstt && dstt <= TYP_INT && srct <= TYP_INT)
13914 op2 = tree->gtOp2 = op2->gtCast.CastOp();
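// e.g. for "*(short*)p = (short)x", the 16-bit store truncates anyway,
// so the cast is redundant and the cast's operand is stored directly.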
13918 /* Make sure we have the operator range right */
13920 static_assert(GT_SUB == GT_ADD + 1, "bad oper value");
13921 static_assert(GT_MUL == GT_ADD + 2, "bad oper value");
13922 static_assert(GT_DIV == GT_ADD + 3, "bad oper value");
13923 static_assert(GT_MOD == GT_ADD + 4, "bad oper value");
13924 static_assert(GT_UDIV == GT_ADD + 5, "bad oper value");
13925 static_assert(GT_UMOD == GT_ADD + 6, "bad oper value");
13927 static_assert(GT_OR == GT_ADD + 7, "bad oper value");
13928 static_assert(GT_XOR == GT_ADD + 8, "bad oper value");
13929 static_assert(GT_AND == GT_ADD + 9, "bad oper value");
13931 static_assert(GT_LSH == GT_ADD + 10, "bad oper value");
13932 static_assert(GT_RSH == GT_ADD + 11, "bad oper value");
13933 static_assert(GT_RSZ == GT_ADD + 12, "bad oper value");
13935 /* Check for a suitable operator on the RHS */
13937 cmop = op2->OperGet();
13942 // GT_CHS only supported for integer types
13943 if (varTypeIsFloating(tree->TypeGet()))
13951 // GT_ASG_MUL only supported for floating point types
13952 if (!varTypeIsFloating(tree->TypeGet()))
13961 if (op2->gtOverflow())
/* Disable folding into "<op>=" if the result can be
visible to anyone as <op> may throw an exception and
the assignment should not proceed.
We are safe with an assignment to a local variable.
13968 if (ehBlockHasExnFlowDsc(compCurBB))
13972 if (!dstIsSafeLclVar)
13977 #ifndef _TARGET_AMD64_
13978 // This is hard for byte-operations as we need to make
13979 // sure both operands are in RBM_BYTE_REGS.
13980 if (varTypeIsByte(op2->TypeGet()))
13982 #endif // _TARGET_AMD64_
13987 // GT_ASG_DIV only supported for floating point types
13988 if (!varTypeIsFloating(tree->TypeGet()))
14001 bool bReverse = false;
14002 bool bAsgOpFoldable = fgShouldCreateAssignOp(tree, &bReverse);
14003 if (bAsgOpFoldable)
14007 // We will transform this from "a = x <op> a" to "a <op>= x"
14008 // so we can now destroy the duplicate "a"
14009 DEBUG_DESTROY_NODE(op2->gtOp.gtOp2);
14010 op2->gtOp.gtOp2 = op2->gtOp.gtOp1;
14013 /* Special case: "x |= -1" and "x &= 0" */
14014 if (((cmop == GT_AND) && op2->gtOp.gtOp2->IsIntegralConst(0)) ||
14015 ((cmop == GT_OR) && op2->gtOp.gtOp2->IsIntegralConst(-1)))
14017 /* Simply change to an assignment */
14018 tree->gtOp2 = op2->gtOp.gtOp2;
14022 if (cmop == GT_NEG)
14024 /* This is "x = -x;", use the flipsign operator */
14026 tree->ChangeOper(GT_CHS);
14028 if (op1->gtOper == GT_LCL_VAR)
14030 op1->gtFlags |= GTF_VAR_USEASG;
14033 tree->gtOp2 = gtNewIconNode(0, op1->TypeGet());
14038 if (cmop == GT_RSH && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
14040 // Changing from x = x op y to x op= y when x is a small integer type
14041 // makes the op size smaller (originally the op size was 32 bits, after
14042 // sign or zero extension of x, and there is an implicit truncation in the
14044 // This is ok in most cases because the upper bits were
14045 // lost when assigning the op result to a small type var,
14046 // but it may not be ok for the right shift operation where the higher bits
14047 // could be shifted into the lower bits and preserved.
14048 // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) ==
14049 // (sbyte)x >>signed y)) as do unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) ==
// (ubyte)x >>unsigned y), but signed right shift of an unsigned small type may give the
14053 // e.g. (ubyte)((int)(ubyte)0xf0 >>signed 4) == 0x0f,
14054 // but (ubyte)0xf0 >>signed 4 == 0xff which is incorrect.
14055 // The result becomes correct if we use >>unsigned instead of >>signed.
14056 noway_assert(op1->TypeGet() == op2->gtOp.gtOp1->TypeGet());
14060 /* Replace with an assignment operator */
14061 noway_assert(GT_ADD - GT_ADD == GT_ASG_ADD - GT_ASG_ADD);
14062 noway_assert(GT_SUB - GT_ADD == GT_ASG_SUB - GT_ASG_ADD);
14063 noway_assert(GT_OR - GT_ADD == GT_ASG_OR - GT_ASG_ADD);
14064 noway_assert(GT_XOR - GT_ADD == GT_ASG_XOR - GT_ASG_ADD);
14065 noway_assert(GT_AND - GT_ADD == GT_ASG_AND - GT_ASG_ADD);
14066 noway_assert(GT_LSH - GT_ADD == GT_ASG_LSH - GT_ASG_ADD);
14067 noway_assert(GT_RSH - GT_ADD == GT_ASG_RSH - GT_ASG_ADD);
14068 noway_assert(GT_RSZ - GT_ADD == GT_ASG_RSZ - GT_ASG_ADD);
14070 tree->SetOper((genTreeOps)(cmop - GT_ADD + GT_ASG_ADD));
14071 tree->gtOp2 = op2->gtOp.gtOp2;
14073 /* Propagate GTF_OVERFLOW */
14075 if (op2->gtOverflowEx())
14077 tree->gtType = op2->gtType;
14078 tree->gtFlags |= (op2->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
14081 #if FEATURE_SET_FLAGS
14083 /* Propagate GTF_SET_FLAGS */
14084 if (op2->gtSetFlags())
14086 tree->gtRequestSetFlags();
14089 #endif // FEATURE_SET_FLAGS
14091 DEBUG_DESTROY_NODE(op2);
14094 /* The target is used as well as being defined */
14095 if (op1->OperIsLocal())
14097 op1->gtFlags &= ~GTF_VAR_USEDEF;
14098 op1->gtFlags |= GTF_VAR_USEASG;
14101 #if CPU_HAS_FP_SUPPORT
14102 /* Check for the special case "x += y * x;" */
14104 // GT_ASG_MUL only supported for floating point types
14105 if (cmop != GT_ADD && cmop != GT_SUB)
14110 if (op2->gtOper == GT_MUL && varTypeIsFloating(tree->TypeGet()))
14112 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
14114 /* Change "x += x * y" into "x *= (y + 1)" */
14116 op2 = op2->gtOp.gtOp2;
14118 else if (GenTree::Compare(op1, op2->gtOp.gtOp2))
14120 /* Change "x += y * x" into "x *= (y + 1)" */
14122 op2 = op2->gtOp.gtOp1;
14129 op1 = gtNewDconNode(1.0);
14131 /* Now make the "*=" node */
14133 if (cmop == GT_ADD)
14135 /* Change "x += x * y" into "x *= (y + 1)" */
14137 tree->gtOp2 = op2 = gtNewOperNode(GT_ADD, tree->TypeGet(), op2, op1);
14141 /* Change "x -= x * y" into "x *= (1 - y)" */
14143 noway_assert(cmop == GT_SUB);
14144 tree->gtOp2 = op2 = gtNewOperNode(GT_SUB, tree->TypeGet(), op1, op2);
14146 tree->ChangeOper(GT_ASG_MUL);
14148 #endif // CPU_HAS_FP_SUPPORT
14156 /* Is the destination identical to the first RHS sub-operand? */
14158 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
14160 /* This is "x = ~x" which is the same as "x ^= -1"
14161 * Transform the node into a GT_ASG_XOR */
14163 noway_assert(genActualType(typ) == TYP_INT || genActualType(typ) == TYP_LONG);
14165 op2->gtOp.gtOp2 = (genActualType(typ) == TYP_INT) ? gtNewIconNode(-1) : gtNewLconNode(-1);
14180 /* Check for the case "(val + icon) * icon" */
14182 if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD)
14184 GenTreePtr add = op1->gtOp.gtOp2;
14186 if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
14188 if (tree->gtOverflow() || op1->gtOverflow())
14193 ssize_t imul = op2->gtIntCon.gtIconVal;
14194 ssize_t iadd = add->gtIntCon.gtIconVal;
14196 /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */
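// e.g. "(x + 3) * 5" becomes "(x * 5) + 15"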
14199 tree->ChangeOper(oper);
14201 op2->gtIntCon.gtIconVal = iadd * imul;
14203 op1->ChangeOper(GT_MUL);
14205 add->gtIntCon.gtIconVal = imul;
14206 #ifdef _TARGET_64BIT_
14207 if (add->gtType == TYP_INT)
14209 // we need to properly re-sign-extend or truncate after multiplying two int constants above
14210 add->AsIntCon()->TruncateOrSignExtend32();
14212 #endif //_TARGET_64BIT_
14220 /* For "val / 1", just return "val" */
14222 if (op2->IsIntegralConst(1))
14224 DEBUG_DESTROY_NODE(tree);
14232 /* Check for the case "(val + icon) << icon" */
14234 if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
14236 GenTreePtr cns = op1->gtOp.gtOp2;
14238 if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
14240 ssize_t ishf = op2->gtIntConCommon.IconValue();
14241 ssize_t iadd = cns->gtIntConCommon.IconValue();
14243 // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
14245 /* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
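// e.g. "(x + 4) << 2" becomes "(x << 2) + 16"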
14247 tree->ChangeOper(GT_ADD);
14248 ssize_t result = iadd << ishf;
14249 op2->gtIntConCommon.SetIconValue(result);
14250 #ifdef _TARGET_64BIT_
14251 if (op1->gtType == TYP_INT)
14253 op2->AsIntCon()->TruncateOrSignExtend32();
14255 #endif // _TARGET_64BIT_
14257 // we are reusing the shift amount node here, but the type we want is that of the shift result
14258 op2->gtType = op1->gtType;
14260 if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq != nullptr &&
14261 cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
14263 assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr);
14264 op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq;
14267 op1->ChangeOper(GT_LSH);
14269 cns->gtIntConCommon.SetIconValue(ishf);
14277 if (!optValnumCSE_phase)
14279 /* "x ^ -1" is "~x" */
14281 if (op2->IsIntegralConst(-1))
14283 tree->ChangeOper(GT_NOT);
14284 tree->gtOp2 = nullptr;
14285 DEBUG_DESTROY_NODE(op2);
14287 else if (op2->IsIntegralConst(1) && op1->OperIsCompare())
14289 /* "binaryVal ^ 1" is "!binaryVal" */
14290 gtReverseCond(op1);
14291 DEBUG_DESTROY_NODE(op2);
14292 DEBUG_DESTROY_NODE(tree);
14300 // Initialization values for initBlk have special semantics - their lower
14301 // byte is used to fill the struct. However, we allow 0 as a "bare" value,
14302 // which enables them to get a VNForZero, and be propagated.
14303 if (op1->IsIntegralConst(0))
14315 //------------------------------------------------------------------------
14316 // fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b
14317 // (see ECMA III 3.55 and III.3.56).
14320 // tree - The GT_MOD/GT_UMOD tree to morph
14323 // The morphed tree
14326 // For ARM64 we don't have a remainder instruction so this transform is
14327 // always done. For XARCH this transform is done if we know that magic
14328 // division will be used, in that case this transform allows CSE to
14329 // eliminate the redundant div from code like "x = a / 3; y = a % 3;".
// This method will produce the above expression if 'a' and 'b' are
// leaf nodes; otherwise, if either of them is not a leaf, it will spill
// its value into a temporary variable, for example:
14334 // (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
14336 GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
14338 if (tree->OperGet() == GT_MOD)
14340 tree->SetOper(GT_DIV);
14342 else if (tree->OperGet() == GT_UMOD)
14344 tree->SetOper(GT_UDIV);
14348 noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
14351 var_types type = tree->gtType;
14352 GenTree* denominator = tree->gtOp2;
14353 GenTree* numerator = tree->gtOp1;
14355 if (!numerator->OperIsLeaf())
14357 numerator = fgMakeMultiUse(&tree->gtOp1);
14359 else if (lvaLocalVarRefCounted && numerator->OperIsLocal())
14361 // Morphing introduces new lclVar references. Increase ref counts
14362 lvaIncRefCnts(numerator);
14365 if (!denominator->OperIsLeaf())
14367 denominator = fgMakeMultiUse(&tree->gtOp2);
14369 else if (lvaLocalVarRefCounted && denominator->OperIsLocal())
14371 // Morphing introduces new lclVar references. Increase ref counts
14372 lvaIncRefCnts(denominator);
14375 // The numerator and denominator may have been assigned to temps, in which case
14376 // their defining assignments are in the current tree. Therefore, we need to
// set the execution order accordingly on the nodes we create.
14378 // That is, the "mul" will be evaluated in "normal" order, and the "sub" must
14379 // be set to be evaluated in reverse order.
14381 GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator));
14382 assert(!mul->IsReverseOp());
14383 GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul);
14384 sub->gtFlags |= GTF_REVERSE_OPS;
14387 sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
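// The overall result for "a % b" is "SUB(a, MUL(DIV(a, b), b))"; marking the
// SUB with GTF_REVERSE_OPS ensures any temp-defining assignments buried in
// the DIV's operands are evaluated before the cloned uses of the temps.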
14393 //------------------------------------------------------------------------------
14394 // fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
14398 // oper - Operation to check
14401 // True if the operation can be a root of a bitwise rotation tree; false otherwise.
14403 bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper)
14405 return (oper == GT_OR) || (oper == GT_XOR);
14408 //------------------------------------------------------------------------------
14409 // fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return
14410 // an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
14413 // tree - tree to check for a rotation pattern
14416 // An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
14419 // The input is a GT_OR or a GT_XOR tree.
14421 GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
14423 #ifndef LEGACY_BACKEND
14425 // Check for a rotation pattern, e.g.,
14438 // The patterns recognized:
14439 // (x << (y & M)) op (x >>> ((-y + N) & M))
14440 // (x >>> ((-y + N) & M)) op (x << (y & M))
14442 // (x << y) op (x >>> (-y + N))
// (x >>> (-y + N)) op (x << y)
14445 // (x >>> (y & M)) op (x << ((-y + N) & M))
14446 // (x << ((-y + N) & M)) op (x >>> (y & M))
14448 // (x >>> y) op (x << (-y + N))
14449 // (x << (-y + N)) op (x >>> y)
14451 // (x << c1) op (x >>> c2)
14452 // (x >>> c1) op (x << c2)
14455 // c1 and c2 are const
14456 // c1 + c2 == bitsize(x)
14459 // M & (N - 1) == N - 1
14460 // op is either | or ^
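//
// For example, with a 32-bit x,
// (x << (y & 31)) | (x >>> ((32 - y) & 31))
// matches the first pattern with N = 32 and M = 31 and is morphed into a
// GT_ROL of x by y.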
14462 if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
14464 // We can't do anything if the tree has assignments, calls, or volatile
14465 // reads. Note that we allow GTF_EXCEPT side effect since any exceptions
14466 // thrown by the original tree will be thrown by the transformed tree as well.
14470 genTreeOps oper = tree->OperGet();
14471 assert(fgOperIsBitwiseRotationRoot(oper));
14473 // Check if we have an LSH on one side of the OR and an RSZ on the other side.
14474 GenTreePtr op1 = tree->gtGetOp1();
14475 GenTreePtr op2 = tree->gtGetOp2();
14476 GenTreePtr leftShiftTree = nullptr;
14477 GenTreePtr rightShiftTree = nullptr;
14478 if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
14480 leftShiftTree = op1;
14481 rightShiftTree = op2;
14483 else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
14485 leftShiftTree = op2;
14486 rightShiftTree = op1;
14493 // Check if the trees representing the value to shift are identical.
14494 // We already checked that there are no side effects above.
14495 if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
14497 GenTreePtr rotatedValue = leftShiftTree->gtGetOp1();
14498 var_types rotatedValueActualType = genActualType(rotatedValue->gtType);
14499 ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8;
14500 noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
14501 GenTreePtr leftShiftIndex = leftShiftTree->gtGetOp2();
14502 GenTreePtr rightShiftIndex = rightShiftTree->gtGetOp2();
// The shift index may be masked. At least the low (rotatedValueBitSize - 1)
// bits must survive the mask for the transformation to be valid. If additional
14506 // higher bits are not masked, the transformation is still valid since the result
// of MSIL shift instructions is unspecified if the shift amount is greater
// than or equal to the width of the value being shifted.
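// For a 32-bit rotation the minimal mask is 31, so a shift index masked with
// "& 31" or "& 63" is acceptable, while "& 15" is not.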
14509 ssize_t minimalMask = rotatedValueBitSize - 1;
14510 ssize_t leftShiftMask = -1;
14511 ssize_t rightShiftMask = -1;
14513 if ((leftShiftIndex->OperGet() == GT_AND))
14515 if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
14517 leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
14518 leftShiftIndex = leftShiftIndex->gtGetOp1();
14526 if ((rightShiftIndex->OperGet() == GT_AND))
14528 if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
14530 rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
14531 rightShiftIndex = rightShiftIndex->gtGetOp1();
14539 if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask))
14541 // The shift index is overmasked, e.g., we have
// something like (x << (y & 15)) or
// (x >>> ((32 - y) & 15)) with a 32-bit x.
14544 // The transformation is not valid.
14548 GenTreePtr shiftIndexWithAdd = nullptr;
14549 GenTreePtr shiftIndexWithoutAdd = nullptr;
14550 genTreeOps rotateOp = GT_NONE;
14551 GenTreePtr rotateIndex = nullptr;
14553 if (leftShiftIndex->OperGet() == GT_ADD)
14555 shiftIndexWithAdd = leftShiftIndex;
14556 shiftIndexWithoutAdd = rightShiftIndex;
14559 else if (rightShiftIndex->OperGet() == GT_ADD)
14561 shiftIndexWithAdd = rightShiftIndex;
14562 shiftIndexWithoutAdd = leftShiftIndex;
14566 if (shiftIndexWithAdd != nullptr)
14568 if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
14570 if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
14572 if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
14574 if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
14576 // We found one of these patterns:
14577 // (x << (y & M)) | (x >>> ((-y + N) & M))
14578 // (x << y) | (x >>> (-y + N))
14579 // (x >>> (y & M)) | (x << ((-y + N) & M))
14580 // (x >>> y) | (x << (-y + N))
14581 // where N == bitsize(x), M is const, and
14582 // M & (N - 1) == N - 1
14583 CLANG_FORMAT_COMMENT_ANCHOR;
14585 #ifndef _TARGET_64BIT_
14586 if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
14588 // TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86.
14589 // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
14590 // to add helpers for GT_ROL and GT_ROR.
14595 rotateIndex = shiftIndexWithoutAdd;
14601 else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI()))
14603 if (leftShiftIndex->gtIntCon.gtIconVal + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
14605 // We found this pattern:
14606 // (x << c1) | (x >>> c2)
14607 // where c1 and c2 are const and c1 + c2 == bitsize(x)
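// e.g. "(x << 24) | (x >>> 8)" with a 32-bit x becomes "GT_ROL(x, 24)".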
14609 rotateIndex = leftShiftIndex;
14613 if (rotateIndex != nullptr)
14615 noway_assert(GenTree::OperIsRotate(rotateOp));
14617 unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT;
14619 // We can use the same tree only during global morph; reusing the tree in a later morph
14620 // may invalidate value numbers.
14623 tree->gtOp.gtOp1 = rotatedValue;
14624 tree->gtOp.gtOp2 = rotateIndex;
14625 tree->ChangeOper(rotateOp);
14627 unsigned childFlags = 0;
14628 for (GenTree* op : tree->Operands())
14630 childFlags |= (op->gtFlags & GTF_ALL_EFFECT);
14633 // The parent's flags should be a superset of its operands' flags
14634 noway_assert((inputTreeEffects & childFlags) == childFlags);
14638 tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex);
14639 noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT));
14645 #endif // LEGACY_BACKEND
14649 #if !CPU_HAS_FP_SUPPORT
14650 GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
14653 genTreeOps oper = tree->OperGet();
14654 var_types typ = tree->TypeGet();
14655 GenTreePtr op1 = tree->gtOp.gtOp1;
14656 GenTreePtr op2 = tree->gtGetOp2IfPresent();
14659 We have to use helper calls for all FP operations:
14661 FP operators that operate on FP values
14662 casts to and from FP
14663 comparisons of FP values
14666 if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet())))
14670 size_t argc = genTypeStSz(typ);
14672 /* Not all FP operations need helper calls */
14686 /* If the result isn't FP, it better be a compare or cast */
14688 if (!(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST))
14691 noway_assert(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST);
14694 /* Keep track of how many arguments we're passing */
14696 fgPtrArgCntCur += argc;
14698 /* Is this a binary operator? */
14702 /* Add the second operand to the argument count */
14704 fgPtrArgCntCur += argc;
14707 /* What kind of an operator do we have? */
14712 helper = CPX_R4_ADD;
14715 helper = CPX_R4_SUB;
14718 helper = CPX_R4_MUL;
14721 helper = CPX_R4_DIV;
14723 // case GT_MOD: helper = CPX_R4_REM; break;
14726 helper = CPX_R4_EQ;
14729 helper = CPX_R4_NE;
14732 helper = CPX_R4_LT;
14735 helper = CPX_R4_LE;
14738 helper = CPX_R4_GE;
14741 helper = CPX_R4_GT;
14748 noway_assert(!"unexpected FP binary op");
14752 args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
14762 noway_assert(!"FP cast");
14765 helper = CPX_R4_NEG;
14772 noway_assert(!"unexpected FP unary op");
14776 args = gtNewArgList(tree->gtOp.gtOp1);
14779 /* If we have double result/operands, modify the helper */
14781 if (typ == TYP_DOUBLE)
14783 noway_assert(CPX_R4_NEG + 1 == CPX_R8_NEG);
14784 noway_assert(CPX_R4_ADD + 1 == CPX_R8_ADD);
14785 noway_assert(CPX_R4_SUB + 1 == CPX_R8_SUB);
14786 noway_assert(CPX_R4_MUL + 1 == CPX_R8_MUL);
14787 noway_assert(CPX_R4_DIV + 1 == CPX_R8_DIV);
14793 noway_assert(tree->OperIsCompare());
14795 noway_assert(CPX_R4_EQ + 1 == CPX_R8_EQ);
14796 noway_assert(CPX_R4_NE + 1 == CPX_R8_NE);
14797 noway_assert(CPX_R4_LT + 1 == CPX_R8_LT);
14798 noway_assert(CPX_R4_LE + 1 == CPX_R8_LE);
14799 noway_assert(CPX_R4_GE + 1 == CPX_R8_GE);
14800 noway_assert(CPX_R4_GT + 1 == CPX_R8_GT);
14803 tree = fgMorphIntoHelperCall(tree, helper, args);
14805 if (fgPtrArgCntMax < fgPtrArgCntCur)
14807 JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
14808 fgPtrArgCntMax = fgPtrArgCntCur;
14811 fgPtrArgCntCur -= argc;
14819 if (compCurBB == genReturnBB)
14821 /* This is the 'exitCrit' call at the exit label */
14823 noway_assert(op1->gtType == TYP_VOID);
14824 noway_assert(op2 == 0);
14826 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
14831 /* This is a (real) return value -- check its type */
14832 CLANG_FORMAT_COMMENT_ANCHOR;
14835 if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
14837 bool allowMismatch = false;
14839 // Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa
14840 if ((info.compRetType == TYP_BYREF && genActualType(op1->TypeGet()) == TYP_I_IMPL) ||
14841 (op1->TypeGet() == TYP_BYREF && genActualType(info.compRetType) == TYP_I_IMPL))
14842 allowMismatch = true;
14844 if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet()))
14845 allowMismatch = true;
14847 if (!allowMismatch)
14848 NO_WAY("Return type mismatch");
14858 /*****************************************************************************
14860 * Transform the given tree for code generation and return an equivalent tree.
14863 GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac)
14865 noway_assert(tree);
14866 noway_assert(tree->gtOper != GT_STMT);
14871 if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID)
14873 noway_assert(!"JitBreakMorphTree hit");
14879 int thisMorphNum = 0;
14880 if (verbose && treesBeforeAfterMorph)
14882 thisMorphNum = morphNum++;
14883 printf("\nfgMorphTree (before %d):\n", thisMorphNum);
14890 // Apply any rewrites for implicit byref arguments before morphing the
14893 if (fgMorphImplicitByRefArgs(tree))
14896 if (verbose && treesBeforeAfterMorph)
14898 printf("\nfgMorphTree (%d), after implicit-byref rewrite:\n", thisMorphNum);
14905 /*-------------------------------------------------------------------------
14906 * fgMorphTree() can potentially replace a tree with another, and the
14907 * caller has to store the return value correctly.
* Turn this on to always make a copy of "tree" here to shake out
14909 * hidden/unupdated references.
14914 if (compStressCompile(STRESS_GENERIC_CHECK, 0))
14918 #ifdef SMALL_TREE_NODES
14919 if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL)
14921 copy = gtNewLargeOperNode(GT_ADD, TYP_INT);
14926 copy = new (this, GT_CALL) GenTreeCall(TYP_INT);
14929 copy->CopyFrom(tree, this);
14931 #if defined(LATE_DISASM)
14932 // GT_CNS_INT is considered small, so CopyFrom() won't copy all fields
14933 if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
14935 copy->gtIntCon.gtIconHdl.gtIconHdl1 = tree->gtIntCon.gtIconHdl.gtIconHdl1;
14936 copy->gtIntCon.gtIconHdl.gtIconHdl2 = tree->gtIntCon.gtIconHdl.gtIconHdl2;
14940 DEBUG_DESTROY_NODE(tree);
14947 /* Ensure that we haven't morphed this node already */
14948 assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
14950 #if LOCAL_ASSERTION_PROP
14951 /* Before morphing the tree, we try to propagate any active assertions */
14952 if (optLocalAssertionProp)
14954 /* Do we have any active assertions? */
14956 if (optAssertionCount > 0)
14958 GenTreePtr newTree = tree;
14959 while (newTree != nullptr)
14962 /* newTree is non-Null if we propagated an assertion */
14963 newTree = optAssertionProp(apFull, tree, nullptr);
14965 noway_assert(tree != nullptr);
14968 PREFAST_ASSUME(tree != nullptr);
14972 /* Save the original un-morphed tree for fgMorphTreeDone */
14974 GenTreePtr oldTree = tree;
14976 /* Figure out what kind of a node we have */
14978 unsigned kind = tree->OperKind();
14980 /* Is this a constant node? */
14982 if (kind & GTK_CONST)
14984 tree = fgMorphConst(tree);
14988 /* Is this a leaf node? */
14990 if (kind & GTK_LEAF)
14992 tree = fgMorphLeaf(tree);
14996 /* Is it a 'simple' unary/binary operator? */
14998 if (kind & GTK_SMPOP)
15000 tree = fgMorphSmpOp(tree, mac);
15004 /* See what kind of a special operator we have here */
15006 switch (tree->OperGet())
15009 tree = fgMorphField(tree, mac);
15013 tree = fgMorphCall(tree->AsCall());
15016 case GT_ARR_BOUNDS_CHECK:
15017 #ifdef FEATURE_SIMD
15019 #endif // FEATURE_SIMD
15021 fgSetRngChkTarget(tree);
15023 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
15024 bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex);
15025 bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen);
15026 // If the index is a comma(throw, x), just return that.
15027 if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex))
15029 tree = bndsChk->gtIndex;
15032 // Propagate effects flags upwards
15033 bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT);
15034 bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
15036 // Otherwise, we don't change the tree.
15041 tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj);
15042 tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT;
15045 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
15047 tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]);
15048 tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
15052 fgSetRngChkTarget(tree, false);
15056 case GT_ARR_OFFSET:
15057 tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset);
15058 tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT;
15059 tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex);
15060 tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT;
15061 tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj);
15062 tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT;
15065 fgSetRngChkTarget(tree, false);
15070 tree->gtCmpXchg.gtOpLocation = fgMorphTree(tree->gtCmpXchg.gtOpLocation);
15071 tree->gtCmpXchg.gtOpValue = fgMorphTree(tree->gtCmpXchg.gtOpValue);
15072 tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand);
15075 case GT_STORE_DYN_BLK:
15076 tree->gtDynBlk.Data() = fgMorphTree(tree->gtDynBlk.Data());
15079 tree->gtDynBlk.Addr() = fgMorphTree(tree->gtDynBlk.Addr());
15080 tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize);
15087 noway_assert(!"unexpected operator");
15091 fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum));
15096 #if LOCAL_ASSERTION_PROP
15097 //------------------------------------------------------------------------
15098 // fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum
15101 // lclNum - The varNum of the lclVar for which we're killing assertions.
15102 // tree - (DEBUG only) the tree responsible for killing its assertions.
15104 void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTreePtr tree))
15106 /* All dependent assertions are killed here */
15108 ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));
15112 AssertionIndex index = optAssertionCount;
15113 while (killed && (index > 0))
15115 if (BitVecOps::IsMember(apTraits, killed, index - 1))
15118 AssertionDsc* curAssertion = optGetAssertion(index);
15119 noway_assert((curAssertion->op1.lcl.lclNum == lclNum) ||
15120 ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum)));
15123 printf("\nThe assignment ");
15125 printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
15126 optPrintAssertion(curAssertion);
15129 // Remove this bit from the killed mask
15130 BitVecOps::RemoveElemD(apTraits, killed, index - 1);
15132 optAssertionRemove(index);
15138 // killed mask should now be zero
15139 noway_assert(BitVecOps::IsEmpty(apTraits, killed));
15142 //------------------------------------------------------------------------
15143 // fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum.
15146 // lclNum - The varNum of the lclVar for which we're killing assertions.
15147 // tree - (DEBUG only) the tree responsible for killing its assertions.
15150 // For structs and struct fields, it will invalidate the children and parent
15152 // Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar.
15154 void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree))
15156 LclVarDsc* varDsc = &lvaTable[lclNum];
15158 if (varDsc->lvPromoted)
15160 noway_assert(varTypeIsStruct(varDsc));
15162 // Kill the field locals.
15163 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
15165 fgKillDependentAssertionsSingle(i DEBUGARG(tree));
15168 // Kill the struct local itself.
15169 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15171 else if (varDsc->lvIsStructField)
15173 // Kill the field local.
15174 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15176 // Kill the parent struct.
15177 fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree));
15181 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15184 #endif // LOCAL_ASSERTION_PROP
15186 /*****************************************************************************
15188 * This function is called to complete the morphing of a tree node
15189 * It should only be called once for each node.
15190 * If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated,
15191 * to enforce the invariant that each node is only morphed once.
15192 * If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced
15193 * by an equivalent tree.
15197 void Compiler::fgMorphTreeDone(GenTreePtr tree,
15198 GenTreePtr oldTree /* == NULL */
15199 DEBUGARG(int morphNum))
15202 if (verbose && treesBeforeAfterMorph)
15204 printf("\nfgMorphTree (after %d):\n", morphNum);
15206 printf(""); // in our logic this causes a flush
15210 if (!fgGlobalMorph)
15215 if ((oldTree != nullptr) && (oldTree != tree))
15217 /* Ensure that we have morphed this node */
15218 assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!");
15221 TransferTestDataToNode(oldTree, tree);
15226 // Ensure that we haven't morphed this node already
15227 assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
15230 if (tree->OperKind() & GTK_CONST)
15235 #if LOCAL_ASSERTION_PROP
15237 if (!optLocalAssertionProp)
15242 /* Do we have any active assertions? */
15244 if (optAssertionCount > 0)
15246 /* Is this an assignment to a local variable */
15247 GenTreeLclVarCommon* lclVarTree = nullptr;
15248 if (tree->DefinesLocal(this, &lclVarTree))
15250 unsigned lclNum = lclVarTree->gtLclNum;
15251 noway_assert(lclNum < lvaCount);
15252 fgKillDependentAssertions(lclNum DEBUGARG(tree));
15256 /* If this tree makes a new assertion - make it available */
15257 optAssertionGen(tree);
15259 #endif // LOCAL_ASSERTION_PROP
15264 /* Mark this node as being morphed */
15265 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
15269 /*****************************************************************************
15271 * Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants
15272 * Returns true if we modified the flow graph
15275 bool Compiler::fgFoldConditional(BasicBlock* block)
15277 bool result = false;
15279 // We don't want to make any code unreachable
15280 if (opts.compDbgCode || opts.MinOpts())
15285 if (block->bbJumpKind == BBJ_COND)
15287 noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
15289 GenTreePtr stmt = block->bbTreeList->gtPrev;
15291 noway_assert(stmt->gtNext == nullptr);
15293 if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
15295 noway_assert(fgRemoveRestOfBlock);
15297 /* Unconditional throw - transform the basic block into a BBJ_THROW */
15298 fgConvertBBToThrowBB(block);
15300 /* Remove 'block' from the predecessor list of 'block->bbNext' */
15301 fgRemoveRefPred(block->bbNext, block);
15303 /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */
15304 fgRemoveRefPred(block->bbJumpDest, block);
15309 printf("\nConditional folded at BB%02u\n", block->bbNum);
15310 printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
15316 noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
15318 /* Did we fold the conditional */
15320 noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
15322 cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
15324 if (cond->OperKind() & GTK_CONST)
15326 /* Yupee - we folded the conditional!
15327 * Remove the conditional statement */
15329 noway_assert(cond->gtOper == GT_CNS_INT);
15330 noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0));
/* remove the statement from bbTreeList - No need to update
15333 * the reference counts since there are no lcl vars */
15334 fgRemoveStmt(block, stmt);
15336 // block is a BBJ_COND that we are folding the conditional for
15337 // bTaken is the path that will always be taken from block
15338 // bNotTaken is the path that will never be taken from block
15340 BasicBlock* bTaken;
15341 BasicBlock* bNotTaken;
15343 if (cond->gtIntCon.gtIconVal != 0)
15345 /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */
15346 block->bbJumpKind = BBJ_ALWAYS;
15347 bTaken = block->bbJumpDest;
15348 bNotTaken = block->bbNext;
15352 /* Unmark the loop if we are removing a backwards branch */
15353 /* dest block must also be marked as a loop head and */
15354 /* We must be able to reach the backedge block */
15355 if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) &&
15356 fgReachable(block->bbJumpDest, block))
15358 optUnmarkLoopBlocks(block->bbJumpDest, block);
15361 /* JTRUE 0 - transform the basic block into a BBJ_NONE */
15362 block->bbJumpKind = BBJ_NONE;
15363 noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
15364 bTaken = block->bbNext;
15365 bNotTaken = block->bbJumpDest;
15368 if (fgHaveValidEdgeWeights)
15370 // We are removing an edge from block to bNotTaken
15371 // and we have already computed the edge weights, so
15372 // we will try to adjust some of the weights
15374 flowList* edgeTaken = fgGetPredForBlock(bTaken, block);
15375 BasicBlock* bUpdated = nullptr; // non-NULL if we updated the weight of an internal block
15377 // We examine the taken edge (block -> bTaken)
15378 // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight
15379 // else if bTaken has valid profile weight and block does not we try to adjust block's weight
15380 // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
15382 if (block->hasProfileWeight())
15384 // The edge weights for (block -> bTaken) are 100% of block's weight
15385 edgeTaken->flEdgeWeightMin = block->bbWeight;
15386 edgeTaken->flEdgeWeightMax = block->bbWeight;
15388 if (!bTaken->hasProfileWeight())
15390 if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
15392 // Update the weight of bTaken
15393 bTaken->inheritWeight(block);
15398 else if (bTaken->hasProfileWeight())
15400 if (bTaken->countOfInEdges() == 1)
15402 // There is only one in edge to bTaken
15403 edgeTaken->flEdgeWeightMin = bTaken->bbWeight;
15404 edgeTaken->flEdgeWeightMax = bTaken->bbWeight;
15406 // Update the weight of block
15407 block->inheritWeight(bTaken);
15412 if (bUpdated != nullptr)
15415 // Now fix the weights of the edges out of 'bUpdated'
15416 switch (bUpdated->bbJumpKind)
15419 edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
15420 edge->flEdgeWeightMax = bUpdated->bbWeight;
15423 edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
15424 edge->flEdgeWeightMax = bUpdated->bbWeight;
15427 edge = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated);
15428 edge->flEdgeWeightMax = bUpdated->bbWeight;
15431 // We don't handle BBJ_SWITCH
15437 /* modify the flow graph */
15439 /* Remove 'block' from the predecessor list of 'bNotTaken' */
15440 fgRemoveRefPred(bNotTaken, block);
15445 printf("\nConditional folded at BB%02u\n", block->bbNum);
15446 printf("BB%02u becomes a %s", block->bbNum,
15447 block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
15448 if (block->bbJumpKind == BBJ_ALWAYS)
15450 printf(" to BB%02u", block->bbJumpDest->bbNum);
15456 /* if the block was a loop condition we may have to modify
15457 * the loop table */
15459 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
15461 /* Some loops may have been already removed by
15462 * loop unrolling or conditional folding */
15464 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
15469 /* We are only interested in the loop bottom */
15471 if (optLoopTable[loopNum].lpBottom == block)
15473 if (cond->gtIntCon.gtIconVal == 0)
15475 /* This was a bogus loop (condition always false)
15476 * Remove the loop from the table */
15478 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
15482 printf("Removing loop L%02u (from BB%02u to BB%02u)\n\n", loopNum,
15483 optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum);
15493 else if (block->bbJumpKind == BBJ_SWITCH)
15495 noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
15497 GenTreePtr stmt = block->bbTreeList->gtPrev;
15499 noway_assert(stmt->gtNext == nullptr);
15501 if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
15503 noway_assert(fgRemoveRestOfBlock);
15505 /* Unconditional throw - transform the basic block into a BBJ_THROW */
15506 fgConvertBBToThrowBB(block);
15508 /* update the flow graph */
15510 unsigned jumpCnt = block->bbJumpSwt->bbsCount;
15511 BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab;
15513 for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
15515 BasicBlock* curJump = *jumpTab;
15517 /* Remove 'block' from the predecessor list of 'curJump' */
15518 fgRemoveRefPred(curJump, block);
15524 printf("\nConditional folded at BB%02u\n", block->bbNum);
15525 printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
15531 noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);
15533 /* Did we fold the conditional */
15535 noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
15537 cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
15539 if (cond->OperKind() & GTK_CONST)
15541 /* Yupee - we folded the conditional!
15542 * Remove the conditional statement */
15544 noway_assert(cond->gtOper == GT_CNS_INT);
/* remove the statement from bbTreeList - No need to update
15547 * the reference counts since there are no lcl vars */
15548 fgRemoveStmt(block, stmt);
15550 /* modify the flow graph */
15552 /* Find the actual jump target */
15553 unsigned switchVal;
15554 switchVal = (unsigned)cond->gtIntCon.gtIconVal;
15556 jumpCnt = block->bbJumpSwt->bbsCount;
15557 BasicBlock** jumpTab;
15558 jumpTab = block->bbJumpSwt->bbsDstTab;
15562 for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
15564 BasicBlock* curJump = *jumpTab;
15566 assert(curJump->countOfInEdges() > 0);
15568 // If val matches switchVal or we are at the last entry and
15569 // we never found the switch value then set the new jump dest
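// e.g. when folding "switch (1)", only the edge to jumpTab[1] survives; the
// block becomes BBJ_ALWAYS to that target, or BBJ_NONE if the target is the
// fall-through block.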
15571 if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
15573 if (curJump != block->bbNext)
15575 /* transform the basic block into a BBJ_ALWAYS */
15576 block->bbJumpKind = BBJ_ALWAYS;
15577 block->bbJumpDest = curJump;
15579 // if we are jumping backwards, make sure we have a GC Poll.
15580 if (curJump->bbNum > block->bbNum)
15582 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
15587 /* transform the basic block into a BBJ_NONE */
15588 block->bbJumpKind = BBJ_NONE;
15589 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
15595 /* Remove 'block' from the predecessor list of 'curJump' */
15596 fgRemoveRefPred(curJump, block);
15602 printf("\nConditional folded at BB%02u\n", block->bbNum);
15603 printf("BB%02u becomes a %s", block->bbNum,
15604 block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
15605 if (block->bbJumpKind == BBJ_ALWAYS)
15607 printf(" to BB%02u", block->bbJumpDest->bbNum);
15619 //*****************************************************************************
15621 // Morphs a single statement in a block.
15622 // Can be called anytime, unlike fgMorphStmts() which should only be called once.
15624 // Returns true if 'stmt' was removed from the block.
15625 // Returns false if 'stmt' is still in the block (even if other statements were removed).
15628 bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg))
15630 assert(block != nullptr);
15631 assert(stmt != nullptr);
15634 compCurStmt = stmt;
15636 GenTree* morph = fgMorphTree(stmt->gtStmtExpr);
15638 // Bug 1106830 - During the CSE phase we can't just remove
15639 // morph->gtOp.gtOp2 as it could contain CSE expressions.
15640 // This leads to a noway_assert in OptCSE.cpp when
15641 // searching for the removed CSE ref. (using gtFindLink)
15643 if (!optValnumCSE_phase)
15645 // Check for morph as a GT_COMMA with an unconditional throw
15646 if (fgIsCommaThrow(morph, true))
15651 printf("Folding a top-level fgIsCommaThrow stmt\n");
15652 printf("Removing op2 as unreachable:\n");
15653 gtDispTree(morph->gtOp.gtOp2);
15657 // Use the call as the new stmt
15658 morph = morph->gtOp.gtOp1;
15659 noway_assert(morph->gtOper == GT_CALL);
15662 // we can get a throw as a statement root
15663 if (fgIsThrow(morph))
15668 printf("We have a top-level fgIsThrow stmt\n");
15669 printf("Removing the rest of block as unreachable:\n");
15672 noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
15673 fgRemoveRestOfBlock = true;
15677 stmt->gtStmtExpr = morph;
15679 if (lvaLocalVarRefCounted)
15681 // fgMorphTree may have introduced new lclVar references. Bump the ref counts if requested.
15682 lvaRecursiveIncRefCounts(stmt->gtStmtExpr);
15685 // Can the entire tree be removed?
15686 bool removedStmt = fgCheckRemoveStmt(block, stmt);
15688 // Or this is the last statement of a conditional branch that was just folded?
15689 if (!removedStmt && (stmt->getNextStmt() == nullptr) && !fgRemoveRestOfBlock)
15691 if (fgFoldConditional(block))
15693 if (block->bbJumpKind != BBJ_THROW)
15695 removedStmt = true;
15702 // Have to re-do the evaluation order since for example some later code does not expect constants as op1
15703 gtSetStmtInfo(stmt);
15705 // Have to re-link the nodes for this statement
15706 fgSetStmtSeq(stmt);
15712 printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed"));
15718 if (fgRemoveRestOfBlock)
15720 // Remove the rest of the stmts in the block
15721 for (stmt = stmt->getNextStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
15723 fgRemoveStmt(block, stmt);
15726 // The rest of block has been removed and we will always throw an exception.
// Update successors of block
15729 fgRemoveBlockAsPred(block);
15731 // For compDbgCode, we prepend an empty BB as the firstBB, it is BBJ_NONE.
15732 // We should not convert it to a ThrowBB.
15733 if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0))
15735 // Convert block to a throw bb
15736 fgConvertBBToThrowBB(block);
15742 printf("\n%s Block BB%02u becomes a throw block.\n", msg, block->bbNum);
15745 fgRemoveRestOfBlock = false;
15748 return removedStmt;
15751 /*****************************************************************************
15753 * Morph the statements of the given block.
15754 * This function should be called just once for a block. Use fgMorphBlockStmt()
15755 * for reentrant calls.
15758 void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw)
15760 fgRemoveRestOfBlock = false;
15762 noway_assert(fgExpandInline == false);
15764 /* Make the current basic block address available globally */
15768 *mult = *lnot = *loadw = false;
15770 fgCurrentlyInUseArgTemps = hashBv::Create(this);
15772 GenTreeStmt* stmt = block->firstStmt();
15773 GenTreePtr prev = nullptr;
15774 for (; stmt != nullptr; prev = stmt->gtStmtExpr, stmt = stmt->gtNextStmt)
15776 noway_assert(stmt->gtOper == GT_STMT);
15778 if (fgRemoveRestOfBlock)
15780 fgRemoveStmt(block, stmt);
15783 #ifdef FEATURE_SIMD
15784 if (!opts.MinOpts() && stmt->gtStmtExpr->TypeGet() == TYP_FLOAT && stmt->gtStmtExpr->OperGet() == GT_ASG)
15786 fgMorphCombineSIMDFieldAssignments(block, stmt);
15790 fgMorphStmt = stmt;
15791 compCurStmt = stmt;
15792 GenTreePtr tree = stmt->gtStmtExpr;
15796 if (stmt == block->bbTreeList)
15798 block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
15801 unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0);
15805 printf("\nfgMorphTree BB%02u, stmt %d (before)\n", block->bbNum, compCurStmtNum);
15810 /* Morph this statement tree */
15812 GenTreePtr morph = fgMorphTree(tree);
15814 // mark any outgoing arg temps as free so we can reuse them in the next statement.
15816 fgCurrentlyInUseArgTemps->ZeroAll();
15818 // Has fgMorphStmt been sneakily changed ?
15820 if (stmt->gtStmtExpr != tree)
/* This must be a tail call. Ignore 'morph' and carry on with
the tail-call node */
15825 morph = stmt->gtStmtExpr;
15826 noway_assert(compTailCallUsed);
15827 noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
15828 noway_assert(stmt->gtNextStmt == nullptr);
15830 GenTreeCall* call = morph->AsCall();
15832 // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
15833 // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
15835 noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
15836 (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
15837 (compCurBB->bbFlags & BBF_HAS_JMP)));
15839 else if (block != compCurBB)
15841 /* This must be a tail call that caused a GCPoll to get
15842 injected. We haven't actually morphed the call yet
15843 but the flag still got set, clear it here... */
15844 CLANG_FORMAT_COMMENT_ANCHOR;
15847 tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
15850 noway_assert(compTailCallUsed);
15851 noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
15852 noway_assert(stmt->gtNextStmt == nullptr);
15854 GenTreeCall* call = morph->AsCall();
15857 // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
15858 // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
15860 noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
15861 (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
15862 (compCurBB->bbFlags & BBF_HAS_JMP)));
15866 if (compStressCompile(STRESS_CLONE_EXPR, 30))
15868 // Clone all the trees to stress gtCloneExpr()
15872 printf("\nfgMorphTree (stressClone from):\n");
15876 morph = gtCloneExpr(morph);
15877 noway_assert(morph);
15881 printf("\nfgMorphTree (stressClone to):\n");
15886 /* If the hash value changes. we modified the tree during morphing */
15889 unsigned newHash = gtHashValue(morph);
15890 if (newHash != oldHash)
15892 printf("\nfgMorphTree BB%02u, stmt %d (after)\n", block->bbNum, compCurStmtNum);
15898 /* Check for morph as a GT_COMMA with an unconditional throw */
15899 if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true))
15901 /* Use the call as the new stmt */
15902 morph = morph->gtOp.gtOp1;
15903 noway_assert(morph->gtOper == GT_CALL);
15904 noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
15906 fgRemoveRestOfBlock = true;
15909 stmt->gtStmtExpr = tree = morph;
15911 noway_assert(fgPtrArgCntCur == 0);
15913 if (fgRemoveRestOfBlock)
15918 /* Has the statement been optimized away */
15920 if (fgCheckRemoveStmt(block, stmt))
15925 /* Check if this block ends with a conditional branch that can be folded */
15927 if (fgFoldConditional(block))
15932 if (ehBlockHasExnFlowDsc(block))
15937 #if OPT_MULT_ADDSUB
15939 /* Note whether we have two or more +=/-= operators in a row */
15941 if (tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB)
15943 if (prev && prev->gtOper == tree->gtOper)
15951 /* Note "x = a[i] & icon" followed by "x |= a[i] << 8" */
15953 if (tree->gtOper == GT_ASG_OR && prev && prev->gtOper == GT_ASG)
15959 if (fgRemoveRestOfBlock)
15961 if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH))
15963 GenTreePtr first = block->bbTreeList;
15964 noway_assert(first);
15965 GenTreePtr last = first->gtPrev;
15966 noway_assert(last && last->gtNext == nullptr);
15967 GenTreePtr lastStmt = last->gtStmt.gtStmtExpr;
15969 if (((block->bbJumpKind == BBJ_COND) && (lastStmt->gtOper == GT_JTRUE)) ||
15970 ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH)))
15972 GenTreePtr op1 = lastStmt->gtOp.gtOp1;
15974 if (op1->OperKind() & GTK_RELOP)
15976 /* Unmark the comparison node with GTF_RELOP_JMP_USED */
15977 op1->gtFlags &= ~GTF_RELOP_JMP_USED;
15980 last->gtStmt.gtStmtExpr = fgMorphTree(op1);
15984 /* Mark block as a BBJ_THROW block */
15985 fgConvertBBToThrowBB(block);
15988 noway_assert(fgExpandInline == false);
15990 #if FEATURE_FASTTAILCALL
15991 GenTreePtr recursiveTailCall = nullptr;
15992 if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
15994 fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
15999 compCurBB = (BasicBlock*)INVALID_POINTER_VALUE;
16002 // Reset this back so that it doesn't leak out impacting other blocks
16003 fgRemoveRestOfBlock = false;
16006 /*****************************************************************************
16008 * Morph the blocks of the method.
16009 * All statements in all basic blocks are morphed in place.
16010 * This function should be called just once.
16013 void Compiler::fgMorphBlocks()
16018 printf("\n*************** In fgMorphBlocks()\n");
16022 /* Since fgMorphTree can be called after various optimizations to re-arrange
16023 * the nodes, we need a global flag to signal whether we are in the one-pass
16024 * global morphing phase */
16026 fgGlobalMorph = true;
16028 #if LOCAL_ASSERTION_PROP
16030 // Local assertion prop is enabled when we are optimizing
16032 optLocalAssertionProp = (!opts.compDbgCode && !opts.MinOpts());
16034 if (optLocalAssertionProp)
16037 // Initialize for local assertion prop
16039 optAssertionInit(true);
16041 #elif ASSERTION_PROP
16043 // If LOCAL_ASSERTION_PROP is not set
16044 // and we have global assertion prop
16045 // then local assertion prop is always off
16047 optLocalAssertionProp = false;
16051 /*-------------------------------------------------------------------------
16052 * Process all basic blocks in the function
16055 BasicBlock* block = fgFirstBB;
16056 noway_assert(block);
16059 compCurStmtNum = 0;
16064 #if OPT_MULT_ADDSUB
16072 bool loadw = false;
16077 printf("\nMorphing BB%02u of '%s'\n", block->bbNum, info.compFullName);
16081 #if LOCAL_ASSERTION_PROP
16082 if (optLocalAssertionProp)
16085 // Clear out any currently recorded assertion candidates
16086 // before processing each basic block;
16087 // note that we must handle QMARK-COLON specially
16089 optAssertionReset(0);
16093 /* Process all statement trees in the basic block */
16097 fgMorphStmts(block, &mult, &lnot, &loadw);
16099 #if OPT_MULT_ADDSUB
16101 if (mult && (opts.compFlags & CLFLG_TREETRANS) && !opts.compDbgCode && !opts.MinOpts())
16103 for (tree = block->bbTreeList; tree; tree = tree->gtNext)
16105 noway_assert(tree->gtOper == GT_STMT);
16106 GenTreePtr last = tree->gtStmt.gtStmtExpr;
16108 if (last->gtOper == GT_ASG_ADD || last->gtOper == GT_ASG_SUB)
16113 GenTreePtr dst1 = last->gtOp.gtOp1;
16114 GenTreePtr src1 = last->gtOp.gtOp2;
16116 if (!last->IsCnsIntOrI())
16121 if (dst1->gtOper != GT_LCL_VAR)
16125 if (!src1->IsCnsIntOrI())
16135 /* Look at the next statement */
16137 temp = tree->gtNext;
16143 noway_assert(temp->gtOper == GT_STMT);
16144 next = temp->gtStmt.gtStmtExpr;
16146 if (next->gtOper != last->gtOper)
16150 if (next->gtType != last->gtType)
16155 dst2 = next->gtOp.gtOp1;
16156 src2 = next->gtOp.gtOp2;
16158 if (dst2->gtOper != GT_LCL_VAR)
16162 if (dst2->gtLclVarCommon.gtLclNum != dst1->gtLclVarCommon.gtLclNum)
16167 if (!src2->IsCnsIntOrI())
16172 if (last->gtOverflow() != next->gtOverflow())
16177 const ssize_t i1 = src1->gtIntCon.gtIconVal;
16178 const ssize_t i2 = src2->gtIntCon.gtIconVal;
16179 const ssize_t itemp = i1 + i2;
16181 /* if the operators are checking for overflow, check for overflow of the operands */
16183 if (next->gtOverflow())
16185 if (next->TypeGet() == TYP_LONG)
16187 if (next->gtFlags & GTF_UNSIGNED)
16189 ClrSafeInt<UINT64> si1(i1);
16190 if ((si1 + ClrSafeInt<UINT64>(i2)).IsOverflow())
16197 ClrSafeInt<INT64> si1(i1);
16198 if ((si1 + ClrSafeInt<INT64>(i2)).IsOverflow())
16204 else if (next->gtFlags & GTF_UNSIGNED)
16206 ClrSafeInt<UINT32> si1(i1);
16207 if ((si1 + ClrSafeInt<UINT32>(i2)).IsOverflow())
16214 ClrSafeInt<INT32> si1(i1);
16215 if ((si1 + ClrSafeInt<INT32>(i2)).IsOverflow())
16222 /* Fold the two increments/decrements into one */
16224 src1->gtIntCon.gtIconVal = itemp;
16225 #ifdef _TARGET_64BIT_
16226 if (src1->gtType == TYP_INT)
16228 src1->AsIntCon()->TruncateOrSignExtend32();
16230 #endif //_TARGET_64BIT_
16232 /* Remove the second statement completely */
16234 noway_assert(tree->gtNext == temp);
16235 noway_assert(temp->gtPrev == tree);
16239 noway_assert(temp->gtNext->gtPrev == temp);
16241 temp->gtNext->gtPrev = tree;
16242 tree->gtNext = temp->gtNext;
16246 tree->gtNext = nullptr;
16248 noway_assert(block->bbTreeList->gtPrev == temp);
16250 block->bbTreeList->gtPrev = tree;
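// For illustration (not from the original sources): the code above takes two
// consecutive statements such as
//
//     V02 += 3;      // GT_ASG_ADD(V02, 3)
//     V02 += 5;      // GT_ASG_ADD(V02, 5)
//
// and folds them into the single statement
//
//     V02 += 8;      // GT_ASG_ADD(V02, 8)
//
// but only after the ClrSafeInt checks prove that, for overflow-checking
// operators, combining the two constants cannot itself overflow the operand type.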
16261 /* Are we using a single return block? */
16263 if (block->bbJumpKind == BBJ_RETURN)
16265 if ((genReturnBB != nullptr) && (genReturnBB != block) && ((block->bbFlags & BBF_HAS_JMP) == 0))
16267 /* We'll jump to the genReturnBB */
16268 CLANG_FORMAT_COMMENT_ANCHOR;
16270 #if !defined(_TARGET_X86_)
16271 if (info.compFlags & CORINFO_FLG_SYNCH)
16273 fgConvertSyncReturnToLeave(block);
16276 #endif // !_TARGET_X86_
16278 block->bbJumpKind = BBJ_ALWAYS;
16279 block->bbJumpDest = genReturnBB;
16283 // Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN.
16284 // For example a method returning void could have an empty block with jump kind BBJ_RETURN.
16285 // Such blocks do materialize as part of in-lining.
16287 // Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN.
16288 // It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC.
16289 // For now it is safe to explicitly check whether the last stmt is GT_RETURN when genReturnLocal is valid.
16292 // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN.
16294 GenTreePtr last = (block->bbTreeList != nullptr) ? block->bbTreeList->gtPrev : nullptr;
16295 GenTreePtr ret = (last != nullptr) ? last->gtStmt.gtStmtExpr : nullptr;
16297 // Replace the GT_RETURN node with a GT_ASG that stores the return value into genReturnLocal.
16298 if (genReturnLocal != BAD_VAR_NUM)
16300 // Method must be returning a value other than TYP_VOID.
16301 noway_assert(compMethodHasRetVal());
16303 // This block must be ending with a GT_RETURN
16304 noway_assert(last != nullptr);
16305 noway_assert(last->gtOper == GT_STMT);
16306 noway_assert(last->gtNext == nullptr);
16307 noway_assert(ret != nullptr);
16309 // GT_RETURN must have a non-null operand, as the method is returning the value assigned to genReturnLocal
16311 noway_assert(ret->OperGet() == GT_RETURN);
16312 noway_assert(ret->gtGetOp1() != nullptr);
16314 GenTreePtr tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1());
16316 last->gtStmt.gtStmtExpr = (tree->OperIsCopyBlkOp()) ? fgMorphCopyBlock(tree) : tree;
16318 // make sure that copy-prop ignores this assignment.
16319 last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE;
16321 else if (ret != nullptr && ret->OperGet() == GT_RETURN)
16323 // This block ends with a GT_RETURN
16324 noway_assert(last != nullptr);
16325 noway_assert(last->gtOper == GT_STMT);
16326 noway_assert(last->gtNext == nullptr);
16328 // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn block
16329 noway_assert(ret->TypeGet() == TYP_VOID);
16330 noway_assert(ret->gtGetOp1() == nullptr);
16332 fgRemoveStmt(block, last);
16338 printf("morph BB%02u to point at onereturn. New block is\n", block->bbNum);
16339 fgTableDispBasicBlock(block);
16345 block = block->bbNext;
16348 /* We are done with the global morphing phase */
16350 fgGlobalMorph = false;
16355 fgDispBasicBlocks(true);
16360 //------------------------------------------------------------------------
16361 // fgCheckArgCnt: Check whether the maximum arg size will change codegen requirements
16364 // fgPtrArgCntMax records the maximum number of pushed arguments.
16365 // Depending upon this value we may need to use an EBP frame
16366 // or be partially interruptible.
16367 // This functionality has been factored out of fgSetOptions() because
16368 // the Rationalizer can create new calls.
16371 // This must be called before isFramePointerRequired() is called, because it is a
16372 // phased variable (can only be written before it has been read).
16374 void Compiler::fgCheckArgCnt()
16376 if (!compCanEncodePtrArgCntMax())
16381 printf("Too many pushed arguments for fully interruptible encoding, marking method as partially "
16382 "interruptible\n");
16385 genInterruptible = false;
16387 if (fgPtrArgCntMax >= sizeof(unsigned))
16392 printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n");
16395 codeGen->setFramePointerRequired(true);
16399 /*****************************************************************************
16401 * Make some decisions about the kind of code to generate.
16404 void Compiler::fgSetOptions()
16407 /* Should we force fully interruptible code ? */
16408 if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30))
16410 noway_assert(!codeGen->isGCTypeFixed());
16411 genInterruptible = true;
16415 if (opts.compDbgCode)
16417 assert(!codeGen->isGCTypeFixed());
16418 genInterruptible = true; // debugging is easier this way ...
16421 /* Assume we won't need an explicit stack frame if this is allowed */
16423 // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of
16424 // the callee-saved registers.
16425 noway_assert(!compTailCallUsed || !compLocallocUsed);
16427 if (compLocallocUsed)
16429 codeGen->setFramePointerRequired(true);
16432 #ifdef _TARGET_X86_
16434 if (compTailCallUsed)
16435 codeGen->setFramePointerRequired(true);
16437 #endif // _TARGET_X86_
16439 if (!opts.genFPopt)
16441 codeGen->setFramePointerRequired(true);
16444 // Assert that the EH table has been initialized by now. Note that
16445 // compHndBBtabAllocCount never decreases; it is a high-water mark
16446 // of table allocation. In contrast, compHndBBtabCount does shrink
16447 // if we delete a dead EH region, and if it shrinks to zero, the
16448 // table pointer compHndBBtab is unreliable.
16449 assert(compHndBBtabAllocCount >= info.compXcptnsCount);
16451 #ifdef _TARGET_X86_
16453 // Note: this case, and the !X86 case below, should both use the
16454 // !X86 path. This would require a few more changes for X86 to use
16455 // compHndBBtabCount (the current number of EH clauses) instead of
16456 // info.compXcptnsCount (the number of EH clauses in IL), such as
16457 // in ehNeedsShadowSPslots(). This is because sometimes the IL has
16458 // an EH clause that we delete as statically dead code before we
16459 // get here, leaving no EH clauses left, and thus no requirement
16460 // to use a frame pointer because of EH. But until all the code uses
16461 // the same test, leave info.compXcptnsCount here.
16462 if (info.compXcptnsCount > 0)
16464 codeGen->setFramePointerRequiredEH(true);
16467 #else // !_TARGET_X86_
16469 if (compHndBBtabCount > 0)
16471 codeGen->setFramePointerRequiredEH(true);
16474 #endif // _TARGET_X86_
16476 #ifdef UNIX_X86_ABI
16477 if (info.compXcptnsCount > 0)
16479 assert(!codeGen->isGCTypeFixed());
16480 // Enforce fully interruptible codegen for funclet unwinding
16481 genInterruptible = true;
16483 #endif // UNIX_X86_ABI
16487 if (info.compCallUnmanaged)
16489 codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame
16492 if (info.compPublishStubParam)
16494 codeGen->setFramePointerRequiredGCInfo(true);
16497 if (opts.compNeedSecurityCheck)
16499 codeGen->setFramePointerRequiredGCInfo(true);
16501 #ifndef JIT32_GCENCODER
16503 // The decoder only reports objects in frames with exceptions if the frame
16504 // is fully interruptible.
16505 // Even if there is no catch or other way to resume execution in this frame
16506 // the VM requires the security object to remain alive until later, so
16507 // frames with security objects must be fully interruptible.
16508 genInterruptible = true;
16510 #endif // JIT32_GCENCODER
16513 if (compIsProfilerHookNeeded())
16515 codeGen->setFramePointerRequired(true);
16518 if (info.compIsVarArgs)
16520 // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
16521 codeGen->setFramePointerRequiredGCInfo(true);
16524 if (lvaReportParamTypeArg())
16526 codeGen->setFramePointerRequiredGCInfo(true);
16529 // printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not");
16532 /*****************************************************************************/
16534 GenTreePtr Compiler::fgInitThisClass()
16536 noway_assert(!compIsForInlining());
16538 CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);
16540 if (!kind.needsRuntimeLookup)
16542 return fgGetSharedCCtor(info.compClassHnd);
16546 #ifdef FEATURE_READYTORUN_COMPILER
16547 // Only CoreRT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR.
16548 if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI))
16550 CORINFO_RESOLVED_TOKEN resolvedToken;
16551 memset(&resolvedToken, 0, sizeof(resolvedToken));
16553 // We are in a shared method body, but maybe we don't need a runtime lookup after all.
16554 // This covers the case of a generic method on a non-generic type.
16555 if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST))
16557 resolvedToken.hClass = info.compClassHnd;
16558 return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
16561 // We need a runtime lookup.
16562 GenTreePtr ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);
16564 // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static
16565 // base of the class that owns the method being compiled". If we're in this method, it means we're not
16566 // inlining and there's no ambiguity.
16567 return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF,
16568 gtNewArgList(ctxTree), &kind);
16572 // Collectible types require that, for shared generic code, if we use the generic context parameter,
16573 // we report it. (This is a conservative approach; we could detect some cases, particularly when the
16574 // context parameter is 'this', where we don't need the eager reporting logic.)
16575 lvaGenericsContextUseCount++;
16577 switch (kind.runtimeLookupKind)
16579 case CORINFO_LOOKUP_THISOBJ:
16580 // This code takes a this pointer; but we need to pass the static method desc to get the right point in the hierarchy
16583 GenTreePtr vtTree = gtNewLclvNode(info.compThisArg, TYP_REF);
16584 // Vtable pointer of this object
16585 vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree);
16586 vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
16587 GenTreePtr methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd);
16589 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
16590 gtNewArgList(vtTree, methodHnd));
16593 case CORINFO_LOOKUP_CLASSPARAM:
16595 GenTreePtr vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
16596 return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, 0, gtNewArgList(vtTree));
16599 case CORINFO_LOOKUP_METHODPARAM:
16601 GenTreePtr methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
16602 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
16603 gtNewArgList(gtNewIconNode(0), methHndTree));
16608 noway_assert(!"Unknown LOOKUP_KIND");
16613 /*****************************************************************************
16615 * Tree walk callback to make sure no GT_QMARK nodes are present in the tree,
16616 * except for the allowed "? 1 : 0" pattern.
16618 Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTreePtr* tree, fgWalkData* data)
16620 if ((*tree)->OperGet() == GT_QMARK)
16622 fgCheckQmarkAllowedForm(*tree);
16624 return WALK_CONTINUE;
16627 void Compiler::fgCheckQmarkAllowedForm(GenTree* tree)
16629 assert(tree->OperGet() == GT_QMARK);
16630 #ifndef LEGACY_BACKEND
16631 assert(!"Qmarks beyond morph disallowed.");
16632 #else // LEGACY_BACKEND
16633 GenTreePtr colon = tree->gtOp.gtOp2;
16635 assert(colon->gtOp.gtOp1->IsIntegralConst(0));
16636 assert(colon->gtOp.gtOp2->IsIntegralConst(1));
16637 #endif // LEGACY_BACKEND
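// Illustrative sketch (not from the original sources): the one qmark shape the
// legacy backend tolerates past this point is the boolean-manifesting pattern
//
//     relop ? 1 : 0
//
// i.e. a GT_QMARK whose GT_COLON has op1 == 0 (the "else" value) and
// op2 == 1 (the "then" value), exactly as the asserts above require.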
16640 /*****************************************************************************
16642 * Verify that the importer has created GT_QMARK nodes in a way we can
16643 * process them. The following is allowed:
16645 * 1. A top level qmark. Top level qmark is of the form:
16646 * a) (bool) ? (void) : (void) OR
16647 * b) V0N = (bool) ? (type) : (type)
16649 * 2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child
16650 * of either op1 of colon or op2 of colon but not a child of any other node.
16653 void Compiler::fgPreExpandQmarkChecks(GenTreePtr expr)
16655 GenTreePtr topQmark = fgGetTopLevelQmark(expr);
16657 // If the top level Qmark is null, then scan the tree to make sure
16658 // there are no qmarks within it.
16659 if (topQmark == nullptr)
16661 fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
16665 // We could probably expand the cond node also, but we don't think the extra effort is necessary,
16666 // so let's just assert the cond node of a top level qmark doesn't have further top level qmarks.
16667 fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, nullptr);
16669 fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1);
16670 fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2);
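// As an illustrative example of the allowed shape (hypothetical locals):
//
//     V05 = C0 ? (C1 ? t1 : e1)
//              : (C2 ? t2 : e2);
//
// qmarks may appear as the "then"/"else" of a top level qmark, but a qmark
// buried under any other operator, e.g. V05 = 1 + (C0 ? t : e), or inside the
// condition C0, would fire the fgAssertNoQmark walk above.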
16675 /*****************************************************************************
16677 * Get the top level GT_QMARK node in a given "expr", return NULL if such a
16678 * node is not present. If the top level GT_QMARK node is assigned to a
16679 * GT_LCL_VAR, then return the lcl node in ppDst.
16682 GenTreePtr Compiler::fgGetTopLevelQmark(GenTreePtr expr, GenTreePtr* ppDst /* = NULL */)
16684 if (ppDst != nullptr)
16689 GenTreePtr topQmark = nullptr;
16690 if (expr->gtOper == GT_QMARK)
16694 else if (expr->gtOper == GT_ASG && expr->gtOp.gtOp2->gtOper == GT_QMARK && expr->gtOp.gtOp1->gtOper == GT_LCL_VAR)
16696 topQmark = expr->gtOp.gtOp2;
16697 if (ppDst != nullptr)
16699 *ppDst = expr->gtOp.gtOp1;
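// For example (hypothetical tree): given the statement
//
//     GT_ASG(GT_LCL_VAR V03, GT_QMARK(cond, colon))
//
// this returns the GT_QMARK node and sets *ppDst to the V03 GT_LCL_VAR node;
// for a bare GT_QMARK statement it returns the qmark and leaves *ppDst null.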
16705 /*********************************************************************************
16707 * For a castclass helper call,
16708 * Importer creates the following tree:
16709 * tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
16711 * This method splits the qmark expression created by the importer into the
16712 * following blocks: (block, asg, cond1, cond2, helper, remainder)
16713 * Notice that op1 is the result for both of the conditions. So we coalesce these
16714 * assignments into a single block instead of two blocks, which would result in a nested diamond.
16716 * +---------->-----------+
16720 * block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
16722 * We expect to achieve the following codegen:
16723 * mov rsi, rdx tmp = op1 // asgBlock
16724 * test rsi, rsi goto skip if tmp == null ? // cond1Block
16726 * mov rcx, 0x76543210 cns = op2 // cond2Block
16727 * cmp qword ptr [rsi], rcx goto skip if *tmp == op2
16729 * call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp) // helperBlock
16731 * SKIP: // remainderBlock
16732 * tmp has the result.
16735 void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTreePtr stmt)
16740 printf("\nExpanding CastInstOf qmark in BB%02u (before)\n", block->bbNum);
16741 fgDispBasicBlocks(block, block, true);
16745 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
16747 GenTreePtr dst = nullptr;
16748 GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
16749 noway_assert(dst != nullptr);
16751 assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);
16753 // Get cond, true, false exprs for the qmark.
16754 GenTreePtr condExpr = qmark->gtGetOp1();
16755 GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
16756 GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
16758 // Get cond, true, false exprs for the nested qmark.
16759 GenTreePtr nestedQmark = falseExpr;
16760 GenTreePtr cond2Expr;
16761 GenTreePtr true2Expr;
16762 GenTreePtr false2Expr;
16764 if (nestedQmark->gtOper == GT_QMARK)
16766 cond2Expr = nestedQmark->gtGetOp1();
16767 true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
16768 false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode();
16770 assert(cond2Expr->gtFlags & GTF_RELOP_QMARK);
16771 cond2Expr->gtFlags &= ~GTF_RELOP_QMARK;
16775 // This is a rare case that arises when we are doing minopts and encounter isinst of null.
16776 // gtFoldExpr was still able to optimize away part of the tree (but not all).
16777 // That means it does not match our pattern.
16779 // Rather than write code to handle this case, just fake up some nodes to make it match the common
16780 // case. Synthesize a comparison that is always true, and for the result-on-true, use the
16781 // entire subtree we expected to be the nested question op.
16783 cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
16784 true2Expr = nestedQmark;
16785 false2Expr = gtNewIconNode(0, TYP_I_IMPL);
16787 assert(false2Expr->OperGet() == trueExpr->OperGet());
16789 // Clear flags as they are now going to be part of JTRUE.
16790 assert(condExpr->gtFlags & GTF_RELOP_QMARK);
16791 condExpr->gtFlags &= ~GTF_RELOP_QMARK;
16793 // Create the chain of blocks. See method header comment.
16794 // The order of blocks after this is the following:
16795 // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
16797 // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
16798 // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
16799 // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
16800 // remainderBlock will still be GC safe.
16801 unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
16802 BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
16803 fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
16805 BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true);
16806 BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true);
16807 BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true);
16808 BasicBlock* asgBlock = fgNewBBafter(BBJ_NONE, block, true);
16810 remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
16812 // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
16813 // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
16814 if ((block->bbFlags & BBF_INTERNAL) == 0)
16816 helperBlock->bbFlags &= ~BBF_INTERNAL;
16817 cond2Block->bbFlags &= ~BBF_INTERNAL;
16818 cond1Block->bbFlags &= ~BBF_INTERNAL;
16819 asgBlock->bbFlags &= ~BBF_INTERNAL;
16820 helperBlock->bbFlags |= BBF_IMPORTED;
16821 cond2Block->bbFlags |= BBF_IMPORTED;
16822 cond1Block->bbFlags |= BBF_IMPORTED;
16823 asgBlock->bbFlags |= BBF_IMPORTED;
16826 // Chain the flow correctly.
16827 fgAddRefPred(asgBlock, block);
16828 fgAddRefPred(cond1Block, asgBlock);
16829 fgAddRefPred(cond2Block, cond1Block);
16830 fgAddRefPred(helperBlock, cond2Block);
16831 fgAddRefPred(remainderBlock, helperBlock);
16832 fgAddRefPred(remainderBlock, cond1Block);
16833 fgAddRefPred(remainderBlock, cond2Block);
16835 cond1Block->bbJumpDest = remainderBlock;
16836 cond2Block->bbJumpDest = remainderBlock;
16838 // Set the weights; some are guesses.
16839 asgBlock->inheritWeight(block);
16840 cond1Block->inheritWeight(block);
16841 cond2Block->inheritWeightPercentage(cond1Block, 50);
16842 helperBlock->inheritWeightPercentage(cond2Block, 50);
16844 // Append cond1 as JTRUE to cond1Block
16845 GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
16846 GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
16847 fgInsertStmtAtEnd(cond1Block, jmpStmt);
16849 // Append cond2 as JTRUE to cond2Block
16850 jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
16851 jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
16852 fgInsertStmtAtEnd(cond2Block, jmpStmt);
16854 // AsgBlock should get tmp = op1 assignment.
16855 trueExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr);
16856 GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
16857 fgInsertStmtAtEnd(asgBlock, trueStmt);
16859 // Since we are adding the helper in the JTRUE false path, reverse cond2 and add the helper.
16860 gtReverseCond(cond2Expr);
16861 GenTreePtr helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr);
16862 GenTreePtr helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx);
16863 fgInsertStmtAtEnd(helperBlock, helperStmt);
16865 // Finally remove the nested qmark stmt.
16866 fgRemoveStmt(block, stmt);
16871 printf("\nExpanding CastInstOf qmark in BB%02u (after)\n", block->bbNum);
16872 fgDispBasicBlocks(block, remainderBlock, true);
16877 /*****************************************************************************
16879 * Expand a statement with a top level qmark node. There are three cases, based
16880 * on whether the qmark has both "true" and "false" arms, or just one of them.
16891 * S0 -->-- ~C -->-- T F -->-- S1
16896 * -----------------------------------------
16905 * S0 -->-- ~C -->-- T -->-- S1
16907 * +-->-------------+
16910 * -----------------------------------------
16919 * S0 -->-- C -->-- F -->-- S1
16921 * +-->------------+
16924 * If the qmark assigns to a variable, then create tmps for "then"
16925 * and "else" results and assign the temp to the variable as a writeback step.
16927 void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTreePtr stmt)
16929 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
16931 // Retrieve the Qmark node to be expanded.
16932 GenTreePtr dst = nullptr;
16933 GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
16934 if (qmark == nullptr)
16939 if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
16941 fgExpandQmarkForCastInstOf(block, stmt);
16948 printf("\nExpanding top-level qmark in BB%02u (before)\n", block->bbNum);
16949 fgDispBasicBlocks(block, block, true);
16953 // Retrieve the operands.
16954 GenTreePtr condExpr = qmark->gtGetOp1();
16955 GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
16956 GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
16958 assert(condExpr->gtFlags & GTF_RELOP_QMARK);
16959 condExpr->gtFlags &= ~GTF_RELOP_QMARK;
16961 assert(!varTypeIsFloating(condExpr->TypeGet()));
16963 bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP);
16964 bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
16965 assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!
16967 // Create remainder, cond and "else" blocks. After this, the blocks are in this order:
16968 // block ... condBlock ... elseBlock ... remainderBlock
16970 // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
16971 // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
16972 // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
16973 // remainderBlock will still be GC safe.
16974 unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
16975 BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
16976 fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
16978 BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true);
16979 BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true);
16981 // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
16982 // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
16983 if ((block->bbFlags & BBF_INTERNAL) == 0)
16985 condBlock->bbFlags &= ~BBF_INTERNAL;
16986 elseBlock->bbFlags &= ~BBF_INTERNAL;
16987 condBlock->bbFlags |= BBF_IMPORTED;
16988 elseBlock->bbFlags |= BBF_IMPORTED;
16991 remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
16993 condBlock->inheritWeight(block);
16995 fgAddRefPred(condBlock, block);
16996 fgAddRefPred(elseBlock, condBlock);
16997 fgAddRefPred(remainderBlock, elseBlock);
16999 BasicBlock* thenBlock = nullptr;
17000 if (hasTrueExpr && hasFalseExpr)
17005 // S0 -->-- ~C -->-- T F -->-- S1
17010 gtReverseCond(condExpr);
17011 condBlock->bbJumpDest = elseBlock;
17013 thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
17014 thenBlock->bbJumpDest = remainderBlock;
17015 if ((block->bbFlags & BBF_INTERNAL) == 0)
17017 thenBlock->bbFlags &= ~BBF_INTERNAL;
17018 thenBlock->bbFlags |= BBF_IMPORTED;
17021 elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
17023 fgAddRefPred(thenBlock, condBlock);
17024 fgAddRefPred(remainderBlock, thenBlock);
17026 thenBlock->inheritWeightPercentage(condBlock, 50);
17027 elseBlock->inheritWeightPercentage(condBlock, 50);
17029 else if (hasTrueExpr)
17032 // S0 -->-- ~C -->-- T -->-- S1
17034 // +-->-------------+
17037 gtReverseCond(condExpr);
17038 condBlock->bbJumpDest = remainderBlock;
17039 fgAddRefPred(remainderBlock, condBlock);
17040 // Since we have no false expr, use the one we'd already created.
17041 thenBlock = elseBlock;
17042 elseBlock = nullptr;
17044 thenBlock->inheritWeightPercentage(condBlock, 50);
17046 else if (hasFalseExpr)
17049 // S0 -->-- C -->-- F -->-- S1
17051 // +-->------------+
17054 condBlock->bbJumpDest = remainderBlock;
17055 fgAddRefPred(remainderBlock, condBlock);
17057 elseBlock->inheritWeightPercentage(condBlock, 50);
17060 GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
17061 GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
17062 fgInsertStmtAtEnd(condBlock, jmpStmt);
17064 // Remove the original qmark statement.
17065 fgRemoveStmt(block, stmt);
17067 // Since we have top level qmarks, we either have a dst for it, in which case
17068 // we need to create tmps for the true and false exprs; otherwise we just don't bother assigning.
17070 unsigned lclNum = BAD_VAR_NUM;
17071 if (dst != nullptr)
17073 assert(dst->gtOper == GT_LCL_VAR);
17074 lclNum = dst->gtLclVar.gtLclNum;
17078 assert(qmark->TypeGet() == TYP_VOID);
17083 if (dst != nullptr)
17085 trueExpr = gtNewTempAssign(lclNum, trueExpr);
17087 GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
17088 fgInsertStmtAtEnd(thenBlock, trueStmt);
17091 // Assign the falseExpr into the dst or tmp, insert in elseBlock
17094 if (dst != nullptr)
17096 falseExpr = gtNewTempAssign(lclNum, falseExpr);
17098 GenTreePtr falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx);
17099 fgInsertStmtAtEnd(elseBlock, falseStmt);
17105 printf("\nExpanding top-level qmark in BB%02u (after)\n", block->bbNum);
17106 fgDispBasicBlocks(block, remainderBlock, true);
17111 /*****************************************************************************
17113 * Expand GT_QMARK nodes from the flow graph into basic blocks.
17117 void Compiler::fgExpandQmarkNodes()
17121 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
17123 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
17125 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
17127 fgPreExpandQmarkChecks(expr);
17129 fgExpandQmarkStmt(block, stmt);
17133 fgPostExpandQmarkChecks();
17136 compQmarkRationalized = true;
17140 /*****************************************************************************
17142 * Make sure we don't have any more GT_QMARK nodes.
17145 void Compiler::fgPostExpandQmarkChecks()
17147 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
17149 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
17151 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
17152 fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
17158 /*****************************************************************************
17160 * Transform all basic blocks for codegen.
17163 void Compiler::fgMorph()
17165 noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here.
17167 fgOutgoingArgTemps = nullptr;
17172 printf("*************** In fgMorph()\n");
17176 fgDispBasicBlocks(true);
17180 // Insert call to class constructor as the first basic block if
17181 // we were asked to do so.
17182 if (info.compCompHnd->initClass(nullptr /* field */, info.compMethodHnd /* method */,
17183 impTokenLookupContextHandle /* context */) &
17184 CORINFO_INITCLASS_USE_HELPER)
17186 fgEnsureFirstBBisScratch();
17187 fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass());
17191 if (opts.compGcChecks)
17193 for (unsigned i = 0; i < info.compArgsCount; i++)
17195 if (lvaTable[i].TypeGet() == TYP_REF)
17197 // confirm that the argument is a GC pointer (for debugging (GC stress))
17198 GenTreePtr op = gtNewLclvNode(i, TYP_REF);
17199 GenTreeArgList* args = gtNewArgList(op);
17200 op = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, 0, args);
17202 fgEnsureFirstBBisScratch();
17203 fgInsertStmtAtEnd(fgFirstBB, op);
17208 if (opts.compStackCheckOnRet)
17210 lvaReturnEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnEspCheck"));
17211 lvaTable[lvaReturnEspCheck].lvType = TYP_INT;
17214 if (opts.compStackCheckOnCall)
17216 lvaCallEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("CallEspCheck"));
17217 lvaTable[lvaCallEspCheck].lvType = TYP_INT;
17221 /* Filter out unimported BBs */
17223 fgRemoveEmptyBlocks();
17226 /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
17227 fgDebugCheckBBlist(false, false);
17230 EndPhase(PHASE_MORPH_INIT);
17235 JITDUMP("trees after inlining\n");
17236 DBEXEC(VERBOSE, fgDispBasicBlocks(true));
17239 RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.
17241 EndPhase(PHASE_MORPH_INLINE);
17243 /* Add any internal blocks/trees we may need */
17248 fgMultipleNots = false;
17252 /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
17253 fgDebugCheckBBlist(false, false);
17256 fgRemoveEmptyTry();
17258 EndPhase(PHASE_EMPTY_TRY);
17260 fgRemoveEmptyFinally();
17262 EndPhase(PHASE_EMPTY_FINALLY);
17264 fgMergeFinallyChains();
17266 EndPhase(PHASE_MERGE_FINALLY_CHAINS);
17270 EndPhase(PHASE_CLONE_FINALLY);
17272 fgUpdateFinallyTargetFlags();
17274 /* For x64 and ARM64 we need to mark irregular parameters */
17275 fgMarkImplicitByRefArgs();
17277 /* Promote struct locals if necessary */
17278 fgPromoteStructs();
17280 /* Now it is time to figure out which locals have their address taken. */
17281 fgMarkAddressExposedLocals();
17283 EndPhase(PHASE_STR_ADRLCL);
17285 /* Apply the type update to implicit byref parameters; also choose (based on address-exposed
17286 analysis) which implicit byref promotions to keep (requires copy to initialize) or discard. */
17287 fgRetypeImplicitByRefArgs();
17290 /* Now that address-taken locals and implicit byrefs are marked, we can safely apply stress. */
17292 fgStress64RsltMul();
17295 EndPhase(PHASE_MORPH_IMPBYREF);
17297 /* Morph the trees in all the blocks of the method */
17301 /* Fix any LclVar annotations on discarded struct promotion temps for implicit by-ref args */
17302 fgMarkDemotedImplicitByRefArgs();
17304 EndPhase(PHASE_MORPH_GLOBAL);
17307 JITDUMP("trees after fgMorphBlocks\n");
17308 DBEXEC(VERBOSE, fgDispBasicBlocks(true));
17311 /* Decide the kind of code we want to generate */
17315 fgExpandQmarkNodes();
17318 compCurBB = nullptr;
17322 /*****************************************************************************
17324 * Promoting struct locals
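* For illustration (hypothetical locals): a struct local such as
*
*     struct Pair { int a; int b; };
*     Pair V02;
*
* can be promoted into two independent field locals, e.g. V03 for Pair.a and
* V04 for Pair.b, which later phases may then track and enregister separately.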
17326 void Compiler::fgPromoteStructs()
17331 printf("*************** In fgPromoteStructs()\n");
17335 if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
17340 if (fgNoStructPromotion)
17346 // The code in this #if has been useful in debugging struct promotion issues, by
17347 // allowing selective enablement of the struct promotion optimization according to method hash.
17350 unsigned methHash = info.compMethodHash();
17351 char* lostr = getenv("structpromohashlo");
17352 unsigned methHashLo = 0;
17355 sscanf_s(lostr, "%x", &methHashLo);
17357 char* histr = getenv("structpromohashhi");
17358 unsigned methHashHi = UINT32_MAX;
17361 sscanf_s(histr, "%x", &methHashHi);
17363 if (methHash < methHashLo || methHash > methHashHi)
17369 printf("Promoting structs for method %s, hash = 0x%x.\n",
17370 info.compFullName, info.compMethodHash());
17371 printf(""); // in our logic this causes a flush
17376 if (info.compIsVarArgs)
17381 if (getNeedsGSSecurityCookie())
17389 printf("\nlvaTable before fgPromoteStructs\n");
17394 // The lvaTable might grow as we grab temps. Make a local copy here.
17395 unsigned startLvaCount = lvaCount;
17398 // Loop through the original lvaTable, looking for struct locals to be promoted.
17400 lvaStructPromotionInfo structPromotionInfo;
17401 bool tooManyLocals = false;
17403 for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
17405 // Whether this var got promoted
17406 bool promotedVar = false;
17407 LclVarDsc* varDsc = &lvaTable[lclNum];
17409 // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
17410 // its fields. Instead, we will attempt to enregister the entire struct.
17411 if (varDsc->lvIsSIMDType() && varDsc->lvIsUsedInSIMDIntrinsic())
17413 varDsc->lvRegStruct = true;
17415 // Don't promote if we have reached the tracking limit.
17416 else if (lvaHaveManyLocals())
17418 // Print the message the first time we detect this condition
17419 if (!tooManyLocals)
17421 JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
17423 tooManyLocals = true;
17425 else if (varTypeIsStruct(varDsc))
17427 bool shouldPromote;
17429 lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
17430 if (structPromotionInfo.canPromote)
17432 shouldPromote = lvaShouldPromoteStructVar(lclNum, &structPromotionInfo);
17436 shouldPromote = false;
17440 // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
17441 // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
17442 static int structPromoVarNum = 0;
17443 structPromoVarNum++;
17444 if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
17449 // Promote this struct local var.
17450 lvaPromoteStructVar(lclNum, &structPromotionInfo);
17451 promotedVar = true;
17453 #ifdef _TARGET_ARM_
17454 if (structPromotionInfo.requiresScratchVar)
17456 // Ensure that the scratch variable is allocated, in case we
17457 // pass a promoted struct as an argument.
17458 if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
17460 lvaPromotedStructAssemblyScratchVar =
17461 lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
17462 lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
17465 #endif // _TARGET_ARM_
17469 if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed)
17471 // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
17472 // we will treat it as a reg struct.
17473 varDsc->lvRegStruct = true;
17480 printf("\nlvaTable after fgPromoteStructs\n");
17486 Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre)
17488 noway_assert(tree->OperGet() == GT_FIELD);
17490 GenTreePtr objRef = tree->gtField.gtFldObj;
17491 GenTreePtr obj = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? objRef->gtOp.gtOp1 : nullptr;
17492 noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)));
17494 /* Is this an instance data member? */
17496 if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))
17498 unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
17499 LclVarDsc* varDsc = &lvaTable[lclNum];
17501 if (varTypeIsStruct(obj))
17503 if (varDsc->lvPromoted)
17506 unsigned fldOffset = tree->gtField.gtFldOffset;
17507 unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
17508 noway_assert(fieldLclIndex != BAD_VAR_NUM);
17510 if (lvaIsImplicitByRefLocal(lclNum))
17512 // Keep track of the number of appearances of each promoted implicit
17513 // byref (here during struct promotion, which happens during address-exposed
17514 // analysis); fgMakeOutgoingStructArgCopy checks the ref counts for implicit
17515 // byref params when deciding if it's legal to elide certain copies of them.
17516 // Normally fgMarkAddrTakenLocalsPreCB (which calls this method) flags the
17517 // lclVars, but here we're about to return SKIP_SUBTREES and rob it of the
17518 // chance, so have to check now.
17520 "Incrementing ref count from %d to %d for V%02d in fgMorphStructField for promoted struct\n",
17521 varDsc->lvRefCnt, varDsc->lvRefCnt + 1, lclNum);
17522 varDsc->lvRefCnt++;
17525 tree->SetOper(GT_LCL_VAR);
17526 tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
17527 tree->gtType = lvaTable[fieldLclIndex].TypeGet();
17528 tree->gtFlags &= GTF_NODE_MASK;
17529 tree->gtFlags &= ~GTF_GLOB_REF;
17531 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17532 if (parent->gtOper == GT_ASG)
17534 if (parent->gtOp.gtOp1 == tree)
17536 tree->gtFlags |= GTF_VAR_DEF;
17537 tree->gtFlags |= GTF_DONT_CSE;
17540 // Promotion of struct containing struct fields where the field
17541 // is a struct with a single pointer sized scalar type field: in
17542 // this case struct promotion uses the type of the underlying
17543 // scalar field as the type of struct field instead of recursively
17544 // promoting. This can lead to a case where we have a block-asgn
17545 // with its RHS replaced with a scalar type. Mark RHS value as
17546 // DONT_CSE so that assertion prop will not do const propagation.
17547 // The reason this is required is that if RHS of a block-asg is a
17548 // constant, then it is interpreted as init-block incorrectly.
17550 // TODO - This can also be avoided if we implement recursive struct promotion.
17552 if (varTypeIsStruct(parent) && parent->gtOp.gtOp2 == tree && !varTypeIsStruct(tree))
17554 tree->gtFlags |= GTF_DONT_CSE;
17560 printf("Replacing the field in promoted struct with a local var:\n");
17561 fgWalkPre->printModified = true;
17564 return WALK_SKIP_SUBTREES;
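// Illustrative example (hypothetical var numbers): for a promoted struct local
// V02 whose field at offset 4 is the field local V07, the tree
//
//     GT_FIELD[+4](GT_ADDR(GT_LCL_VAR V02))
//
// is rewritten in place into
//
//     GT_LCL_VAR V07
//
// with GTF_VAR_DEF/GTF_DONT_CSE added when it is the LHS of an assignment.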
17570 // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
17571 // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
17572 // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
17573 // there is one extremely rare case where that won't be true. An enum type is a special value type
17574 // that contains exactly one element of a primitive integer type (that, for CLS programs is named
17575 // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
17576 // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
17577 // ldfld. For example:
17579 // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
17581 // .field public specialname rtspecialname int16 value__
17582 // .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
17584 // .method public hidebysig static void Main() cil managed
17586 // .locals init (valuetype mynamespace.e_t V_0)
17589 // ldflda int16 mynamespace.e_t::value__
17593 // Normally, compilers will not generate the ldflda, since it is superfluous.
17595 // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
17596 // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
17597 // mismatch like this, don't do this morphing. The local var may end up getting marked as
17598 // address taken, and the appropriate SHORT load will be done from memory in that case.
17600 if (tree->TypeGet() == obj->TypeGet())
17602 if (lvaIsImplicitByRefLocal(lclNum))
17604 // Keep track of the number of appearances of each promoted implicit
17605 // byref (here during struct promotion, which happens during address-exposed
17606 // analysis); fgMakeOutgoingStructArgCopy checks the ref counts for implicit
17607 // byref params when deciding if it's legal to elide certain copies of them.
17608 // Normally fgMarkAddrTakenLocalsPreCB (which calls this method) flags the
17609 // lclVars, but here we're about to return SKIP_SUBTREES and rob it of the
17610 // chance, so have to check now.
17611 JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMorphStructField for normed struct\n",
17612 varDsc->lvRefCnt, varDsc->lvRefCnt + 1, lclNum);
17613 varDsc->lvRefCnt++;
17616 tree->ChangeOper(GT_LCL_VAR);
17617 tree->gtLclVarCommon.SetLclNum(lclNum);
17618 tree->gtFlags &= GTF_NODE_MASK;
17620 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17621 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
17623 tree->gtFlags |= GTF_VAR_DEF;
17624 tree->gtFlags |= GTF_DONT_CSE;
17629 printf("Replacing the field in normed struct with the local var:\n");
17630 fgWalkPre->printModified = true;
17633 return WALK_SKIP_SUBTREES;
17638 return WALK_CONTINUE;
17641 Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTreePtr tree, fgWalkData* fgWalkPre)
17643 noway_assert(tree->OperGet() == GT_LCL_FLD);
17645 unsigned lclNum = tree->gtLclFld.gtLclNum;
17646 LclVarDsc* varDsc = &lvaTable[lclNum];
17648 if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted))
17651 unsigned fldOffset = tree->gtLclFld.gtLclOffs;
17652 unsigned fieldLclIndex = 0;
17653 LclVarDsc* fldVarDsc = nullptr;
17655 if (fldOffset != BAD_VAR_NUM)
17657 fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
17658 noway_assert(fieldLclIndex != BAD_VAR_NUM);
17659 fldVarDsc = &lvaTable[fieldLclIndex];
17662 if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType)
17663 #ifdef _TARGET_X86_
17664 && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType)
17668 // There is an existing sub-field we can use
17669 tree->gtLclFld.SetLclNum(fieldLclIndex);
17671 // We need to keep the types 'compatible'; if we can switch back to a GT_LCL_VAR, do so.
17672 CLANG_FORMAT_COMMENT_ANCHOR;
17674 #ifdef _TARGET_ARM_
17675 assert(varTypeIsIntegralOrI(tree->TypeGet()) || varTypeIsFloating(tree->TypeGet()));
17677 assert(varTypeIsIntegralOrI(tree->TypeGet()));
17679 if (varTypeCanReg(fldVarDsc->TypeGet()))
17681 // If the type is integer-ish, then we can use it as-is
17682 tree->ChangeOper(GT_LCL_VAR);
17683 assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex);
17684 tree->gtType = fldVarDsc->TypeGet();
17688 printf("Replacing the GT_LCL_FLD in promoted struct with a local var:\n");
17689 fgWalkPre->printModified = true;
17694 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17695 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
17697 tree->gtFlags |= GTF_VAR_DEF;
17698 tree->gtFlags |= GTF_DONT_CSE;
17703 // There is no existing field that has all the parts that we need
17704 // So we must ensure that the struct lives in memory.
17705 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
17708 // We can't convert this local to a float because it really does have its address taken.
17710 varDsc->lvKeepType = 1;
17714 return WALK_SKIP_SUBTREES;
17717 return WALK_CONTINUE;
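// Illustrative example (hypothetical var numbers): for a promoted struct local
// V02 whose int field at offset 0 is the field local V05, the tree
//
//     GT_LCL_FLD int V02 [+0]
//
// becomes GT_LCL_VAR int V05. If no promoted field matches the access exactly,
// the struct is instead forced to memory via lvaSetVarDoNotEnregister.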
17720 //------------------------------------------------------------------------
17721 // fgMarkImplicitByRefArgs: Identify any by-value struct parameters which are "implicit by-reference";
17722 // i.e. which the ABI requires to be passed by making a copy in the caller and
17723 // passing its address to the callee. Mark their `LclVarDsc`s such that
17724 // `lvaIsImplicitByRefLocal` will return true for them.
17726 void Compiler::fgMarkImplicitByRefArgs()
17728 #if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
17732 printf("\n*************** In fgMarkImplicitByRefs()\n");
17736 for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
17738 LclVarDsc* varDsc = &lvaTable[lclNum];
17740 if (varDsc->lvIsParam && varTypeIsStruct(varDsc))
17744 if (varDsc->lvSize() > REGSIZE_BYTES)
17746 size = varDsc->lvSize();
17750 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
17751 size = info.compCompHnd->getClassSize(typeHnd);
17754 #if defined(_TARGET_AMD64_)
17755 if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
17756 #elif defined(_TARGET_ARM64_)
17757 if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc))
17760 // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local
17761 // So I am now using it to indicate that this is one of the weird implicit byref parameters.
17763 // The address taken cleanup will look for references to locals marked like
17764 // this, and transform them appropriately.
17765 varDsc->lvIsTemp = 1;
17767 // Clear the ref count field; fgMarkAddressTakenLocals will increment it per
17768 // appearance of implicit-by-ref param so that call arg morphing can do an
17769 // optimization for single-use implicit-by-ref params whose single use is as
17770 // an outgoing call argument.
17771 varDsc->lvRefCnt = 0;
17776 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
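// For illustration of the size checks above: on x64 (non-Unix ABI) a struct
// argument is passed by value in a register only if its size is 1, 2, 4 or 8
// bytes; a struct of size 3, 5, 6 or 7 (not a power of two) or larger than
// REGSIZE_BYTES is passed "implicit by-reference" instead. On ARM64 the same
// applies to structs larger than a pointer that are not multi-reg structs.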
17779 //------------------------------------------------------------------------
17780 // fgRetypeImplicitByRefArgs: Update the types on implicit byref parameters' `LclVarDsc`s (from
17781 // struct to pointer). Also choose (based on address-exposed analysis)
17782 // which struct promotions of implicit byrefs to keep or discard.
17783 // For those which are kept, insert the appropriate initialization code.
17784 // For those which are to be discarded, annotate the promoted field locals
17785 // so that fgMorphImplicitByRefArgs will know to rewrite their appearances
17786 // using indirections off the pointer parameters.
17788 void Compiler::fgRetypeImplicitByRefArgs()
17790 #if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
17794 printf("\n*************** In fgRetypeImplicitByRefArgs()\n");
17798 for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
17800 LclVarDsc* varDsc = &lvaTable[lclNum];
17802 if (lvaIsImplicitByRefLocal(lclNum))
17806 if (varDsc->lvSize() > REGSIZE_BYTES)
17808 size = varDsc->lvSize();
17812 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
17813 size = info.compCompHnd->getClassSize(typeHnd);
17816 if (varDsc->lvPromoted)
17818 // This implicit-by-ref was promoted; create a new temp to represent the
17819 // promoted struct before rewriting this parameter as a pointer.
17820 unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Promoted implicit byref"));
17821 lvaSetStruct(newLclNum, lvaGetStruct(lclNum), true);
17822 // Update varDsc since lvaGrabTemp might have re-allocated the var dsc array.
17823 varDsc = &lvaTable[lclNum];
17825 // Copy the struct promotion annotations to the new temp.
17826 LclVarDsc* newVarDsc = &lvaTable[newLclNum];
17827 newVarDsc->lvPromoted = true;
17828 newVarDsc->lvFieldLclStart = varDsc->lvFieldLclStart;
17829 newVarDsc->lvFieldCnt = varDsc->lvFieldCnt;
17830 newVarDsc->lvContainsHoles = varDsc->lvContainsHoles;
17831 newVarDsc->lvCustomLayout = varDsc->lvCustomLayout;
17833 newVarDsc->lvKeepType = true;
17836 // Propagate address-taken-ness and do-not-enregister-ness.
17837 newVarDsc->lvAddrExposed = varDsc->lvAddrExposed;
17838 newVarDsc->lvDoNotEnregister = varDsc->lvDoNotEnregister;
17840 newVarDsc->lvLclBlockOpAddr = varDsc->lvLclBlockOpAddr;
17841 newVarDsc->lvLclFieldExpr = varDsc->lvLclFieldExpr;
17842 newVarDsc->lvVMNeedsStackAddr = varDsc->lvVMNeedsStackAddr;
17843 newVarDsc->lvLiveInOutOfHndlr = varDsc->lvLiveInOutOfHndlr;
17844 newVarDsc->lvLiveAcrossUCall = varDsc->lvLiveAcrossUCall;
17847 // If the promotion is dependent, the promoted temp would just be committed
17848 // to memory anyway, so we'll rewrite its appearances to be indirections
17849 // through the pointer parameter, the same as we'd do for this
17850 // parameter if it weren't promoted at all (otherwise the initialization
17851 // of the new temp would just be a needless memcpy at method entry).
17852 bool undoPromotion = (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ||
17853 (varDsc->lvRefCnt <= varDsc->lvFieldCnt);
17855 if (!undoPromotion)
17857 // Insert IR that initializes the temp from the parameter.
17858 // LHS is a simple reference to the temp.
17859 fgEnsureFirstBBisScratch();
17860 GenTreePtr lhs = gtNewLclvNode(newLclNum, varDsc->lvType);
17861 // RHS is an indirection (using GT_OBJ) off the parameter.
17862 GenTreePtr addr = gtNewLclvNode(lclNum, TYP_BYREF);
17863 GenTreePtr rhs = gtNewBlockVal(addr, (unsigned)size);
17864 GenTreePtr assign = gtNewAssignNode(lhs, rhs);
17865 fgInsertStmtAtBeg(fgFirstBB, assign);
17868 // Update the locals corresponding to the promoted fields.
17869 unsigned fieldLclStart = varDsc->lvFieldLclStart;
17870 unsigned fieldCount = varDsc->lvFieldCnt;
17871 unsigned fieldLclStop = fieldLclStart + fieldCount;
17873 for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
17875 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
17879 // Leave lvParentLcl pointing to the parameter so that fgMorphImplicitByRefArgs
17880 // will know to rewrite appearances of this local.
17881 assert(fieldVarDsc->lvParentLcl == lclNum);
17885 // Set the new parent.
17886 fieldVarDsc->lvParentLcl = newLclNum;
17887 // Clear the ref count field; it is used to communicate the number of references
17888 // to the implicit byref parameter when morphing calls that pass the implicit byref
17889 // out as an outgoing argument value, but that doesn't pertain to this field local
17890 // which is now a field of a non-arg local.
17891 fieldVarDsc->lvRefCnt = 0;
17894 fieldVarDsc->lvIsParam = false;
17895 // The fields shouldn't inherit any register preferences from
17896 // the parameter which is really a pointer to the struct.
17897 fieldVarDsc->lvIsRegArg = false;
17898 fieldVarDsc->lvIsMultiRegArg = false;
17899 fieldVarDsc->lvSetIsHfaRegArg(false);
17900 fieldVarDsc->lvArgReg = REG_NA;
17901 #if FEATURE_MULTIREG_ARGS
17902 fieldVarDsc->lvOtherArgReg = REG_NA;
17904 fieldVarDsc->lvPrefReg = 0;
17907 // Hijack lvFieldLclStart to record the new temp number.
17908 // It will get fixed up in fgMarkDemotedImplicitByRefArgs.
17909 varDsc->lvFieldLclStart = newLclNum;
17910 // Go ahead and clear lvFieldCnt -- either we're promoting
17911 // a replacement temp or we're not promoting this arg, and
17912 // in either case the parameter is now a pointer that doesn't
17913 // have these fields.
17914 varDsc->lvFieldCnt = 0;
17916 // Hijack lvPromoted to communicate to fgMorphImplicitByRefArgs
17917 // whether references to the struct should be rewritten as
17918 // indirections off the pointer (not promoted) or references
17919 // to the new struct local (promoted).
17920 varDsc->lvPromoted = !undoPromotion;
17924 // The "undo promotion" path above clears lvPromoted for args that struct
17925 // promotion wanted to promote but that aren't considered profitable to
17926 // rewrite. It hijacks lvFieldLclStart to communicate to
17927 // fgMarkDemotedImplicitByRefArgs that it needs to clean up annotations left
17928 // on such args for fgMorphImplicitByRefArgs to consult in the interim.
17929 // Here we have an arg that was simply never promoted, so make sure it doesn't
17930 // have nonzero lvFieldLclStart, since that would confuse fgMorphImplicitByRefArgs
17931 // and fgMarkDemotedImplicitByRefArgs.
17932 assert(varDsc->lvFieldLclStart == 0);
17935 // Since the parameter in this position is really a pointer, its type is TYP_BYREF.
17936 varDsc->lvType = TYP_BYREF;
17938 // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF
17939 // make sure that the following flag is not set, as it would force SSA to
17940 // exclude tracking/enregistering these LclVars. (see fgExcludeFromSsa)
17942 varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it.
17945 // This should not be converted to a double in stress mode,
17946 // because it is really a pointer
17947 varDsc->lvKeepType = 1;
17949 // The struct parameter may have had its address taken, but the pointer parameter
17950 // cannot -- any uses of the struct parameter's address are uses of the pointer
17951 // parameter's value, and there's no way for the MSIL to reference the pointer
17952 // parameter's address. So clear the address-taken bit for the parameter.
17953 varDsc->lvAddrExposed = 0;
17954 varDsc->lvDoNotEnregister = 0;
17958 printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum);
17964 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
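// Illustrative before/after (hypothetical var numbers): for an implicit byref
// parameter V01 of struct type S whose promotion is kept, this phase grabs a
// new temp V08, moves the promotion annotations onto it, retypes V01 as
// TYP_BYREF, and inserts the equivalent of
//
//     V08 = *(S*)V01;      // ASG(LCL_VAR V08, OBJ(LCL_VAR V01))
//
// at method entry. When promotion is undone, no copy is inserted; appearances
// of V01 are instead rewritten later by fgMorphImplicitByRefArgs as
// indirections off the TYP_BYREF parameter.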
//------------------------------------------------------------------------
// fgMarkDemotedImplicitByRefArgs: Clear annotations for any implicit byrefs that struct promotion
//                                 asked to promote. Appearances of these have now been rewritten
//                                 (by fgMorphImplicitByRefArgs) using indirections from the pointer
//                                 parameter or references to the promotion temp, as appropriate.
void Compiler::fgMarkDemotedImplicitByRefArgs()
#if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
LclVarDsc* varDsc = &lvaTable[lclNum];
if (lvaIsImplicitByRefLocal(lclNum))
if (varDsc->lvPromoted)
// The parameter is simply a pointer now, so clear lvPromoted. It was left set
// by fgRetypeImplicitByRefArgs to communicate to fgMorphImplicitByRefArgs that
// appearances of this arg needed to be rewritten to a new promoted struct local.
varDsc->lvPromoted = false;
// Clear the lvFieldLclStart value that was set by fgRetypeImplicitByRefArgs
// to tell fgMorphImplicitByRefArgs which local is the new promoted struct one.
varDsc->lvFieldLclStart = 0;
else if (varDsc->lvFieldLclStart != 0)
// We created new temps to represent a promoted struct corresponding to this
// parameter, but decided not to go through with the promotion and have
// rewritten all uses as indirections off the pointer parameter.
// We stashed the local number of the new struct temp in lvFieldLclStart; make
// note of that and clear the annotation.
unsigned structLclNum = varDsc->lvFieldLclStart;
varDsc->lvFieldLclStart = 0;
// Clear the arg's ref count; this was set during address-taken analysis so that
// call morphing could identify single-use implicit byrefs; we're done with
// that, and want it to be in its default state of zero when we go to set
// real ref counts for all variables.
varDsc->lvRefCnt = 0;
// The temp struct is now unused; set flags appropriately so that we
// won't allocate space for it on the stack.
LclVarDsc* structVarDsc = &lvaTable[structLclNum];
structVarDsc->lvRefCnt = 0;
structVarDsc->lvAddrExposed = false;
structVarDsc->lvUnusedStruct = true;
unsigned fieldLclStart = structVarDsc->lvFieldLclStart;
unsigned fieldCount = structVarDsc->lvFieldCnt;
unsigned fieldLclStop = fieldLclStart + fieldCount;
for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
// Fix the pointer to the parent local.
LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
assert(fieldVarDsc->lvParentLcl == lclNum);
fieldVarDsc->lvParentLcl = structLclNum;
// The field local is now unused; set flags appropriately so that
// we won't allocate stack space for it.
fieldVarDsc->lvRefCnt = 0;
fieldVarDsc->lvAddrExposed = false;
#endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
/*****************************************************************************
 *
 *  Morph irregular parameters:
 *  for x64 and ARM64 this means turning them into byrefs, adding extra indirs.
 */
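// For example (a sketch of the rewrites performed below): for an implicit-by-ref
// struct parameter X,
//   X      ==> OBJ(X) or FIELD(X, f)  -- value uses become indirections off the pointer
//   &X     ==> X                      -- the struct's address is just the pointer's value
//   &(X.f) ==> &FIELD(X, f)           -- field addresses become field refs off the pointer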
bool Compiler::fgMorphImplicitByRefArgs(GenTreePtr tree)
#if (!defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) && !defined(_TARGET_ARM64_)
return false;
#else // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
bool changed = false;
// Implicit byref morphing needs to know if the reference to the parameter is a
// child of GT_ADDR or not, so this method looks one level down and does the
// rewrite whenever a child is a reference to an implicit byref parameter.
if (tree->gtOper == GT_ADDR)
if (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
GenTreePtr morphedTree = fgMorphImplicitByRefArgs(tree, true);
changed = (morphedTree != nullptr);
assert(!changed || (morphedTree == tree));
for (GenTreePtr* pTree : tree->UseEdges())
GenTreePtr childTree = *pTree;
if (childTree->gtOper == GT_LCL_VAR)
GenTreePtr newChildTree = fgMorphImplicitByRefArgs(childTree, false);
if (newChildTree != nullptr)
changed = true;
*pTree = newChildTree;
return changed;
#endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
GenTreePtr Compiler::fgMorphImplicitByRefArgs(GenTreePtr tree, bool isAddr)
assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)));
assert(isAddr == (tree->gtOper == GT_ADDR));
GenTreePtr lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree;
unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
LclVarDsc* lclVarDsc = &lvaTable[lclNum];
CORINFO_FIELD_HANDLE fieldHnd;
unsigned fieldOffset = 0;
var_types fieldRefType = TYP_UNKNOWN;
if (lvaIsImplicitByRefLocal(lclNum))
// The SIMD transformation to coalesce contiguous references to SIMD vector fields will
// re-invoke the traversal to mark address-taken locals.
// So, we may encounter a tree that has already been transformed to TYP_BYREF.
// If we do, leave it as-is.
if (!varTypeIsStruct(lclVarTree))
assert(lclVarTree->TypeGet() == TYP_BYREF);
return nullptr;
else if (lclVarDsc->lvPromoted)
// fgRetypeImplicitByRefArgs created a new promoted struct local to represent this
// arg. Rewrite this to refer to the new local.
assert(lclVarDsc->lvFieldLclStart != 0);
lclVarTree->AsLclVarCommon()->SetLclNum(lclVarDsc->lvFieldLclStart);
return tree;
fieldHnd = nullptr;
else if (lclVarDsc->lvIsStructField && lvaIsImplicitByRefLocal(lclVarDsc->lvParentLcl))
// This was a field reference to an implicit-by-reference struct parameter that was
// dependently promoted; update it to a field reference off the pointer.
// Grab the field handle from the struct field lclVar.
fieldHnd = lclVarDsc->lvFieldHnd;
fieldOffset = lclVarDsc->lvFldOffset;
assert(fieldHnd != nullptr);
// Update lclNum/lclVarDsc to refer to the parameter.
lclNum = lclVarDsc->lvParentLcl;
lclVarDsc = &lvaTable[lclNum];
fieldRefType = lclVarTree->TypeGet();
// We only need to transform the 'marked' implicit by ref parameters.
// This is no longer a def of the lclVar, even if it WAS a def of the struct.
lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK);
if (fieldHnd == nullptr)
// change &X into just plain X
tree->CopyFrom(lclVarTree, this);
tree->gtType = TYP_BYREF;
// change &(X.f) [i.e. GT_ADDR of local for promoted arg field]
// into &(X, f) [i.e. GT_ADDR of GT_FIELD off ptr param]
lclVarTree->gtLclVarCommon.SetLclNum(lclNum);
lclVarTree->gtType = TYP_BYREF;
tree->gtOp.gtOp1 = gtNewFieldRef(fieldRefType, fieldHnd, lclVarTree, fieldOffset);
printf("Replacing address of implicit by ref struct parameter with byref:\n");
// Change X into OBJ(X) or FIELD(X, f)
var_types structType = tree->gtType;
tree->gtType = TYP_BYREF;
tree->gtLclVarCommon.SetLclNum(lclNum);
tree = gtNewFieldRef(fieldRefType, fieldHnd, tree, fieldOffset);
tree = gtNewObjNode(lclVarDsc->lvVerTypeInfo.GetClassHandle(), tree);
if (structType == TYP_STRUCT)
gtSetObjGcInfo(tree->AsObj());
// TODO-CQ: If the VM ever stops violating the ABI and passing heap references,
// we could remove TGTANYWHERE.
tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE);
printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n");
// An "AddrExposedContext" expresses the calling context in which an address expression occurs.
enum AddrExposedContext
AXC_None,     // None of the below seen yet.
AXC_Ind,      // The address being computed is to be dereferenced.
AXC_Addr,     // We're computing a raw address (not dereferenced, at least not immediately).
AXC_IndWide,  // A block operation dereferenced an address referencing more bytes than the address
              // addresses -- if the address addresses a field of a struct local, we need to consider
              // the entire local address taken (not just the field).
AXC_AddrWide, // The address being computed will be dereferenced by a block operation that operates
              // on more bytes than the width of the storage location addressed. If this is a
              // field of a promoted struct local, declare the entire struct local address-taken.
AXC_IndAdd,   // A GT_ADD is the immediate parent, and it was evaluated in an IND context.
              // If one arg is a constant int, evaluate the other in an IND context. Otherwise, none.
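// As a rough example of how these contexts flow in the walk below: for a tree like
//   IND(ADD(ADDR(LCL_VAR V01), CNS_INT 4))
// the IND pushes AXC_Ind, the ADD (walked in an Ind context) pushes AXC_IndAdd, and
// the ADDR operand of the constant-offset add is then evaluated in an Ind context,
// so V01 is merely dereferenced here and is not marked address-exposed.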
typedef ArrayStack<AddrExposedContext> AXCStack;
// We use pre-post to simulate passing an argument in a recursion, via a stack.
Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPostCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
(void)axcStack->Pop();
return WALK_CONTINUE;
Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
GenTreePtr tree = *pTree;
Compiler* comp = fgWalkPre->compiler;
AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
AddrExposedContext axc = axcStack->Top();
// In some situations, we have to figure out what the effective context is in which to
// evaluate the current tree, depending on which argument position it is in its parent.
GenTreePtr parent = fgWalkPre->parentStack->Index(1);
assert(parent->OperGet() == GT_ADD);
// Is one of the args a constant representing a field offset,
// and is this the other? If so, Ind context.
if (parent->gtOp.gtOp1->IsCnsIntOrI() && parent->gtOp.gtOp2 == tree)
axc = AXC_Ind;
else if (parent->gtOp.gtOp2->IsCnsIntOrI() && parent->gtOp.gtOp1 == tree)
axc = AXC_Ind;
// Now recurse properly for the tree.
switch (tree->gtOper)
if (axc != AXC_Addr)
axcStack->Push(AXC_Ind);
axcStack->Push(AXC_None);
return WALK_CONTINUE;
if (axc == AXC_Addr)
axcStack->Push(AXC_None);
else if (tree->TypeGet() == TYP_STRUCT)
// The block operation will dereference its argument(s) -- usually. If the size of the initblk
// or copyblk exceeds the size of a storage location whose address is used as one of the
// arguments, then we have to consider that storage location (indeed, its underlying containing
// location) to be address taken. So get the width of the initblk or copyblk.
GenTreePtr parent = fgWalkPre->parentStack->Index(1);
GenTreeBlk* blk = tree->AsBlk();
unsigned width = blk->gtBlkSize;
noway_assert(width != 0);
GenTree* addr = blk->Addr();
if (addr->OperGet() == GT_ADDR)
if (parent->gtOper == GT_ASG)
if ((tree == parent->gtOp.gtOp1) &&
((width == 0) || !comp->fgFitsInOrNotLoc(addr->gtGetOp1(), width)))
assert(parent->gtOper == GT_CALL);
axcStack->Push(axc);
// This is like a regular GT_IND.
axcStack->Push(AXC_Ind);
return WALK_CONTINUE;
// Assume maximal width.
axcStack->Push(AXC_IndWide);
return WALK_CONTINUE;
case GT_FIELD_LIST:
axcStack->Push(AXC_None);
return WALK_CONTINUE;
// Taking the address of an array element never takes the address of a local.
axcStack->Push(AXC_None);
return WALK_CONTINUE;
#ifdef FEATURE_SIMD
if (tree->gtOp.gtOp1->OperGet() == GT_SIMD)
axcStack->Push(AXC_None);
#endif // FEATURE_SIMD
if (axc == AXC_Ind)
axcStack->Push(AXC_None);
else if (axc == AXC_IndWide)
axcStack->Push(AXC_AddrWide);
assert(axc == AXC_None);
axcStack->Push(AXC_Addr);
return WALK_CONTINUE;
// First, handle a couple of special cases: field of promoted struct local, field
// of "normed" struct.
if (comp->fgMorphStructField(tree, fgWalkPre) == WALK_SKIP_SUBTREES)
// It (may have) replaced the field with a local var or local field. If we're in an addr context,
// label it addr-taken.
if (tree->OperIsLocal() && (axc == AXC_Addr || axc == AXC_AddrWide))
unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
comp->lvaSetVarAddrExposed(lclNum);
if (axc == AXC_AddrWide)
LclVarDsc* varDsc = &comp->lvaTable[lclNum];
if (varDsc->lvIsStructField)
comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
// Push something to keep the PostCB, which will pop it, happy.
axcStack->Push(AXC_None);
return WALK_SKIP_SUBTREES;
// GT_FIELD is an implicit deref.
if (axc == AXC_Addr)
axcStack->Push(AXC_None);
else if (axc == AXC_AddrWide)
axcStack->Push(AXC_IndWide);
axcStack->Push(AXC_Ind);
return WALK_CONTINUE;
assert(axc != AXC_Addr);
unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
if (comp->lvaIsImplicitByRefLocal(lclNum))
// Keep track of the number of appearances of each promoted implicit
// byref (here during address-exposed analysis); fgMakeOutgoingStructArgCopy
// checks the ref counts for implicit byref params when deciding if it's legal
// to elide certain copies of them.
LclVarDsc* varDsc = &comp->lvaTable[lclNum];
JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMarkAddrTakenLocalsPreCB\n", varDsc->lvRefCnt,
varDsc->lvRefCnt + 1, lclNum);
varDsc->lvRefCnt++;
// This recognizes certain forms, and does all the work. In that case, it returns WALK_SKIP_SUBTREES,
// else WALK_CONTINUE. We do the same here.
fgWalkResult res = comp->fgMorphLocalField(tree, fgWalkPre);
if (res == WALK_SKIP_SUBTREES && tree->OperGet() == GT_LCL_VAR && (axc == AXC_Addr || axc == AXC_AddrWide))
comp->lvaSetVarAddrExposed(lclNum);
if (axc == AXC_AddrWide)
LclVarDsc* varDsc = &comp->lvaTable[lclNum];
if (varDsc->lvIsStructField)
comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
// Must push something; if res is WALK_SKIP_SUBTREES, it doesn't matter what,
// but something must be there for the post callback to pop. If we're going
// to analyze children, the LCL_FLD creates an Ind context, so use that.
axcStack->Push(AXC_Ind);
unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
LclVarDsc* varDsc = &comp->lvaTable[lclNum];
if (comp->lvaIsImplicitByRefLocal(lclNum))
// Keep track of the number of appearances of each promoted implicit
// byref (here during address-exposed analysis); fgMakeOutgoingStructArgCopy
// checks the ref counts for implicit byref params when deciding if it's legal
// to elide certain copies of them.
JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMarkAddrTakenLocalsPreCB\n", varDsc->lvRefCnt,
varDsc->lvRefCnt + 1, lclNum);
varDsc->lvRefCnt++;
if (axc == AXC_Addr || axc == AXC_AddrWide)
comp->lvaSetVarAddrExposed(lclNum);
if (axc == AXC_AddrWide)
if (varDsc->lvIsStructField)
comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
// We may need to Quirk the storage size for this LCL_VAR:
// some PInvoke signatures incorrectly specify a ByRef to an INT32
// when they actually write a SIZE_T or INT64.
if (axc == AXC_Addr)
comp->gtCheckQuirkAddrExposedLclVar(tree, fgWalkPre->parentStack);
// Push something to keep the PostCB, which will pop it, happy.
axcStack->Push(AXC_None);
// The tree is a leaf.
return WALK_SKIP_SUBTREES;
assert(axc != AXC_Addr);
// See below about treating pointer operations as wider indirection.
if (tree->gtOp.gtOp1->gtType == TYP_BYREF || tree->gtOp.gtOp2->gtType == TYP_BYREF)
axcStack->Push(AXC_IndWide);
else if (axc == AXC_Ind)
// Let the children know that the parent was a GT_ADD, to be evaluated in an IND context.
// If it's an add of a constant and an address, and the constant represents a field,
// then we'll evaluate the address argument in an Ind context; otherwise, the None context.
axcStack->Push(AXC_IndAdd);
axcStack->Push(axc);
return WALK_CONTINUE;
// !!! Treat Pointer Operations as Wider Indirection
//
// If we are performing pointer operations, make sure we treat that as equivalent to a wider
// indirection. This is because the pointers could be pointing to the address of struct fields
// and could be used to perform operations on the whole struct or passed to another method.
//
// When visiting a node in this pre-order walk, we do not know if we would in the future
// encounter a GT_ADDR of a GT_FIELD below.
//
// Note: GT_ADDR of a GT_FIELD is always a TYP_BYREF.
// So let us be conservative and treat TYP_BYREF operations as AXC_IndWide and propagate a
// wider indirection context down the expr tree.
//
// For example, in unsafe code:
//
// IL_000e 12 00 ldloca.s 0x0
// IL_0010 7c 02 00 00 04 ldflda 0x4000002
// IL_0015 12 00 ldloca.s 0x0
// IL_0017 7c 01 00 00 04 ldflda 0x4000001
//
// When visiting the GT_SUB node, if the types of either of the GT_SUB's operands are BYREF, then
// consider GT_SUB to be equivalent to an AXC_IndWide.
//
// Similarly for pointer comparisons and pointer escaping as integers through conversions, treat
// them as AXC_IndWide.
if ((tree->gtOp.gtOp1->gtType == TYP_BYREF) ||
(tree->OperIsBinary() && (tree->gtOp.gtOp2->gtType == TYP_BYREF)))
axcStack->Push(AXC_IndWide);
return WALK_CONTINUE;
// To be safe/conservative: pass Addr through, but not Ind -- otherwise, revert to "None". We must
// handle the "Ind" propagation explicitly above.
if (axc == AXC_Addr || axc == AXC_AddrWide)
axcStack->Push(axc);
axcStack->Push(AXC_None);
return WALK_CONTINUE;
bool Compiler::fgFitsInOrNotLoc(GenTreePtr tree, unsigned width)
if (tree->TypeGet() != TYP_STRUCT)
return width <= genTypeSize(tree->TypeGet());
else if (tree->OperGet() == GT_LCL_VAR)
assert(tree->TypeGet() == TYP_STRUCT);
unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
return width <= lvaTable[lclNum].lvExactSize;
else if (tree->OperGet() == GT_FIELD)
CORINFO_CLASS_HANDLE fldClass = info.compCompHnd->getFieldClass(tree->gtField.gtFldHnd);
return width <= info.compCompHnd->getClassSize(fldClass);
else if (tree->OperGet() == GT_INDEX)
return width <= tree->gtIndex.gtIndElemSize;
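// For example (a sketch): given a copyblk of 16 bytes whose destination is the
// address of an 8-byte field of local V02, fgFitsInOrNotLoc returns false, the
// walk above pushes AXC_IndWide/AXC_AddrWide, and all of V02 (not just the field)
// is marked address-exposed.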
void Compiler::fgAddFieldSeqForZeroOffset(GenTreePtr op1, FieldSeqNode* fieldSeq)
assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_REF);
switch (op1->OperGet())
if (op1->gtOp.gtOp1->OperGet() == GT_LCL_FLD)
GenTreeLclFld* lclFld = op1->gtOp.gtOp1->AsLclFld();
lclFld->gtFieldSeq = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq);
if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT)
FieldSeqNode* op1Fs = op1->gtOp.gtOp1->gtIntCon.gtFieldSeq;
if (op1Fs != nullptr)
op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
op1->gtOp.gtOp1->gtIntCon.gtFieldSeq = op1Fs;
else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT)
FieldSeqNode* op2Fs = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
if (op2Fs != nullptr)
op2Fs = GetFieldSeqStore()->Append(op2Fs, fieldSeq);
op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op2Fs;
FieldSeqNode* op1Fs = op1->gtIntCon.gtFieldSeq;
if (op1Fs != nullptr)
op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
op1->gtIntCon.gtFieldSeq = op1Fs;
// Record in the general zero-offset map.
GetZeroOffsetFieldMap()->Set(op1, fieldSeq);
/*****************************************************************************
 *
 *  Mark address-taken locals.
 */
void Compiler::fgMarkAddressExposedLocals()
printf("\n*************** In fgMarkAddressExposedLocals()\n");
BasicBlock* block = fgFirstBB;
noway_assert(block);
/* Make the current basic block address available globally */
GenTreePtr stmt;
for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
// Call Compiler::fgMarkAddrTakenLocalsPreCB/PostCB on each node.
AXCStack stk(this);
stk.Push(AXC_None); // We start in neither an addr nor an ind context.
fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
block = block->bbNext;
// fgNodesMayInterfere:
//   return true if moving nodes relative to each other can change the result of a computation
//
//   read: a node which reads
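// For example (a sketch): if V01 is address-exposed, an indirect store '*p = 0'
// may alias V01, so a read of V01 cannot be moved across that store; if V01 is
// not address-exposed, the two nodes cannot interfere and may be reordered.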
bool Compiler::fgNodesMayInterfere(GenTree* write, GenTree* read)
LclVarDsc* srcVar = nullptr;
bool readIsIndir = read->OperIsIndir() || read->OperIsImplicitIndir();
bool writeIsIndir = write->OperIsIndir() || write->OperIsImplicitIndir();
if (read->OperIsLocal())
srcVar = &lvaTable[read->gtLclVarCommon.gtLclNum];
if (srcVar && srcVar->lvAddrExposed)
else if (readIsIndir)
else if (write->OperIsLocal())
LclVarDsc* dstVar = &lvaTable[write->gtLclVarCommon.gtLclNum];
return dstVar->lvAddrExposed;
else if (read->OperIsLocal())
if (read->gtLclVarCommon.gtLclNum == write->gtLclVarCommon.gtLclNum)
/** This predicate decides whether we will fold a tree with the structure:
 *  x = x <op> y, where x could be any arbitrary expression, into
 *  x <op>= y.
 *
 *  This modification is only performed when the target architecture supports
 *  complex addressing modes. In the case of ARM, for example, this transformation
 *  yields no benefit.
 *
 *  In case this function decides we can proceed to fold into an assignment operator,
 *  we need to inspect whether the operator is commutative to tell fgMorph whether we need to
 *  reverse the tree due to the fact that we saw x = y <op> x and we want to fold that into
 *  x <op>= y because of the operator property.
 */
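// For example (a sketch, LEGACY_BACKEND only): 'x = x + y' can be folded directly
// to 'x += y'; 'x = y + x' can be folded the same way only because '+' is
// commutative, and *bReverse is set so the caller knows the operands were swapped.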
bool Compiler::fgShouldCreateAssignOp(GenTreePtr tree, bool* bReverse)
#if CPU_LOAD_STORE_ARCH
/* In the case of a load/store architecture, there's no gain by doing any of this, we bail. */
return false;
#elif !defined(LEGACY_BACKEND)
return false;
#else // defined(LEGACY_BACKEND)
GenTreePtr op1 = tree->gtOp.gtOp1;
GenTreePtr op2 = tree->gtGetOp2();
genTreeOps cmop = op2->OperGet();
/* Is the destination identical to the first RHS sub-operand? */
if (GenTree::Compare(op1, op2->gtOp.gtOp1))
/*
  Do not transform the following tree
  [0024CFA4] ----------- const int 1
  [0024CFDC] ----G------ | int
  [0024CF5C] ----------- lclVar ubyte V01 tmp0
  [0024D05C] -A--G------ = ubyte
  [0024D014] D------N--- lclVar ubyte V01 tmp0
  to
  [0024CFA4] ----------- const int 1
  [0024D05C] -A--G------ |= ubyte
  [0024D014] U------N--- lclVar ubyte V01 tmp0
  , when V01 is a struct field local.
*/
if (op1->gtOper == GT_LCL_VAR && varTypeIsSmall(op1->TypeGet()) && op1->TypeGet() != op2->gtOp.gtOp2->TypeGet())
unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
LclVarDsc* varDsc = lvaTable + lclNum;
if (varDsc->lvIsStructField)
else if (GenTree::OperIsCommutative(cmop))
/* For commutative ops only, check for "a = x <op> a" */
/* Should we be doing this at all? */
if ((opts.compFlags & CLFLG_TREETRANS) == 0)
/* Can we swap the operands to cmop ... */
if ((op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) && (op2->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT))
/* Both sides have side effects, so we cannot swap them */
/* Is the destination identical to the second RHS sub-operand? */
if (GenTree::Compare(op1, op2->gtOp.gtOp2))
#endif // defined(LEGACY_BACKEND)
#ifdef FEATURE_SIMD
//-----------------------------------------------------------------------------------
// fgMorphCombineSIMDFieldAssignments:
// If the RHS of the input stmt is a read of the SIMD vector X field, then this function
// will keep reading the next few stmts based on the vector size (2, 3, or 4).
// If the LHS of the next stmts are located contiguously, and their RHS are also located
// contiguously, then we replace those statements with a copyblk.
//
// Arguments:
// block - BasicBlock*. block which stmt belongs to
// stmt - GenTreeStmt*. the stmt node we want to check
//
// Return Value:
// if this function successfully optimized the stmts, then return true. Otherwise
// return false.
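// For example (hypothetical C# source), with a Vector4 'v' (four TYP_FLOAT fields):
//   p[0] = v.X; p[1] = v.Y; p[2] = v.Z; p[3] = v.W;
// reads contiguous fields of the same vector and writes contiguous locations, so
// the four assignments can be replaced by a single 16-byte copyblk.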
bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr stmt)
noway_assert(stmt->gtOper == GT_STMT);
GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
assert(tree->OperGet() == GT_ASG);
GenTreePtr originalLHS = tree->gtOp.gtOp1;
GenTreePtr prevLHS = tree->gtOp.gtOp1;
GenTreePtr prevRHS = tree->gtOp.gtOp2;
unsigned index = 0;
var_types baseType = TYP_UNKNOWN;
unsigned simdSize = 0;
GenTreePtr simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true);
if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT)
// If the RHS is not a read of a SIMD vector field X, then there is no need to check further.
return false;
var_types simdType = getSIMDTypeForSize(simdSize);
int assignmentsCount = simdSize / genTypeSize(baseType) - 1;
int remainingAssignments = assignmentsCount;
GenTreePtr curStmt = stmt->gtNext;
GenTreePtr lastStmt = stmt;
while (curStmt != nullptr && remainingAssignments > 0)
GenTreePtr exp = curStmt->gtStmt.gtStmtExpr;
if (exp->OperGet() != GT_ASG)
break;
GenTreePtr curLHS = exp->gtGetOp1();
GenTreePtr curRHS = exp->gtGetOp2();
if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
break;
remainingAssignments--;
prevLHS = curLHS;
prevRHS = curRHS;
lastStmt = curStmt;
curStmt = curStmt->gtNext;
if (remainingAssignments > 0)
// If there are assignments left over, then the stmts do not assign to
// contiguous memory locations from the same vector, so we cannot combine them.
return false;
printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
printf("From BB%02u, stmt", block->bbNum);
printTreeID(stmt);
printf(" to stmt");
printTreeID(lastStmt);
for (int i = 0; i < assignmentsCount; i++)
fgRemoveStmt(block, stmt->gtNext);
GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
if (simdStructNode->OperIsLocal())
setLclRelatedToSIMDIntrinsic(simdStructNode);
GenTree* copyBlkAddr = copyBlkDst;
if (copyBlkAddr->gtOper == GT_LEA)
copyBlkAddr = copyBlkAddr->AsAddrMode()->Base();
GenTreeLclVarCommon* localDst = nullptr;
if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr))
setLclRelatedToSIMDIntrinsic(localDst);
GenTree* simdStructAddr;
if (simdStructNode->TypeGet() == TYP_BYREF)
assert(simdStructNode->OperIsLocal());
assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum));
simdStructNode = gtNewOperNode(GT_IND, simdType, simdStructNode);
assert(varTypeIsSIMD(simdStructNode));
printf("\nBB%02u stmt", block->bbNum);
printf("(before)\n");
// TODO-1stClassStructs: we should be able to simply use a GT_IND here.
GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize);
blkNode->gtType = simdType;
tree = gtNewBlkOpNode(blkNode, simdStructNode, simdSize,
false, // not volatile
true); // copyBlock
stmt->gtStmt.gtStmtExpr = tree;
// Since we generated a new address node which didn't exist before,
// we should expose this address manually here.
AXCStack stk(this);
stk.Push(AXC_None);
fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
printf("\nReplaced BB%02u stmt", block->bbNum);
printf("(after)\n");
return true;
#endif // FEATURE_SIMD