// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                                 Morph                                     XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
#include "allocacheck.h" // for alloca

/*****************************************************************************/
// Split a tree at the given point:
// -- Introduces a new temporary variable.
// -- Evaluates '*splitPoint' into the new temp, in a new statement inserted before 'stmt'.
// -- Substitutes the temporary for '*splitPoint' in 'stmt'.
// '*splitPoint' must be a node in 'stmt', which is within 'blk', and 'splitPoint' is a pointer
// to the link to that node, contained in its parent node.
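//
// For illustration, a hypothetical sketch (V01 stands for the fresh temp):
//
//     before:   stmt: x = COMMA(..., CALL f(), ...)     // split at the CALL
//     after:    asg:  V01 = CALL f()                    // new stmt, inserted before 'stmt'
//               stmt: x = COMMA(..., V01, ...)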
GenTree* Compiler::fgMorphSplitTree(GenTree**   splitPoint, // where to split
                                    GenTree*    stmt,       // top-level statement housing this tree
                                    BasicBlock* blk)        // block we are in
{
    GenTree* newTree;
    GenTree* temp;
    if ((*splitPoint)->OperIsAssignment())
    {
        // it's already being assigned so don't introduce a new one
        newTree = *splitPoint;
        temp    = (*splitPoint)->gtGetOp1();
    }
    else
    {
        unsigned lclNum = lvaGrabTemp(true DEBUGARG("split tree"));
        newTree = gtNewTempAssign(lclNum, *splitPoint);
        temp    = gtNewLclvNode(lclNum, (*splitPoint)->TypeGet());
    }

    GenTreePtr asg = gtNewStmt(newTree);

    fgInsertStmtBefore(blk, stmt, asg);

    // substitute the temporary for '*splitPoint' in 'stmt'
    *splitPoint = temp;

    return asg;
}
// State carried over the tree walk, to be used in making
// a splitting decision.
struct SplitData
{
    size_t                      count; // number of calls seen
    Compiler::fgSplitPredicate* pred;  // callback to determine if we should split here
    GenTree*                    root;  // root stmt of tree being processed
};
#ifdef _TARGET_ARM_
// Returns true if we should split the tree above this node.
// For ARM FP, handling multiple calls per tree via a local and
// greedy register allocator could result in a lot of shuffling.
// So let the global register allocator handle these cases.
bool shouldSplitARM(GenTree* tree, GenTree* parent, Compiler::fgWalkData* data)
{
    if (tree->IsCall()
        && varTypeIsFloating(tree)
        && parent
        && !parent->OperIsAssignment())
    {
        // increment call count
        SplitData* tmpState = (SplitData*)data->pCallbackData;
        tmpState->count++;

        return tmpState->count > 1;
    }
    return false;
}
#endif // _TARGET_ARM_
// Callback for the tree walker, called once per node.
// Determines if we want to split, performs the split, and then processes the rest of the tree.
Compiler::fgWalkResult Compiler::fgSplitHelper(GenTree** ppTree, fgWalkData* data)
{
    GenTree*   tree = *ppTree;
    Compiler*  comp = data->compiler;

    SplitData* tmpState = (SplitData*)data->pCallbackData;

    fgSplitPredicate* pred = tmpState->pred;

    if (pred(tree, data->parent, data)) // does this look like somewhere we want to split?
    {
        //printf("tmpstate2 = %d %p r:%p tmp:%p tree:%p\n", tmpState->count, tmpState->pred, tmpState->root, tmpState, tree);
        GenTree* result = comp->fgMorphSplitTree(ppTree, tmpState->root, comp->compCurBB);

        GenTree* oldStatement = comp->compCurStmt;
        comp->compCurStmt = result;

        // because we are doing this in pre-order we also have to process
        // the subtree that we have just split off
        comp->fgSplitProcessOneTree(result, pred);

        comp->compCurStmt = oldStatement;

        return Compiler::WALK_SKIP_SUBTREES;
    }
    //else printf("tmpstate3 = %d %p r:%p tmp:%p tree:%p\n", tmpState->count, tmpState->pred, tmpState->root, tmpState, tree);

    return Compiler::WALK_CONTINUE;
}
void Compiler::fgSplitProcessOneTree(GenTree* tree, fgSplitPredicate pred)
{
    SplitData tmpState = {0};
    tmpState.pred = pred;
    tmpState.root = tree;

    fgWalkTreePre(&(tree->gtStmt.gtStmtExpr), fgSplitHelper, &tmpState);
}
// Split expression trees where the target requires it; currently this is
// only done for ARM, using the shouldSplitARM predicate above.
void Compiler::fgSplitMethodTrees(void)
{
#ifndef _TARGET_ARM_
    return;
#else // _TARGET_ARM_
    for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
    {
        for (GenTree* tree = block->bbTreeList; tree; tree = tree->gtNext)
        {
            assert(tree != tree->gtNext);
            fgSplitProcessOneTree(tree, shouldSplitARM);
        }
    }
#endif // _TARGET_ARM_
}
// Convert the given node into a call to the specified helper passing
// the given argument list.
// Tries to fold constants, and also adds an edge for the overflow exception.
// Returns the morphed tree.
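//
// For example (a sketch; the exact helper depends on the cast and target):
//     GT_CAST(double -> ulong)  morphs into  CALL CORINFO_HELP_DBL2ULNG(oper)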
GenTreePtr Compiler::fgMorphCastIntoHelper(GenTreePtr tree,
                                           int        helper,
                                           GenTreePtr oper)
{
    GenTreePtr result;

    /* If the operand is a constant, we'll try to fold it */
    if (oper->OperIsConst())
    {
        GenTreePtr oldTree = tree;

        tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

        if (tree != oldTree)
            return fgMorphTree(tree);
        else if (tree->OperKind() & GTK_CONST)
            return fgMorphConst(tree);

        // assert that oper is unchanged and that it is still a GT_CAST node
        noway_assert(tree->gtCast.CastOp() == oper);
        noway_assert(tree->gtOper == GT_CAST);
    }
    result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
    assert(result == tree);
    return result;
}
/*****************************************************************************
 *
 *  Convert the given node into a call to the specified helper passing
 *  the given argument list.
 */

GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr      tree,
                                           int             helper,
                                           GenTreeArgList* args)
{
    tree->ChangeOper(GT_CALL);

    tree->gtFlags |= GTF_CALL;
    tree->gtCall.gtCallType            = CT_HELPER;
    tree->gtCall.gtCallMethHnd         = eeFindHelper(helper);
    tree->gtCall.gtCallArgs            = args;
    tree->gtCall.gtCallObjp            = NULL;
    tree->gtCall.gtCallLateArgs        = NULL;
    tree->gtCall.fgArgInfo             = NULL;
    tree->gtCall.gtRetClsHnd           = NULL;
    tree->gtCall.gtCallRegUsedMask     = RBM_NONE;
    tree->gtCall.gtCallMoreFlags       = 0;
    tree->gtCall.gtInlineCandidateInfo = NULL;
    tree->gtCall.gtControlExpr         = NULL;

#ifdef FEATURE_READYTORUN_COMPILER
    tree->gtCall.gtEntryPoint.addr = nullptr;
#endif

    /* Perform the morphing */

    tree = fgMorphArgs(tree->AsCall());

    return tree;
}
/*****************************************************************************
 *
 *  This node should not be referenced by anyone now. Set its values to garbage
 *  to catch extra references.
 */

void DEBUG_DESTROY_NODE(GenTreePtr tree)
{
#ifdef DEBUG
    // printf("DEBUG_DESTROY_NODE for [0x%08x]\n", tree);

    // Save gtOper in case we want to find out what this node was
    tree->gtOperSave = tree->gtOper;

    tree->gtType   = TYP_UNDEF;
    tree->gtFlags |= 0xFFFFFFFF & ~GTF_NODE_MASK;
    if (tree->OperIsSimple())
    {
        tree->gtOp.gtOp2 = NULL;
    }
    // Must do this last, because the "gtOp" check above will fail otherwise.
    // Don't call SetOper, because GT_COUNT is not a valid value
    tree->gtOper = GT_COUNT;
#endif // DEBUG
}
/*****************************************************************************
 *
 *  Determine if a relop must be morphed to a qmark to manifest a boolean value.
 *  This is done when code generation can't create straight-line code to do it.
 */
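// Illustrative sketch for the LEGACY_BACKEND case below: manifesting the
// boolean value of a long compare, e.g. "b = (l1 < l2)", is morphed into a
// GT_QMARK, conceptually "b = (l1 < l2) ? 1 : 0", since the result cannot be
// materialized with straight-line code there.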
bool Compiler::fgMorphRelopToQmark(GenTreePtr tree)
{
#ifndef LEGACY_BACKEND
    return false;
#else // LEGACY_BACKEND
    return (genActualType(tree->TypeGet()) == TYP_LONG) ||
           varTypeIsFloating(tree->TypeGet());
#endif // LEGACY_BACKEND
}
/*****************************************************************************
 *
 *  Morph a cast node (we perform some very simple transformations here).
 */

#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable:21000) // Suppress PREFast warning about overly large function
#endif
GenTreePtr Compiler::fgMorphCast(GenTreePtr tree)
{
    noway_assert(tree->gtOper == GT_CAST);
    noway_assert(genTypeSize(TYP_I_IMPL) == sizeof(void*));
    /* The first sub-operand is the thing being cast */

    GenTreePtr oper    = tree->gtCast.CastOp();
    var_types  srcType = genActualType(oper->TypeGet());
    unsigned   srcSize;

    var_types  dstType = tree->CastToType();
    unsigned   dstSize = genTypeSize(dstType);
    // See if the cast has to be done in two steps.  R -> I
    if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
    {
        // Only x86 must go through TYP_DOUBLE to get to all
        // integral types; everybody else can get straight there,
        // except when using helpers.
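        // E.g. on AMD64 (sketch): CAST(float -> ulong) becomes
        // CAST(CAST(float -> double) -> ulong), because the conversion is
        // ultimately done by a helper that takes a double.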
        if (srcType == TYP_FLOAT
#if !FEATURE_STACK_FP_X87

#if defined(_TARGET_ARM64_)
            // Arm64: src = float, dst is overflow conversion.
            // This goes through a helper and hence src needs to be converted to double.
            && tree->gtOverflow()
#elif defined(_TARGET_AMD64_)
            // Amd64: src = float, dst = uint64 or overflow conversion.
            // This goes through a helper and hence src needs to be converted to double.
            && (tree->gtOverflow() || (dstType == TYP_ULONG))
#elif defined(_TARGET_ARM_)
            // Arm: src = float, dst = int64/uint64 or overflow conversion.
            && (tree->gtOverflow() || varTypeIsLong(dstType))
#endif

#endif // FEATURE_STACK_FP_X87
            )
        {
            oper = gtNewCastNode(TYP_DOUBLE, oper, TYP_DOUBLE);
        }
        // Do we need to do it in two steps: R -> I, I -> smallType?
#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
        if (dstSize < genTypeSize(TYP_INT))
        {
            oper = gtNewCastNodeL(TYP_INT, oper, TYP_INT);
            oper->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED|GTF_OVERFLOW|GTF_EXCEPT));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
#else
        if (dstSize < sizeof(void*))
        {
            oper = gtNewCastNodeL(TYP_I_IMPL, oper, TYP_I_IMPL);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW|GTF_EXCEPT));
        }
#endif
        /* Note that if we need to use a helper call then we can not morph oper */
        if (!tree->gtOverflow())
        {
#ifdef _TARGET_ARM64_ // On ARM64 all non-overflow checking conversions can be optimized
            goto OPTIMIZECAST;
#else
            switch (dstType)
            {
            case TYP_INT:
#ifdef _TARGET_X86_ // there is no rounding convert-to-integer instruction on ARM or x64 so skip this
                if ((oper->gtOper == GT_INTRINSIC) &&
                    (oper->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round))
                {
                    /* optimization: conv.i4(round.d(d)) -> round.i(d) */
                    oper->gtType = dstType;
                    return fgMorphTree(oper);
                }
                // if SSE2 is not enabled, we need the helper
                else if (!opts.compCanUseSSE2)
                {
                    return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
                }
#endif // _TARGET_X86_
                goto OPTIMIZECAST;

#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
            case TYP_UINT:  goto OPTIMIZECAST;
#else // _TARGET_ARM_
            case TYP_UINT:  return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
#endif // _TARGET_ARM_

#ifdef _TARGET_AMD64_
            // SSE2 has instructions to convert a float/double directly to a long
            case TYP_LONG:  goto OPTIMIZECAST;
#else
            case TYP_LONG:  return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
#endif //_TARGET_AMD64_
            case TYP_ULONG: return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
            default:
                break;
            }
#endif // _TARGET_ARM64_
        }
        else
        {
            switch (dstType)
            {
            case TYP_INT:   return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
            case TYP_UINT:  return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
            case TYP_LONG:  return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
            case TYP_ULONG: return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
            default:
                break;
            }
        }

        noway_assert(!"Unexpected dstType");
    }
#ifndef _TARGET_64BIT_
    // The code generation phase (for x86 & ARM32) does not handle casts
    // directly from [u]long to anything other than [u]int. Insert an
    // intermediate cast to native int.
    else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
    {
        oper = gtNewCastNode(TYP_I_IMPL, oper, TYP_I_IMPL);
        oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW|GTF_EXCEPT|GTF_UNSIGNED));
        tree->gtFlags &= ~GTF_UNSIGNED;
    }
#endif //!_TARGET_64BIT_

#ifdef _TARGET_ARM_
    else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) &&
             !varTypeIsLong(oper->gtCast.CastOp()))
    {
        // optimization: conv.r4(conv.r8(d)) -> conv.r4(d)
        // except when the ultimate source is a long, because there is no long-to-float helper,
        // so that case must stay a two-step conversion.
        // This happens semi-frequently because there is no IL 'conv.r4.un'.
        oper->gtType       = TYP_FLOAT;
        oper->CastToType() = TYP_FLOAT;
        return fgMorphTree(oper);
    }
    // convert long/ulong --> float/double casts into helper calls
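    // Sketch of the shapes produced below:
    //     CAST(long -> double)  =>  CALL CORINFO_HELP_LNG2DBL(oper)   (ULNG2DBL if unsigned)
    //     CAST(long -> float)   =>  CAST(CALL CORINFO_HELP_LNG2DBL(oper) -> float)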
    else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
    {
        if (dstType == TYP_FLOAT)
        {
            // there is only a double helper, so we
            // - change the dsttype to double
            // - insert a cast from double to float
            // - recurse into the resulting tree
            tree->CastToType() = TYP_DOUBLE;
            tree->gtType       = TYP_DOUBLE;

            tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);

            return fgMorphTree(tree);
        }
        if (tree->gtFlags & GTF_UNSIGNED)
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif //_TARGET_ARM_

#ifdef _TARGET_AMD64_
    // Do we have to do two step U4/8 -> R4/8 ?
    // Codegen supports the following conversions as one-step operations:
    //     (a) Long -> R4/R8
    //     (b) U8   -> R8
    //
    // The following conversions are performed as two-step operations using the above:
    //     U4 -> R4/8 = U4 -> Long -> R4/8
    //     U8 -> R4   = U8 -> R8 -> R4
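    // E.g. a sketch of the U4 case handled below: CAST(uint -> float) becomes
    // CAST(CAST(uint -> long) -> float), where the outer cast is then a
    // one-step codegen conversion.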
    else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            if (dstType == TYP_FLOAT)
            {
                // Codegen can handle U8 -> R8 conversion.
                // U8 -> R4 = U8 -> R8 -> R4
                // - change the dsttype to double
                // - insert a cast from double to float
                // - recurse into the resulting tree
                tree->CastToType() = TYP_DOUBLE;
                tree->gtType       = TYP_DOUBLE;
                tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
                return fgMorphTree(tree);
            }
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW|GTF_EXCEPT|GTF_UNSIGNED));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
    }
#endif // _TARGET_AMD64_

#ifdef _TARGET_X86_
    // Do we have to do two step U4/8 -> R4/8 ?
    else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW|GTF_EXCEPT|GTF_UNSIGNED));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
    }
#endif //_TARGET_XARCH_
    else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
    {
        // We are casting away GC information.  We would like to just
        // change the type to int, however this gives the emitter fits because
        // it believes the variable is a GC variable at the beginning of the
        // instruction group, but it is not turned non-gc by the code generator.
        // We fix this by copying the GC pointer to a non-gc pointer temp.
        noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");

        // We generate an assignment to an int and then do the cast from an int. With this we avoid
        // the gc problem and we allow casts to bytes, longs, etc...
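        //
        // Sketch of the resulting tree (V01 stands for the fresh temp):
        //     COMMA(ASG(V01<TYP_I_IMPL>, oper), CAST(V01 -> dstType))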
        unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
        oper->gtType = TYP_I_IMPL;
        GenTreePtr asg = gtNewTempAssign(lclNum, oper);
        oper->gtType = srcType;

        // do the real cast
        GenTreePtr cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), dstType);

        // Generate the comma tree
        oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);

        return fgMorphTree(oper);
    }

    // Look for narrowing casts ([u]long -> [u]int) and try to push them
    // down into the operand before morphing it.
    //
    // It doesn't matter if this cast is from ulong or long (i.e. if
    // GTF_UNSIGNED is set) because the transformation is only applied to
    // overflow-insensitive narrowing casts, which always silently truncate.
    //
    // Note that casts from [u]long to small integer types are handled above.
    if ((srcType == TYP_LONG) &&
        ((dstType == TYP_INT) || (dstType == TYP_UINT)))
    {
        // As a special case, look for overflow-sensitive casts of an AND
        // expression, and see if the second operand is a small constant. Since
        // the result of an AND is bound by its smaller operand, it may be
        // possible to prove that the cast won't overflow, which will in turn
        // allow the cast's operand to be transformed.
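        //
        // E.g. (sketch): for checked((uint)(lngVal & 0xFF)) the AND bounds the
        // value to [0, 255], so the overflow check can be removed before narrowing.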
        if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
        {
            GenTreePtr andOp2 = oper->gtOp.gtOp2;

            // Special case to the special case: AND with a casted int.
            if ((andOp2->OperGet() == GT_CAST) &&
                (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
            {
                // gtFoldExprConst will deal with whether the cast is signed or
                // unsigned, or overflow-sensitive.
                andOp2 = oper->gtOp.gtOp2 = gtFoldExprConst(andOp2);
            }

            // Look for a constant less than 2^32 for a cast to uint, or less
            // than 2^31 for a cast to int.
            int maxWidth = (dstType == TYP_UINT) ? 32 : 31;

            if ((andOp2->OperGet() == GT_CNS_NATIVELONG) &&
                ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
            {
                // This cast can't overflow.
                tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
            }
        }

        // Only apply this transformation during global morph,
        // when neither the cast node nor the oper node may throw an exception
        // based on the upper 32 bits.
        //
        if (fgGlobalMorph &&
            !tree->gtOverflow() &&
            !oper->gtOverflowEx())
        {
            // For these operations the lower 32 bits of the result only depend
            // upon the lower 32 bits of the operands.
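            //
            // E.g. (sketch): (int)(long1 + long2) can become (int)long1 + (int)long2,
            // with the add then performed as a 32-bit operation.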
            if ((oper->OperGet() == GT_ADD) ||
                (oper->OperGet() == GT_MUL) ||
                (oper->OperGet() == GT_AND) ||
                (oper->OperGet() == GT_OR)  ||
                (oper->OperGet() == GT_XOR))
            {
                DEBUG_DESTROY_NODE(tree);

                // Insert narrowing casts for op1 and op2
                oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, dstType);
                oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, dstType);

                // Clear the GT_MUL_64RSLT if it is set
                if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
                    oper->gtFlags &= ~GTF_MUL_64RSLT;

                // The operation now produces a 32-bit result.
                oper->gtType = TYP_INT;

                // Remorph the new tree as the casts that we added may be folded away.
                return fgMorphTree(oper);
            }
        }
    }

OPTIMIZECAST:
    noway_assert(tree->gtOper == GT_CAST);

    /* Morph the operand */
    tree->gtCast.CastOp() = oper = fgMorphTree(oper);

    /* Reset the call flag */
    tree->gtFlags &= ~GTF_CALL;

    /* Unless we have an overflow cast, reset the except flag */
    if (!tree->gtOverflow())
        tree->gtFlags &= ~GTF_EXCEPT;

    /* Just in case new side effects were introduced */
    tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);

    srcType = oper->TypeGet();

    /* If GTF_UNSIGNED is set then force srcType to an unsigned type */
    if (tree->gtFlags & GTF_UNSIGNED)
        srcType = genUnsignedType(srcType);

    srcSize = genTypeSize(srcType);

    if (!gtIsActiveCSE_Candidate(tree)) // tree cannot be a CSE candidate
    {
        /* See if we can discard the cast */
        if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
        {
            if (srcType == dstType) // Certainly if they are identical it is pointless
                goto REMOVE_CAST;

            if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
            {
                unsigned   varNum = oper->gtLclVarCommon.gtLclNum;
                LclVarDsc* varDsc = &lvaTable[varNum];
                if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
                    goto REMOVE_CAST;
            }

            bool unsignedSrc = varTypeIsUnsigned(srcType);
            bool unsignedDst = varTypeIsUnsigned(dstType);
            bool signsDiffer = (unsignedSrc != unsignedDst);

            // For same sized casts with
            // the same signs or non-overflow cast we discard them as well
            if (srcSize == dstSize)
            {
                /* This should have been handled above */
                noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));

                if (!signsDiffer)
                    goto REMOVE_CAST;

                if (!tree->gtOverflow())
                {
                    /* For small type casts, when necessary we force
                       the src operand to the dstType and allow the
                       implied load from memory to perform the casting */
                    if (varTypeIsSmall(srcType))
                    {
                        switch (oper->gtOper)
                        {
                        case GT_IND:
                        case GT_CLS_VAR:
                        case GT_LCL_FLD:
                        case GT_ARR_ELEM:
                            oper->gtType = dstType;
                            goto REMOVE_CAST;
                        default:
                            break;
                        }
                    }
                    else
                        goto REMOVE_CAST;
                }
            }

            if (srcSize < dstSize) // widening cast
            {
                // Keep any long casts
                if (dstSize == sizeof(int))
                {
                    // Only keep signed to unsigned widening cast with overflow check
                    if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
                        goto REMOVE_CAST;
                }

                // Casts from signed->unsigned can never overflow while widening
                if (unsignedSrc || !unsignedDst)
                    tree->gtFlags &= ~GTF_OVERFLOW;
            }

            // Try to narrow the operand of the cast and discard the cast
            // Note: Do not narrow a cast that is marked as a CSE,
            // and do not narrow if the oper is marked as a CSE either.
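            //
            // Note the two-call protocol below: the first optNarrowTree call
            // (doit == false) only checks that narrowing is legal; the second
            // (doit == true) actually transforms the operand.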
            if (!tree->gtOverflow() &&
                !gtIsActiveCSE_Candidate(oper) &&
                (opts.compFlags & CLFLG_TREETRANS) &&
                optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false))
            {
                optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true);

                /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
                if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
                {
                    oper = oper->gtCast.CastOp();
                }
                goto REMOVE_CAST;
            }
        }

        switch (oper->gtOper)
        {
        /* If the operand is a constant, we'll fold it */
        case GT_CNS_INT:
        case GT_CNS_LNG:
        case GT_CNS_DBL:
        case GT_CNS_STR:
        {
            GenTreePtr oldTree = tree;

            tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

            // Did we get a comma throw as a result of gtFoldExprConst?
            if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
            {
                noway_assert(fgIsCommaThrow(tree));
                tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
                fgMorphTreeDone(tree);
                return tree;
            }
            else if (tree->gtOper != GT_CAST)
                return tree;

            noway_assert(tree->gtCast.CastOp() == oper); // unchanged
            break;
        }

        case GT_CAST:
            /* Check for two consecutive casts into the same dstType */
            if (!tree->gtOverflow())
            {
                var_types dstType2 = oper->CastToType();
                if (dstType == dstType2)
                    goto REMOVE_CAST;
            }
            break;

        /* If op1 is a mod node, mark it with the GTF_MOD_INT_RESULT flag
           so that the code generator will know not to convert the result
           of the idiv to a regpair */
        case GT_MOD:
            if (dstType == TYP_INT)
                tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
            break;

        case GT_UMOD:
            if (dstType == TYP_UINT)
                tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
            break;

        case GT_COMMA:
            // Check for cast of a GT_COMMA with a throw overflow
            // Bug 110829: Since this optimization will bash the types,
            // neither oper nor commaOp2 can be CSE candidates.
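            //
            // Sketch of the transformation below: CAST(COMMA(throw, x)) becomes
            // COMMA(throw, 0), with oper and the zero constant retyped to the
            // cast's result type, since no real value ever flows out of the comma.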
            if (fgIsCommaThrow(oper) &&
                !gtIsActiveCSE_Candidate(oper)) // oper can not be a CSE candidate
            {
                GenTreePtr commaOp2 = oper->gtOp.gtOp2;

                if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 can not be a CSE candidate
                {
                    // need type of oper to be same as tree
                    if (tree->gtType == TYP_LONG)
                    {
                        commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
                        commaOp2->gtIntConCommon.SetLngValue(0);
                        /* Change the types of oper and commaOp2 to TYP_LONG */
                        oper->gtType = commaOp2->gtType = TYP_LONG;
                    }
                    else if (varTypeIsFloating(tree->gtType))
                    {
                        commaOp2->ChangeOperConst(GT_CNS_DBL);
                        commaOp2->gtDblCon.gtDconVal = 0.0;
                        // Change the types of oper and commaOp2.
                        // X87 promotes everything to TYP_DOUBLE,
                        // but other targets are a little more precise.
                        const var_types newTyp
#if FEATURE_X87_DOUBLES
                                               = TYP_DOUBLE;
#else // FEATURE_X87_DOUBLES
                                               = tree->gtType;
#endif // FEATURE_X87_DOUBLES
                        oper->gtType = commaOp2->gtType = newTyp;
                    }
                    else
                    {
                        commaOp2->ChangeOperConst(GT_CNS_INT);
                        commaOp2->gtIntCon.gtIconVal = 0;
                        /* Change the types of oper and commaOp2 to TYP_INT */
                        oper->gtType = commaOp2->gtType = TYP_INT;
                    }

                    if (vnStore != nullptr)
                    {
                        fgValueNumberTreeConst(commaOp2);
                    }
                }

                /* Return the GT_COMMA node as the new tree */
                return oper;
            }
            break;

        default:
            break;
        } /* end switch (oper->gtOper) */
    }

    if (tree->gtOverflow())
        fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);

    return tree;

REMOVE_CAST:

    /* Here we've eliminated the cast, so just return its operand */
    assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate

    DEBUG_DESTROY_NODE(tree);
    return oper;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif

/*****************************************************************************
 *
 *  Perform an unwrap operation on a Proxy object.
 */

GenTreePtr Compiler::fgUnwrapProxy(GenTreePtr objRef)
{
    assert(info.compIsContextful &&
           info.compUnwrapContextful &&
           impIsThis(objRef));

    CORINFO_EE_INFO* pInfo = eeGetEEInfo();
    GenTreePtr       addTree;

    // Perform the unwrap:
    //
    //   This requires two extra indirections.
    //   We mark these indirections as 'invariant' and
    //   the CSE logic will hoist them when appropriate.
    //
    //   Note that each dereference is a GC pointer.
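    //
    //   The resulting shape is (sketch):
    //       objRef = IND(ADD(objRef, offsetOfTransparentProxyRP))   // invariant
    //       objRef = IND(ADD(objRef, offsetOfRealProxyServer))      // invariant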
    addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL,
                            objRef,
                            gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL));

    objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
    objRef->gtFlags |= GTF_IND_INVARIANT;

    addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL,
                            objRef,
                            gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL));

    objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
    objRef->gtFlags |= GTF_IND_INVARIANT;

    // objRef now holds the 'real this' reference (i.e. the unwrapped proxy)
    return objRef;
}

/*****************************************************************************
 *
 *  Morph an argument list; compute the pointer argument count in the process.
 *
 *  NOTE: This function can be called from any place in the JIT to perform re-morphing
 *  due to graph altering modifications such as copy / constant propagation.
 */

unsigned UpdateGT_LISTFlags(GenTreePtr tree)
{
    assert(tree->gtOper == GT_LIST);

    unsigned flags = 0;
    if (tree->gtOp.gtOp2)
    {
        flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2);
    }
    flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);

    tree->gtFlags &= ~GTF_ALL_EFFECT;
    tree->gtFlags |= flags;

    return tree->gtFlags;
}

void fgArgTabEntry::Dump()
{
    if (regNum == REG_STK)
    {
        printf("fgArgTabEntry[arg%d, stk%02x, slots=%d", argNum, slotNum, numSlots);
    }
    else
    {
#ifdef _TARGET_ARM64_
        if (emitter::isFloatReg(regNum))
        {
            printf("fgArgTabEntry[arg%d, d%d, regs=%d", argNum, regNum-REG_FP_FIRST, numRegs);
        }
        else // integer register
        {
            printf("fgArgTabEntry[arg%d, x%d, regs=%d", argNum, regNum-REG_INT_FIRST, numRegs);
        }
#else
        printf("fgArgTabEntry[arg%02d, r%d, regs=%d", argNum, regNum, numRegs);
#endif // _TARGET_ARM64_
    }
    if (tmpNum != (unsigned)-1)
    {
        printf(", tmpNum=V%02d", tmpNum);
    }
    printf("]\n");
}

fgArgInfo::fgArgInfo(Compiler* comp, GenTreePtr call, unsigned numArgs)
{
    compiler     = comp;
    callTree     = call;    assert(call->IsCall());
    argCount     = 0;       // filled in arg count, starts at zero
    nextSlotNum  = INIT_ARG_STACK_SLOT;
    stkLevel     = 0;
    argTableSize = numArgs; // the allocated table size
    argsComplete = false;
    argsSorted   = false;

    if (argTableSize == 0)
        argTable = NULL;
    else
        argTable = new(compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
}

/*****************************************************************************
 *
 *  fgArgInfo Copy Constructor
 *
 *  This method needs to act like a copy constructor for fgArgInfo.
 *  The newCall needs to have its fgArgInfo initialized such that
 *  we have newCall that is an exact copy of the oldCall.
 *  We have to take care since the argument information
 *  in the argTable contains pointers that must point to the
 *  new arguments and not the old arguments.
 */
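//
// A summary of the fixup implemented below: the early argument lists are
// walked first, matching each table entry by its 'parent' field and
// block-copying it; any entry whose 'node' points into gtCallLateArgs is
// left NULL and patched up in a second walk over the late argument lists.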
fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
{
    assert(oldCall->IsCall());
    assert(newCall->IsCall());

    fgArgInfoPtr oldArgInfo = oldCall->gtCall.fgArgInfo;

    compiler     = oldArgInfo->compiler;
    callTree     = newCall;  assert(newCall->IsCall());
    argCount     = 0;       // filled in arg count, starts at zero
    nextSlotNum  = INIT_ARG_STACK_SLOT;
    stkLevel     = oldArgInfo->stkLevel;
    argTableSize = oldArgInfo->argTableSize;
    argsComplete = false;

    if (argTableSize > 0)
    {
        argTable = new(compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
        for (unsigned inx=0; inx<argTableSize; inx++)
        {
            argTable[inx] = NULL;
        }
    }

    assert(oldArgInfo->argsComplete);

    // We create local, artificial GenTreeArgLists that include the gtCallObjp, if that exists, as the first
    // argument, so we can iterate over these argument lists more uniformly.
    // Need to provide a temporary non-null first argument to these constructors: if we use it, we'll replace it.
    GenTreeArgList* newArgs;
    GenTreeArgList  newArgObjp(newCall, newCall->gtCall.gtCallArgs);
    GenTreeArgList* oldArgs;
    GenTreeArgList  oldArgObjp(oldCall, oldCall->gtCall.gtCallArgs);

    if (newCall->gtCall.gtCallObjp == NULL)
    {
        assert(oldCall->gtCall.gtCallObjp == NULL);
        newArgs = newCall->gtCall.gtCallArgs;
        oldArgs = oldCall->gtCall.gtCallArgs;
    }
    else
    {
        assert(oldCall->gtCall.gtCallObjp != NULL);
        newArgObjp.Current() = newCall->gtCall.gtCallObjp;
        newArgs = &newArgObjp;
        oldArgObjp.Current() = oldCall->gtCall.gtCallObjp;
        oldArgs = &oldArgObjp;
    }

    GenTreePtr        newCurr;
    GenTreePtr        oldCurr;
    GenTreeArgList*   newParent   = NULL;
    GenTreeArgList*   oldParent   = NULL;
    fgArgTabEntryPtr* oldArgTable = oldArgInfo->argTable;
    bool              scanRegArgs = false;

    while (newArgs)
    {
        /* Get hold of the next argument values for the oldCall and newCall */

        newCurr = newArgs->Current();
        oldCurr = oldArgs->Current();
        if (newArgs != &newArgObjp)
        {
            newParent = newArgs;
            oldParent = oldArgs;
        }
        else
        {
            assert(newParent == NULL && oldParent == NULL);
        }
        newArgs = newArgs->Rest();
        oldArgs = oldArgs->Rest();
        fgArgTabEntryPtr oldArgTabEntry = NULL;
        fgArgTabEntryPtr newArgTabEntry = NULL;

        for (unsigned inx=0; inx<argTableSize; inx++)
        {
            oldArgTabEntry = oldArgTable[inx];

            if (oldArgTabEntry->parent == oldParent)
            {
                assert((oldParent == NULL) == (newParent == NULL));

                // We have found the matching "parent" field in oldArgTabEntry

                newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

                // First block copy all fields
                //
                *newArgTabEntry = *oldArgTabEntry;

                // Then update all GenTreePtr fields in the newArgTabEntry
                //
                newArgTabEntry->parent = newParent;

                // The node field is likely to have been updated
                // to point at a node in the gtCallLateArgs list
                //
                if (oldArgTabEntry->node == oldCurr)
                {
                    // node is not pointing into the gtCallLateArgs list
                    newArgTabEntry->node = newCurr;
                }
                else
                {
                    // node must be pointing into the gtCallLateArgs list
                    //
                    // We will fix this pointer up in the next loop
                    //
                    newArgTabEntry->node = NULL; // For now we assign a NULL to this field
                    scanRegArgs = true;
                }

                // Now initialize the proper element in the argTable array
                //
                argTable[inx] = newArgTabEntry;
                break;
            }
        }
        // We should have found the matching oldArgTabEntry and created the newArgTabEntry
        //
        assert(newArgTabEntry != NULL);
    }

    if (scanRegArgs)
    {
        newArgs = newCall->gtCall.gtCallLateArgs;
        oldArgs = oldCall->gtCall.gtCallLateArgs;

        while (newArgs)
        {
            /* Get hold of the next argument values for the oldCall and newCall */

            assert(newArgs->IsList());

            newCurr = newArgs->Current();
            newArgs = newArgs->Rest();

            assert(oldArgs->IsList());

            oldCurr = oldArgs->Current();
            oldArgs = oldArgs->Rest();

            fgArgTabEntryPtr oldArgTabEntry = NULL;
            fgArgTabEntryPtr newArgTabEntry = NULL;

            for (unsigned inx=0; inx<argTableSize; inx++)
            {
                oldArgTabEntry = oldArgTable[inx];

                if (oldArgTabEntry->node == oldCurr)
                {
                    // We have found the matching "node" field in oldArgTabEntry

                    newArgTabEntry = argTable[inx];
                    assert(newArgTabEntry != NULL);

                    // update the "node" GenTreePtr fields in the newArgTabEntry
                    //
                    assert(newArgTabEntry->node == NULL); // We previously assigned NULL to this field

                    newArgTabEntry->node = newCurr;
                    break;
                }
            }
        }
    }

    argCount     = oldArgInfo->argCount;
    nextSlotNum  = oldArgInfo->nextSlotNum;
    argsComplete = true;
    argsSorted   = true;
}
void fgArgInfo::AddArg(fgArgTabEntryPtr curArgTabEntry)
{
    assert(argCount < argTableSize);
    argTable[argCount] = curArgTabEntry;
    argCount++;
}

fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned   argNum,
                                      GenTreePtr node,
                                      GenTreePtr parent,
                                      regNumber  regNum,
                                      unsigned   numRegs,
                                      unsigned   alignment)
{
    fgArgTabEntryPtr curArgTabEntry = new(compiler, CMK_fgArgInfo) fgArgTabEntry;

    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->parent        = parent;
    curArgTabEntry->regNum        = regNum;
    curArgTabEntry->slotNum       = 0;
    curArgTabEntry->numRegs       = numRegs;
    curArgTabEntry->numSlots      = 0;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = (unsigned)-1;
    curArgTabEntry->tmpNum        = (unsigned)-1;
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->isHfaRegArg   = false;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;

    AddArg(curArgTabEntry);
    return curArgTabEntry;
}
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned   argNum,
                                      GenTreePtr node,
                                      GenTreePtr parent,
                                      regNumber  regNum,
                                      unsigned   numRegs,
                                      unsigned   alignment,
                                      const bool isStruct,
                                      const regNumber otherRegNum,
                                      const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
{
    fgArgTabEntryPtr curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
    assert(curArgTabEntry != nullptr);

    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
    // PlaceHolder node (in case of a needed late argument, for example.)
    // This requires using an extra flag; at creation time the state is right, and
    // this assert enforces that.
    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
    curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
    curArgTabEntry->isStruct    = isStruct;    // is this a struct arg

    if (isStruct && structDescPtr != nullptr)
    {
        curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
    }

    return curArgTabEntry;
}
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned   argNum,
                                      GenTreePtr node,
                                      GenTreePtr parent,
                                      unsigned   numSlots,
                                      unsigned   alignment
                                      FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct))
{
    fgArgTabEntryPtr curArgTabEntry = new(compiler, CMK_fgArgInfo) fgArgTabEntry;

    nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);

#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
    // PlaceHolder node (in case of a needed late argument, for example.)
    // This requires using an extra flag; at creation time the state is right, and
    // this assert enforces that.
    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
    curArgTabEntry->isStruct = isStruct;   // is this a struct arg
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->parent        = parent;
    curArgTabEntry->regNum        = REG_STK;
    curArgTabEntry->slotNum       = nextSlotNum;
    curArgTabEntry->numRegs       = 0;
    curArgTabEntry->numSlots      = numSlots;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = (unsigned)-1;
    curArgTabEntry->tmpNum        = (unsigned)-1;
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->isHfaRegArg   = false;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;

    AddArg(curArgTabEntry);

    nextSlotNum += numSlots;
    return curArgTabEntry;
}
void fgArgInfo::RemorphReset()
{
    nextSlotNum = INIT_ARG_STACK_SLOT;
}

fgArgTabEntry* fgArgInfo::RemorphRegArg(unsigned   argNum,
                                        GenTreePtr node,
                                        GenTreePtr parent,
                                        regNumber  regNum,
                                        unsigned   numRegs,
                                        unsigned   alignment)
{
    fgArgTabEntryPtr curArgTabEntry = NULL;
    unsigned         regArgInx      = 0;
    unsigned         inx;

    for (inx=0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
            break;

        bool       isRegArg;
        GenTreePtr argx;
        if (curArgTabEntry->parent != NULL)
        {
            assert(curArgTabEntry->parent->IsList());
            argx     = curArgTabEntry->parent->Current();
            isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
        }
        else
        {
            argx     = curArgTabEntry->node;
            isRegArg = true;
        }

        if (isRegArg)
        {
            regArgInx++;
        }
    }
    // if this was a nonstandard arg the table is definitive
    if (curArgTabEntry->isNonStandard)
        regNum = curArgTabEntry->regNum;

    assert(curArgTabEntry->argNum    == argNum);
    assert(curArgTabEntry->regNum    == regNum);
    assert(curArgTabEntry->alignment == alignment);
    assert(curArgTabEntry->parent    == parent);

    if (curArgTabEntry->node != node)
    {
        GenTreePtr argx     = NULL;
        unsigned   regIndex = 0;

        /* process the register argument list */
        for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; (regIndex++, list = list->Rest()))
        {
            argx = list->Current();
            assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
            if (regIndex == regArgInx)
                break;
        }
        assert(regIndex == regArgInx);
        assert(regArgInx == curArgTabEntry->lateArgInx);

        if (curArgTabEntry->node != argx)
        {
            curArgTabEntry->node = argx;
        }
    }
    return curArgTabEntry;
}
void fgArgInfo::RemorphStkArg(unsigned   argNum,
                              GenTreePtr node,
                              GenTreePtr parent,
                              unsigned   numSlots,
                              unsigned   alignment)
{
    fgArgTabEntryPtr curArgTabEntry = NULL;
    bool             isRegArg       = false;
    unsigned         regArgInx      = 0;
    GenTreePtr       argx;
    unsigned         inx;

    for (inx=0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];

        if (curArgTabEntry->parent != NULL)
        {
            assert(curArgTabEntry->parent->IsList());
            argx     = curArgTabEntry->parent->Current();
            isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
        }
        else
        {
            argx     = curArgTabEntry->node;
            isRegArg = true;
        }

        if (curArgTabEntry->argNum == argNum)
            break;

        if (isRegArg)
            regArgInx++;
    }

    nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);

    assert(curArgTabEntry->argNum    == argNum);
    assert(curArgTabEntry->slotNum   == nextSlotNum);
    assert(curArgTabEntry->numSlots  == numSlots);
    assert(curArgTabEntry->alignment == alignment);
    assert(curArgTabEntry->parent    == parent);
    assert(parent->IsList());

#if FEATURE_FIXED_OUT_ARGS
    if (curArgTabEntry->node != node)
    {
        if (isRegArg)
        {
            GenTreePtr argx     = NULL;
            unsigned   regIndex = 0;

            /* process the register argument list */
            for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), regIndex++)
            {
                argx = list->Current();
                assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
                if (regIndex == regArgInx)
                    break;
            }
            assert(regIndex == regArgInx);
            assert(regArgInx == curArgTabEntry->lateArgInx);

            if (curArgTabEntry->node != argx)
            {
                curArgTabEntry->node = argx;
            }
        }
        else
        {
            assert(parent->Current() == node);
            curArgTabEntry->node = node;
        }
    }
#else
    curArgTabEntry->node = node;
#endif // FEATURE_FIXED_OUT_ARGS

    nextSlotNum += numSlots;
}
void fgArgInfo::SplitArg(unsigned argNum,
                         unsigned numRegs,
                         unsigned numSlots)
{
    fgArgTabEntryPtr curArgTabEntry = NULL;
    assert(argNum < argCount);
    for (unsigned inx=0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
            break;
    }

    assert(numRegs  > 0);
    assert(numSlots > 0);

    curArgTabEntry->isSplit  = true;
    curArgTabEntry->numRegs  = numRegs;
    curArgTabEntry->numSlots = numSlots;

    nextSlotNum += numSlots;
}

void fgArgInfo::EvalToTmp(unsigned   argNum,
                          unsigned   tmpNum,
                          GenTreePtr newNode)
{
    fgArgTabEntryPtr curArgTabEntry = NULL;
    assert(argNum < argCount);
    for (unsigned inx=0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
            break;
    }
    assert(curArgTabEntry->parent->Current() == newNode);

    curArgTabEntry->node   = newNode;
    curArgTabEntry->tmpNum = tmpNum;
    curArgTabEntry->isTmp  = true;
}
void fgArgInfo::ArgsComplete()
{
    bool hasStackArgs    = false;
    bool hasStructRegArg = false;

    for (unsigned curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
        assert(curArgTabEntry != NULL);
        GenTreePtr       argx           = curArgTabEntry->node;

#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
        // If this is a struct, mark it for needing a tempVar.
        // In the copyblk and store cases this should have minimal perf impact since
        // the local vars we copy/store to already exist, and the temp-var logic
        // will not create a new one if it creates a tempVar from another tempVar.
        // (Debugging through the code showed no new copy of the data, nor a new tempVar, being created.)
        // The need for this arises from Lower::LowerArg.
        // In case of a copyblk or store operation, the NewPutArg method will
        // not be invoked and the struct will not be loaded to be passed in
        // registers or by value on the stack.
        if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || curArgTabEntry->isStruct))
        {
            curArgTabEntry->needTmp = true;
        }
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

        if (curArgTabEntry->regNum == REG_STK)
        {
            hasStackArgs = true;
#if !FEATURE_FIXED_OUT_ARGS
            // On x86 we use push instructions to pass arguments:
            //   The non-register arguments are evaluated and pushed in order
            //   and they are never evaluated into temps
            //
            continue;
#endif
        }
        else // we have a register argument, next we look for a struct type.
        {
            if (varTypeIsStruct(argx)
                FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || curArgTabEntry->isStruct))
            {
                hasStructRegArg = true;
            }
        }

        /* If the argument tree contains an assignment (GTF_ASG) then the argument and
           every earlier argument (except constants) must be evaluated into temps,
           since there may be other arguments that follow and they may use the value being assigned.

           EXAMPLE: ArgTab is "a, a=5, a"
                    -> when we see the second arg "a=5"
                       we know the first two arguments "a, a=5" have to be evaluated into temps

           For the case of an assignment, we only know that there exists some assignment someplace
           in the tree.  We don't know what is being assigned, so we are very conservative here
           and assume that any local variable could have been assigned.
         */
        if (argx->gtFlags & GTF_ASG)
        {
            // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
            // a tmp, then we need a temp in the late arg list.
            if ((argCount > 1) || argx->OperIsCopyBlkOp()
#ifdef FEATURE_FIXED_OUT_ARGS
                || curArgTabEntry->isTmp  // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
                                          // that we only have late non-register args when that feature is on.
#endif // FEATURE_FIXED_OUT_ARGS
                )
            {
                curArgTabEntry->needTmp = true;
            }

            // For all previous arguments, unless they are a simple constant
            // we require that they be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);

                assert(prevArgTabEntry->node);
                if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
                {
                    prevArgTabEntry->needTmp = true;
                }
            }
        }

#if FEATURE_FIXED_OUT_ARGS
        // Like calls, if this argument has a tree that will do an inline throw,
        // a call to a jit helper, then we need to treat it like a call (but only
        // if there are/were any stack args).
        // This means unnesting, sorting, etc.  Technically this is overly
        // conservative, but I want to avoid as much special-case debug-only code
        // as possible, so leveraging the GTF_CALL flag is the easiest.
        //
        if (!(argx->gtFlags & GTF_CALL) &&
            (argx->gtFlags & GTF_EXCEPT) &&
            (argCount > 1) &&
            compiler->opts.compDbgCode &&
            (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
        {
            for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
            {
                if (otherInx == curInx)
                    continue;

                if (argTable[otherInx]->regNum == REG_STK)
                {
                    argx->gtFlags |= GTF_CALL;
                    break;
                }
            }
        }
#endif // FEATURE_FIXED_OUT_ARGS

        /* If it contains a call (GTF_CALL) then it and everything before the call
           with a GLOB_EFFECT must be evaluated into temps (this is because everything
           with a SIDE_EFFECT has to be kept in the right order since we will move
           the call to the first position).

           For calls we don't have to be quite as conservative as we are with an assignment
           since the call won't be modifying any non-address-taken LclVars.
         */
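        //
        // Example (a sketch): in call(a[i], Bar(), x) the a[i] argument has a
        // GLOB_EFFECT and precedes the nested call, so it must be evaluated
        // into a temp once Bar() is moved to the late argument list.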
        if (argx->gtFlags & GTF_CALL)
        {
            if (argCount > 1) // If this is not the only argument
            {
                curArgTabEntry->needTmp = true;
            }
            else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
            {
                // Spill all arguments that are floating point calls
                curArgTabEntry->needTmp = true;
            }

            // All previous arguments may need to be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
                assert(prevArgTabEntry->node);

                // For all previous arguments, if they have any GTF_ALL_EFFECT
                //  we require that they be evaluated into a temp
                if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
                {
                    prevArgTabEntry->needTmp = true;
                }
#if FEATURE_FIXED_OUT_ARGS
                // Or, if they are stored into the FIXED_OUT_ARG area
                // we require that they be moved to the gtCallLateArgs
                // and replaced with a placeholder node
                else if (prevArgTabEntry->regNum == REG_STK)
                {
                    prevArgTabEntry->needPlace = true;
                }
#endif
            }
        }

#ifndef LEGACY_BACKEND
        // For the RyuJIT backend we will expand a Multireg arg into a GT_LIST
        // with multiple indirections, so here we consider spilling it into a tmp LclVar.
        //
        // Note that Arm32 is a LEGACY_BACKEND and it defines FEATURE_MULTIREG_ARGS,
        // so we skip this for ARM32 until it is ported to use the RyuJIT backend.
        //
#if FEATURE_MULTIREG_ARGS
        if ((argx->TypeGet() == TYP_STRUCT) &&
            (curArgTabEntry->numRegs > 1)   &&
            (curArgTabEntry->needTmp == false))
        {
            if ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0)
            {
                // Spill multireg struct arguments that have Assignments or Calls embedded in them
                curArgTabEntry->needTmp = true;
            }
            else
            {
                // We call gtPrepareCost to measure the cost of evaluating this tree
                compiler->gtPrepareCost(argx);

                if (argx->gtCostEx > (6 * IND_COST_EX))
                {
                    // Spill multireg struct arguments that are expensive to evaluate twice
                    curArgTabEntry->needTmp = true;
                }
                else if (argx->OperGet() == GT_OBJ)
                {
                    GenTreeObj*          argObj     = argx->AsObj();
                    CORINFO_CLASS_HANDLE objClass   = argObj->gtClass;
                    unsigned             structSize = compiler->info.compCompHnd->getClassSize(objClass);
                    switch (structSize)
                    {
                    case 11:
                    case 13:
                    case 14:
                    case 15:
                        // Spill any GT_OBJ multireg structs that are difficult to extract
                        //
                        // When we have a GT_OBJ of a struct with the above sizes we would need
                        // to use 3 or 4 load instructions to load the exact size of this struct.
                        // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
                        // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
                        // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
                        // the argument.
                        //
                        curArgTabEntry->needTmp = true;
                        break;

                    default:
                        break;
                    }
                }
            }
        }
#endif // FEATURE_MULTIREG_ARGS
#endif // LEGACY_BACKEND
    }

    // We only care because we can't spill structs and qmarks involve a lot of spilling, but
    // if we don't have qmarks, then it doesn't matter.
    // So check for Qmarks globally once here, instead of inside the loop.
    //
    const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);

#if FEATURE_FIXED_OUT_ARGS

    // For Arm/x64 we only care because we can't reorder a register
    // argument that uses GT_LCLHEAP.  This is an optimization to
    // save a check inside the below loop.
    //
    const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);

#else

    const bool hasStackArgsWeCareAbout = hasStackArgs;

#endif // FEATURE_FIXED_OUT_ARGS

    // If we have any stack args we have to force the evaluation
    // of any arguments passed in registers that might throw an exception
    //
    // Technically we are only required to handle the following two cases:
    //     a GT_IND with GTF_IND_RNGCHK (only on x86) or
    //     a GT_LCLHEAP node that allocates stuff on the stack
    //
    if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
    {
        for (unsigned curInx = 0; curInx < argCount; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];  assert(curArgTabEntry != NULL);
            GenTreePtr       argx           = curArgTabEntry->node;

            // Examine the register args that are currently not marked needTmp
            //
            if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
            {
                if (hasStackArgsWeCareAbout)
                {
#if !FEATURE_FIXED_OUT_ARGS
                    // On x86 we previously recorded a stack depth of zero when
                    // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag.
                    // Thus we can not reorder the argument after any stack based argument.
                    // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
                    // check for it explicitly.)
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        curArgTabEntry->needTmp = true;
                        continue;
                    }
#else
                    // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        assert(compiler->compLocallocUsed);

                        // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
                        //
                        if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
                        {
                            curArgTabEntry->needTmp = true;
                            continue;
                        }
                    }
#endif
                }
                if (hasStructRegArgWeCareAbout)
                {
                    // Returns WALK_ABORT if a GT_QMARK node is encountered in the argx tree
                    //
                    if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
                    {
                        curArgTabEntry->needTmp = true;
                        continue;
                    }
                }
            }
        }
    }

    argsComplete = true;
}
void fgArgInfo::SortArgs()
{
    assert(argsComplete == true);

    /* Shuffle the arguments around before we build the gtCallLateArgs list.
       The idea is to move all "simple" arguments like constants and local vars
       to the end of the table, and move the complex arguments towards the beginning
       of the table. This will help prevent registers from being spilled by
       allowing us to evaluate the more complex arguments before the simpler arguments.
       The argTable ends up looking like:
           +------------------------------------+  <--- argTable[argCount - 1]
           |          constants                 |
           +------------------------------------+
           |    local var / local field         |
           +------------------------------------+
           | remaining arguments sorted by cost |
           +------------------------------------+
           | temps (argTable[].needTmp = true)  |
           +------------------------------------+
           |  args with calls (GTF_CALL)        |
           +------------------------------------+  <--- argTable[0]
     */

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("\nSorting the arguments:\n");
    }
#endif

    /* Set the beginning and end for the new argument table */

    unsigned curInx;
    unsigned regCount      = 0;
    unsigned begTab        = 0;
    unsigned endTab        = argCount - 1;
    unsigned argsRemaining = argCount;

    // First take care of arguments that are constants.
    // [We use a backward iterator pattern]
    //
    curInx = argCount;
    do
    {
        curInx--;

        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
            regCount++;

        // Skip any already processed args
        //
        if (!curArgTabEntry->processed)
        {
            GenTreePtr argx = curArgTabEntry->node;

            // put constants at the end of the table
            //
            if (argx->gtOper == GT_CNS_INT)
            {
                noway_assert(curInx <= endTab);

                curArgTabEntry->processed = true;

                // place curArgTabEntry at the endTab position by performing a swap
                //
                if (curInx != endTab)
                {
                    argTable[curInx] = argTable[endTab];
                    argTable[endTab] = curArgTabEntry;
                }

                endTab--;
                argsRemaining--;
            }
        }
    } while (curInx > 0);

    if (argsRemaining > 0)
    {
        // Next take care of arguments that are calls.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                // put calls at the beginning of the table
                //
                if (argx->gtFlags & GTF_CALL)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }

    if (argsRemaining > 0)
    {
        // Next take care of arguments that are temps.
        // These temps come before the arguments that are
        // ordinary local vars or local fields,
        // since this will give them a better chance to become
        // enregistered into their actual argument register.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                if (curArgTabEntry->needTmp)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }

    if (argsRemaining > 0)
    {
        // Next take care of local var and local field arguments.
        // These are moved towards the end of the argument evaluation.
        // [We use a backward iterator pattern]
        //
        curInx = endTab + 1;
        do
        {
            curInx--;

            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
                {
                    noway_assert(curInx <= endTab);

                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the endTab position by performing a swap
                    //
                    if (curInx != endTab)
                    {
                        argTable[curInx] = argTable[endTab];
                        argTable[endTab] = curArgTabEntry;
                    }

                    endTab--;
                    argsRemaining--;
                }
            }
        } while (curInx > begTab);
    }
    // Finally, take care of all the remaining arguments.
    // Note that we fill in one arg at a time using a while loop.
    bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
    while (argsRemaining > 0)
    {
        /* Find the most expensive arg remaining and evaluate it next */

        fgArgTabEntryPtr expensiveArgTabEntry = NULL;
        unsigned         expensiveArg         = UINT_MAX;
        unsigned         expensiveArgCost     = 0;

        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                // We should have already handled these kinds of args
                assert(argx->gtOper != GT_LCL_VAR);
                assert(argx->gtOper != GT_LCL_FLD);
                assert(argx->gtOper != GT_CNS_INT);

                // This arg should either have no persistent side effects or be the last one in our table
                // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));

                if (argsRemaining == 1)
                {
                    // This is the last arg to place
                    expensiveArg         = curInx;
                    expensiveArgTabEntry = curArgTabEntry;
                    assert(begTab == endTab);
                    break;
                }
                else
                {
                    if (!costsPrepared)
                    {
                        /* We call gtPrepareCost to measure the cost of evaluating this tree */
                        compiler->gtPrepareCost(argx);
                    }

                    if (argx->gtCostEx > expensiveArgCost)
                    {
                        // Remember this arg as the most expensive one that we have yet seen
                        expensiveArgCost     = argx->gtCostEx;
                        expensiveArg         = curInx;
                        expensiveArgTabEntry = curArgTabEntry;
                    }
                }
            }
        }

        noway_assert(expensiveArg != UINT_MAX);

        // put the most expensive arg towards the beginning of the table
        //
        expensiveArgTabEntry->processed = true;

        // place expensiveArgTabEntry at the begTab position by performing a swap
        //
        if (expensiveArg != begTab)
        {
            argTable[expensiveArg] = argTable[begTab];
            argTable[begTab]       = expensiveArgTabEntry;
        }

        begTab++;
        argsRemaining--;

        costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop
    }

    // The table should now be completely filled and thus begTab should now be adjacent to endTab
    // and argsRemaining should be zero
    assert(begTab == (endTab + 1));
    assert(argsRemaining == 0);

#if !FEATURE_FIXED_OUT_ARGS
    // Finally build the regArgList
    //
    callTree->gtCall.regArgList      = NULL;
    callTree->gtCall.regArgListCount = regCount;

    unsigned regInx = 0;
    for (curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
        {
            // Encode the argument register in the register mask
            //
            callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
            regInx++;
        }
    }
#endif // !FEATURE_FIXED_OUT_ARGS

    argsSorted = true;
}
2060 //------------------------------------------------------------------------------
2061 // fgMakeTmpArgNode : This function creates a tmp var only if needed.
2062 // We need this to be done in order to enforce ordering
2063 // of the evaluation of arguments.
2066 // tmpVarNum - the var num which we clone into the newly created temp var.
2069 // the newly created temp var tree.
2071 GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum
2072 FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters))
2074 LclVarDsc * varDsc = &lvaTable[tmpVarNum];
2075 assert(varDsc->lvIsTemp);
2076 var_types type = varDsc->TypeGet();
2078 // Create a copy of the temp to go into the late argument list
2079 GenTreePtr arg = gtNewLclvNode(tmpVarNum, type);
2080 GenTreePtr addrNode = nullptr;
2082 if (varTypeIsStruct(type))
2085 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
2087 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2089 arg->gtFlags |= GTF_DONT_CSE;
2091 // If it is passed in registers, don't get the address of the var. Make it a
2092 // field instead. It will be loaded into registers with a putarg_reg tree in Lowering.
2093 if (passedInRegisters)
2095 arg->ChangeOper(GT_LCL_FLD);
2100 arg = gtNewOperNode(GT_ADDR, type, arg);
2104 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
2106 unsigned structSize = lvaLclExactSize(tmpVarNum);
2110 case 1: type = TYP_BYTE; break;
2111 case 2: type = TYP_SHORT; break;
2112 #if defined (_TARGET_AMD64_)
2113 case 4: type = TYP_INT; break;
2114 #elif defined(_TARGET_ARM64_)
2116 case 4: type = TYP_INT; break;
2119 case 7: type = TYP_I_IMPL; break;
2120 #endif // defined (_TARGET_ARM64_)
2122 switch (*lvaGetGcLayout(tmpVarNum))
2141 // If we didn't change the type of the struct, it means
2142 // its layout doesn't allow it to be passed directly in a
2143 // register, so we need to pass a pointer to the destination
2144 // where we copied the struct to.
2145 if (type == varDsc->TypeGet())
2147 #if FEATURE_MULTIREG_ARGS
2148 #ifdef _TARGET_ARM64_
2149 assert(varTypeIsStruct(type));
2150 if (varDsc->lvIsMultiregStruct())
2152 // ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD);
2153 // as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works.
2154 // Create a GT_OBJ for the argument
2155 // This will be passed by value in two registers
2156 arg = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);
2159 // Create an Obj of the temp to use as a call argument.
2160 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
2163 #endif // _TARGET_ARM64_
2164 #endif // FEATURE_MULTIREG_ARGS
2166 arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
2170 else // type was changed from a struct to a scalar type
2172 arg->ChangeOper(GT_LCL_FLD);
2175 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
2177 #else // not (_TARGET_AMD64_ or _TARGET_ARM64_)
2179 // other targets, we pass the struct by value
2180 assert(varTypeIsStruct(type));
2182 arg = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);
2185 // Create a new Obj node of the temp to use as a call argument
2186 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
2187 arg->gtFlags |= GTF_EXCEPT;
2189 #endif // not (_TARGET_AMD64_ or _TARGET_ARM64_)
2191 } // (varTypeIsStruct(type))
2193 if (addrNode != nullptr)
2195 assert(addrNode->gtOper == GT_ADDR);
2197 // This will prevent this LclVar from being optimized away
2198 lvaSetVarAddrExposed(tmpVarNum);
2200 // the child of a GT_ADDR is required to have this flag set
2201 addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE;
2207 void fgArgInfo::EvalArgsToTemps()
2209 assert(argsSorted == true);
2211 unsigned regArgInx = 0;
2212 // Now go through the argument table and perform the necessary evaluation into temps
2213 GenTreeArgList* tmpRegArgNext = NULL;
2214 for (unsigned curInx = 0; curInx < argCount; curInx++)
2216 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
2218 GenTreePtr argx = curArgTabEntry->node;
2219 GenTreePtr setupArg = NULL;
2222 #if !FEATURE_FIXED_OUT_ARGS
2223 // needPlace is only ever set when FEATURE_FIXED_OUT_ARGS is enabled
2224 assert(curArgTabEntry->needPlace == false);
2226 // On x86 and other archs that use push instructions to pass arguments:
2227 // Only the register arguments need to be replaced with placeholder nodes.
2228 // Stacked arguments are evaluated and pushed (or stored into the stack) in order.
2230 if (curArgTabEntry->regNum == REG_STK)
2234 if (curArgTabEntry->needTmp)
2238 if (curArgTabEntry->isTmp == true)
2240 // Create a copy of the temp to go into the late argument list
2241 tmpVarNum = curArgTabEntry->tmpNum;
2242 defArg = compiler->fgMakeTmpArgNode(
2244 FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(argTable[curInx]->structDesc.passedInRegisters));
2246 // mark the original node as a late argument
2247 argx->gtFlags |= GTF_LATE_ARG;
2251 // Create a temp assignment for the argument
2252 // Put the temp in the gtCallLateArgs list
2254 if (compiler->verbose)
2256 printf("Argument with 'side effect'...\n");
2257 compiler->gtDispTree(argx);
2261 #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2262 noway_assert(argx->gtType != TYP_STRUCT);
2265 tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
2266 if (argx->gtOper == GT_MKREFANY)
2268 // For GT_MKREFANY, typically the actual struct copying does
2269 // not have any side-effects and can be delayed. So instead
2270 // of using a temp for the whole struct, we can just use a temp
2271 // for the operand that has a side-effect
2273 if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
2275 operand = argx->gtOp.gtOp1;
2277 // In the early argument evaluation, place an assignment to the temp
2278 // from the source operand of the mkrefany
2279 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2281 // Replace the operand for the mkrefany with the new temp.
2282 argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2284 else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
2286 operand = argx->gtOp.gtOp2;
2288 // In the early argument evaluation, place an assignment to the temp
2289 // from the source operand of the mkrefany
2290 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2292 // Replace the operand for the mkrefany with the new temp.
2293 argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2297 if (setupArg != NULL)
2299 // Now keep the mkrefany for the late argument list
2302 // Clear the side-effect flags because now both op1 and op2 have no side-effects
2303 defArg->gtFlags &= ~GTF_ALL_EFFECT;
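// Sketch of the transformation above, with a hypothetical temp V09 and a
// side-effecting dataPtr operand:
//   before: GT_MKREFANY(op1 /* side effect */, op2 /* pure */)
//   early:  setupArg = GT_ASG(V09, op1)
//   late:   defArg   = GT_MKREFANY(GT_LCL_VAR V09, op2)   // now side-effect free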
2307 setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);
2309 #ifndef LEGACY_BACKEND
2310 if (compiler->fgOrder == Compiler::FGOrderLinear)
2312 // We'll reference this temporary variable just once
2313 // when we perform the function call after
2314 // setting up this argument.
2315 LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;
2316 varDsc->lvRefCnt = 1;
2318 #endif // !LEGACY_BACKEND
2320 if (setupArg->OperIsCopyBlkOp())
2321 setupArg = compiler->fgMorphCopyBlock(setupArg);
2323 /* Create a copy of the temp to go to the late argument list */
2324 defArg = compiler->gtNewLclvNode(tmpVarNum, genActualType(argx->gtType));
2326 curArgTabEntry->isTmp = true;
2327 curArgTabEntry->tmpNum = tmpVarNum;
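// For example (hypothetical temp number), for a side-effecting argument 'argx'
// we end up with:
//   early list: setupArg = GT_ASG(V07, argx)   // evaluated in original order
//   late list:  defArg   = GT_LCL_VAR V07      // placed into its register/slot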
2330 // Previously we might have thought the local was promoted, and thus the 'COPYBLK'
2331 // might have left holes in the used registers (see
2332 // fgAddSkippedRegsInPromotedStructArg).
2333 // Too bad we're not that smart for these intermediate temps...
2334 if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
2336 regNumber argReg = curArgTabEntry->regNum;
2337 regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
2338 for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
2340 argReg = genRegArgNext(argReg);
2341 allUsedRegs |= genRegMask(argReg);
2343 callTree->gtCall.gtCallRegUsedMask |= allUsedRegs;
2345 #endif // _TARGET_ARM_
2348 /* mark the assignment as a late argument */
2349 setupArg->gtFlags |= GTF_LATE_ARG;
2352 if (compiler->verbose)
2354 printf("\n Evaluate to a temp:\n");
2355 compiler->gtDispTree(setupArg);
2360 else // curArgTabEntry->needTmp == false
2363 // Only register args are replaced with placeholder nodes
2364 // and the stack based arguments are evaluated and pushed in order.
2366 // On Arm/x64 - When needTmp is false and needPlace is false,
2367 // the non-register arguments are evaluated and stored in order.
2368 // When needPlace is true we have a nested call that comes after
2369 // this argument so we have to replace it in the gtCallArgs list
2370 // (the initial argument evaluation list) with a placeholder.
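// Sketch: for a call f(a, b) where 'b' is a register argument that needs no temp,
// the lists end up roughly as:
//   gtCallArgs:     a, GT_ARGPLACE    // in-order (early) evaluation
//   gtCallLateArgs: b                 // final placement into its register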
2372 if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
2375 /* No temp needed - move the whole node to the gtCallLateArgs list */
2377 /* The argument is deferred and put in the late argument list */
2381 // Create a placeholder node to put in its place in gtCallLateArgs.
2383 // For a struct type we also need to record the class handle of the arg.
2384 CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
2386 #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2388 // All structs are either passed (and retyped) as integral types, OR they
2389 // are passed by reference.
2390 noway_assert(argx->gtType != TYP_STRUCT);
2392 #else // !defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2394 if (varTypeIsStruct(defArg))
2396 // Need a temp to walk any GT_COMMA nodes when searching for the clsHnd
2397 GenTreePtr defArgTmp = defArg;
2399 // The GT_OBJ may be a child of a GT_COMMA.
2400 while (defArgTmp->gtOper == GT_COMMA)
2402 defArgTmp = defArgTmp->gtOp.gtOp2;
2404 assert(varTypeIsStruct(defArgTmp));
2406 // We handle two opcodes: GT_MKREFANY and GT_OBJ.
2407 if (defArgTmp->gtOper == GT_MKREFANY)
2409 clsHnd = compiler->impGetRefAnyClass();
2411 else if (defArgTmp->gtOper == GT_OBJ)
2413 clsHnd = defArgTmp->AsObj()->gtClass;
2417 BADCODE("Unhandled struct argument tree in fgMorphArgs");
2421 #endif // !(defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
2423 setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);
2425 /* mark the placeholder node as a late argument */
2426 setupArg->gtFlags |= GTF_LATE_ARG;
2429 if (compiler->verbose)
2431 if (curArgTabEntry->regNum == REG_STK)
2433 printf("Deferred stack argument :\n");
2437 printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
2440 compiler->gtDispTree(argx);
2441 printf("Replaced with placeholder node:\n");
2442 compiler->gtDispTree(setupArg);
2447 if (setupArg != NULL)
2449 if (curArgTabEntry->parent)
2451 GenTreePtr parent = curArgTabEntry->parent;
2452 /* a normal argument from the list */
2453 noway_assert(parent->IsList());
2454 noway_assert(parent->gtOp.gtOp1 == argx);
2456 parent->gtOp.gtOp1 = setupArg;
2460 /* must be the gtCallObjp */
2461 noway_assert(callTree->gtCall.gtCallObjp == argx);
2463 callTree->gtCall.gtCallObjp = setupArg;
2467 /* deferred arg goes into the late argument list */
2469 if (tmpRegArgNext == NULL)
2471 tmpRegArgNext = compiler->gtNewArgList(defArg);
2472 callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
2476 noway_assert(tmpRegArgNext->IsList());
2477 noway_assert(tmpRegArgNext->Current());
2478 tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
2479 tmpRegArgNext = tmpRegArgNext->Rest();
2482 curArgTabEntry->node = defArg;
2483 curArgTabEntry->lateArgInx = regArgInx++;
2487 if (compiler->verbose)
2489 printf("\nShuffled argument table: ");
2490 for (unsigned curInx = 0; curInx < argCount; curInx++)
2492 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
2494 if (curArgTabEntry->regNum != REG_STK)
2496 printf("%s ", getRegName( curArgTabEntry->regNum ));
2504 void fgArgInfo::RecordStkLevel(unsigned stkLvl)
2506 assert(!IsUninitialized(stkLvl));
2507 this->stkLevel = stkLvl;
2510 unsigned fgArgInfo::RetrieveStkLevel()
2512 assert(!IsUninitialized(stkLevel));
2516 // Return a conservative estimate of the stack size in bytes.
2517 // It will be used only on the intercepted-for-host code path to copy the arguments.
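// For example, with MAX_REG_ARG == 4 a call with 6 counted arguments yields
// numStkArgs == 2, i.e. an estimate of 2 * REGSIZE_BYTES bytes.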
2518 int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
2522 for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
2528 if (numArgs > MAX_REG_ARG)
2529 numStkArgs = numArgs - MAX_REG_ARG;
2533 return numStkArgs * REGSIZE_BYTES;
2536 //------------------------------------------------------------------------------
2537 // fgMakeMultiUse : If the node is a local, clone it and increase the ref count;
2538 //                  otherwise insert a comma form temp
2541 //    pOp  - a pointer to the child node we will be replacing with the comma expression that
2542 //           evaluates *pOp to a temp and returns the result
2545 // A fresh GT_LCL_VAR node referencing the temp which has not been used
2548 // The result tree MUST be added to the tree structure since the ref counts are
2549 // already incremented.
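// Illustrative usage sketch (hypothetical caller): to reference a subtree twice,
//   GenTree** use  = &someParent->gtOp.gtOp1;   // link to the subtree
//   GenTree*  copy = fgMakeMultiUse(use);
// afterwards '*use' still evaluates the value (possibly via a comma temp) and
// 'copy' is a fresh node for the same value that the caller must link in.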
2551 GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
2553 GenTree* tree = *pOp;
2554 if (tree->IsLocal())
2556 auto result = gtClone(tree);
2557 if (lvaLocalVarRefCounted)
2559 lvaTable[tree->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2565 GenTree* result = fgInsertCommaFormTemp(pOp);
2567 // At this point, *pOp is GT_COMMA(GT_ASG(V01, <original *pOp>), V01) and result = V01
2568 // Therefore, the ref count has to be incremented 3 times for *pOp and result, since
2569 // V01 appears twice in the comma and once more as 'result', which the caller will add.
2570 if (lvaLocalVarRefCounted)
2572 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2573 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2574 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2582 //------------------------------------------------------------------------------
2583 // fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
2584 // and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl)
2587 // ppTree - a pointer to the child node we will be replacing with the comma expression that
2588 // evaluates ppTree to a temp and returns the result
2590 // structType - value type handle if the temp created is of TYP_STRUCT.
2593 // A fresh GT_LCL_VAR node referencing the temp which has not been used
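// Sketch of the resulting shape (hypothetical temp V01):
//   *ppTree becomes GT_COMMA(GT_ASG(V01, <old subtree>), GT_LCL_VAR V01)
// and the function returns a separate, fresh GT_LCL_VAR V01 for the caller to use.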
2596 GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
2598 GenTree* subTree = *ppTree;
2600 unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));
2602 if (varTypeIsStruct(subTree))
2604 assert(structType != nullptr);
2605 lvaSetStruct(lclNum, structType, false);
2608 // If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
2609 // The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() for
2610 // setting type of lcl vars created.
2611 GenTree* asg = gtNewTempAssign(lclNum, subTree);
2613 GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
2615 GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load);
2619 return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
2623 //------------------------------------------------------------------------
2624 // fgMorphArgs: Walk and transform (morph) the arguments of a call
2627 // callNode - the call for which we are doing the argument morphing
2630 // Like most morph methods, this method returns the morphed node,
2631 // though in this case there are currently no scenarios where the
2632 // node itself is re-created.
2635 // This method is even less idempotent than most morph methods.
2636 // That is, it makes changes that should not be redone. It uses the existence
2637 // of gtCallLateArgs (the late arguments list) to determine if it has
2638 // already done that work.
2640 // The first time it is called (i.e. during global morphing), this method
2641 // computes the "late arguments". This is when it determines which arguments
2642 // need to be evaluated to temps prior to the main argument setup, and which
2643 // can be directly evaluated into the argument location. It also creates a
2644 // second argument list (gtCallLateArgs) that does the final placement of the
2645 // arguments, e.g. into registers or onto the stack.
2647 // The "non-late arguments", aka the gtCallArgs, are doing the in-order
2648 // evaluation of the arguments that might have side-effects, such as embedded
2649 // assignments, calls or possible throws. In these cases, the arg and any
2650 // earlier arguments must be evaluated to temps.
2652 // On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
2653 // if we have any nested calls, we need to defer the copying of the argument
2654 // into the fixed argument area until after the call. If the argument did not
2655 // otherwise need to be computed into a temp, it is moved to gtCallLateArgs and
2656 // replaced in the "early" arg list (gtCallArgs) with a placeholder node.
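// Illustrative sketch of the overall effect, assuming hypothetical temps. For
//   f(x, g(y))   // where g(y) is a nested call and both args go in registers
// the first morph pass yields roughly:
//   gtCallArgs:     GT_ASG(V10, x), GT_ASG(V11, g(y))   // in-order evaluation to temps
//   gtCallLateArgs: GT_LCL_VAR V10, GT_LCL_VAR V11      // final placement into registers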
2659 #pragma warning(push)
2660 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
2662 GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
2664 GenTreeCall* call = callNode->AsCall();
2669 unsigned flagsSummary = 0;
2670 unsigned genPtrArgCntSav = fgPtrArgCntCur;
2672 unsigned argIndex = 0;
2674 unsigned intArgRegNum = 0;
2675 unsigned fltArgRegNum = 0;
2677 regMaskTP argSkippedRegMask = RBM_NONE;
2679 #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
2680 regMaskTP fltArgSkippedRegMask = RBM_NONE;
2683 #if defined(_TARGET_X86_)
2684 unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated
2686 const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number
2689 unsigned argSlots = 0;
2690 unsigned nonRegPassedStructSlots = 0;
2691 bool lateArgsComputed = (call->gtCallLateArgs != nullptr);
2692 bool callHasRetBuffArg = call->HasRetBufArg();
2694 #ifndef _TARGET_X86_ // i.e. _TARGET_AMD64_ or _TARGET_ARM_
2695 bool callIsVararg = call->IsVarargs();
2698 bool hasNonStandardArg = false;
2699 #ifndef LEGACY_BACKEND
2700 // data structure for keeping track of non-standard args we insert
2701 // (args that have a special meaning and are not passed following the normal
2702 // calling convention or even in the normal arg regs.)
2703 struct NonStandardArg
2709 ArrayStack<NonStandardArg> nonStandardArgs(this, 2);
2710 #endif // !LEGACY_BACKEND
2712 // Process the late arguments (which were determined by a previous caller).
2713 // Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
2714 // may need to refer to it.
2715 if (lateArgsComputed)
2717 // We need to reMorph the gtCallLateArgs early since that is what triggers
2718 // the expression folding and we need to have the final folded gtCallLateArgs
2719 // available when we call RemorphRegArg so that we correctly update the fgArgInfo
2720 // with the folded tree that represents the final optimized argument nodes.
2722 // However if a range-check needs to be generated for any of these late
2723 // arguments we also need to "know" what the stack depth will be when we generate
2724 // code to branch to the throw range check failure block as that is part of the
2725 // GC information contract for that block.
2727 // Since the late arguments are evaluated last we have pushed all of the
2728 // other arguments on the stack before we evaluate these late arguments,
2729 // so we record the stack depth on the first morph call when lateArgsComputed
2730 // was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
2732 unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
2733 fgPtrArgCntCur += callStkLevel;
2734 call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
2735 flagsSummary |= call->gtCallLateArgs->gtFlags;
2736 fgPtrArgCntCur -= callStkLevel;
2737 assert(call->fgArgInfo != nullptr);
2738 call->fgArgInfo->RemorphReset();
2742 // First we need to count the args
2743 unsigned numArgs = 0;
2744 if (call->gtCallObjp)
2746 for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
2752 // insert nonstandard args (outside the calling convention)
2754 #if !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_)
2755 // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
2756 // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
2757 // convention for x86/SSE.
2758 if (!lateArgsComputed)
2760 if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
2762 assert(!call->gtCallCookie);
2763 // Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
2764 // It will be used only on the intercepted-for-host code path to copy the arguments.
2766 GenTree* cns = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
2767 call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
2768 NonStandardArg nsa = {REG_PINVOKE_COOKIE_PARAM, cns};
2771 nonStandardArgs.Push(nsa);
2773 else if (call->IsVirtualStub() &&
2774 (call->gtCallType == CT_INDIRECT) &&
2775 !call->IsTailCallViaHelper())
2777 // indirect VSD stubs need the base of the indirection cell to be
2778 // passed in addition. At this point that is the value in gtCallAddr.
2779 // The actual call target will be derived from gtCallAddr in call
2782 // If it is a VSD call getting dispatched via tail call helper,
2783 // fgMorphTailCall() would materialize stub addr as an additional
2784 // parameter added to the original arg list and hence no need to
2785 // add as a non-standard arg.
2787 GenTree* arg = call->gtCallAddr;
2788 if (arg->OperIsLocal())
2790 arg = gtClone(arg, true);
2794 call->gtCallAddr = fgInsertCommaFormTemp(&arg);
2795 call->gtFlags |= GTF_ASG;
2797 noway_assert(arg != nullptr);
2799 // And push the stub address onto the list of arguments
2800 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2803 NonStandardArg nsa = {REG_VIRTUAL_STUB_PARAM, arg};
2805 nonStandardArgs.Push(nsa);
2807 else if (call->gtCallType == CT_INDIRECT && call->gtCallCookie)
2809 assert(!call->IsUnmanaged());
2811 // put cookie into R11
2812 GenTree* arg = call->gtCallCookie;
2813 noway_assert(arg != nullptr);
2814 call->gtCallCookie = nullptr;
2816 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2819 NonStandardArg nsa = {REG_PINVOKE_COOKIE_PARAM, arg};
2821 nonStandardArgs.Push(nsa);
2823 // put destination into R10
2824 arg = gtClone(call->gtCallAddr, true);
2825 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2828 NonStandardArg nsa2 = {REG_PINVOKE_TARGET_PARAM, arg};
2829 nonStandardArgs.Push(nsa2);
2831 // finally change this call to a helper call
2832 call->gtCallType = CT_HELPER;
2833 call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
2836 #endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_)
2838 // Allocate the fgArgInfo for the call node;
2840 call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
2844 fgFixupStructReturn(call);
2846 /* First we morph the argument subtrees ('this' pointer, arguments, etc.).
2847 * During the first call to fgMorphArgs we also record the
2848 * information about late arguments we have in 'fgArgInfo'.
2849 * This information is used later to construct the gtCallLateArgs */
2851 /* Process the 'this' argument value, if present */
2853 argx = call->gtCallObjp;
2857 argx = fgMorphTree(argx);
2858 call->gtCallObjp = argx;
2859 flagsSummary |= argx->gtFlags;
2861 assert(call->gtCallType == CT_USER_FUNC ||
2862 call->gtCallType == CT_INDIRECT);
2864 assert(argIndex == 0);
2866 /* We must fill in or update the argInfo table */
2868 if (!lateArgsComputed)
2870 assert(varTypeIsGC(call->gtCallObjp->gtType) ||
2871 (call->gtCallObjp->gtType == TYP_I_IMPL));
2873 /* this is a register argument - put it in the table */
2874 call->fgArgInfo->AddRegArg(argIndex,
2877 genMapIntRegArgNumToRegNum(intArgRegNum),
2880 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2884 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
2889 /* this is a register argument - possibly update it in the table */
2890 call->fgArgInfo->RemorphRegArg(argIndex, argx, NULL, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
2892 // this can't be a struct.
2893 assert(argx->gtType != TYP_STRUCT);
2895 // FIXME: Issue #4025 Why do we need floating type for 'this' argument
2896 /* Increment the argument register count and argument index */
2897 if (!varTypeIsFloating(argx->gtType) || opts.compUseSoftFP)
2900 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
2901 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
2908 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
2909 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
2918 // Compute the maximum number of arguments that can be passed in registers.
2919 // For X86 we handle the varargs and unmanaged calling conventions
2921 if (call->gtFlags & GTF_CALL_POP_ARGS)
2923 noway_assert(intArgRegNum < MAX_REG_ARG);
2924 // No more register arguments for varargs (CALL_POP_ARGS)
2925 maxRegArgs = intArgRegNum;
2927 // Add in the ret buff arg
2928 if (callHasRetBuffArg)
2932 if (call->IsUnmanaged())
2934 noway_assert(intArgRegNum == 0);
2936 if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
2938 noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
2939 call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
2940 call->gtCallArgs->gtOp.gtOp1->gtOper == GT_NOP); // the arg was already morphed to a register (fgMorph called twice)
2948 // Add in the ret buff arg
2949 if (callHasRetBuffArg)
2952 #endif // _TARGET_X86_
2954 /* Morph the user arguments */
2956 #if defined(_TARGET_ARM_)
2958 // The ARM ABI has a concept of back-filling of floating-point argument registers, according
2959 // to the "Procedure Call Standard for the ARM Architecture" document, especially
2960 // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
2961 // appear in a lower-numbered register than floating point argument N. That is, argument
2962 // register allocation is not strictly increasing. To support this, we need to keep track of unused
2963 // floating-point argument registers that we can back-fill. We only support 4-byte float and
2964 // 8-byte double types, and one to four element HFAs composed of these types. With this, we will
2965 // only back-fill single registers, since there is no way with these types to create
2966 // an alignment hole greater than one register. However, there can be up to 3 back-fill slots
2967 // available (with 16 FP argument registers). Consider this code:
2969 // struct HFA { float x, y, z; }; // a three element HFA
2970 // void bar(float a1, // passed in f0
2971 // double a2, // passed in f2/f3; skip f1 for alignment
2972 // HFA a3, // passed in f4/f5/f6
2973 // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
2974 // HFA a5, // passed in f10/f11/f12
2975 // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill slots
2976 // float a7, // passed in f1 (back-filled)
2977 // float a8, // passed in f7 (back-filled)
2978 // float a9, // passed in f13 (back-filled)
2979 // float a10) // passed on the stack in [OutArg+0]
2981 // Note that if we ever support FP types with larger alignment requirements, then there could
2982 // be more than single register back-fills.
2984 // Once we assign a floating-point argument to the stack, all subsequent floating-point arguments must go on the stack.
2985 // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
2986 // continues only so long as no VFP CPRC has been allocated to a slot on the stack."
2987 // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
2988 // and prevent any additional floating-point arguments from going in registers.
2990 bool anyFloatStackArgs = false;
2992 #endif // _TARGET_ARM_
2994 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2995 bool nonRegPassableStruct = false;
2996 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
2997 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
2999 bool hasStructArgument = false; // @TODO-ARM64-UNIX: Remove this bool during a future refactoring
3000 bool hasMultiregStructArgs = false;
3001 for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2)
3003 GenTreePtr * parentArgx = &args->gtOp.gtOp1;
3005 #if FEATURE_MULTIREG_ARGS
3006 if (!hasStructArgument)
3008 hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1);
3010 #endif // FEATURE_MULTIREG_ARGS
3012 argx = fgMorphTree(*parentArgx);
3014 flagsSummary |= argx->gtFlags;
3016 assert(args->IsList());
3017 assert(argx == args->Current());
3019 /* Change the node to TYP_I_IMPL so we don't report GC info
3020 * NOTE: We deferred this from the importer because of the inliner */
3022 if (argx->IsVarAddr())
3023 argx->gtType = TYP_I_IMPL;
3025 bool passUsingFloatRegs;
3026 unsigned argAlign = 1;
3027 // Setup any HFA information about 'argx'
3028 var_types hfaType = GetHfaType(argx);
3029 bool isHfaArg = varTypeIsFloating(hfaType);
3030 unsigned hfaSlots = 0;
3034 hfaSlots = GetHfaCount(argx);
3036 // If we have a HFA struct it's possible we transition from a method that originally
3037 // had only integer types to one that now has FP types. We have to communicate this
3038 // through this flag since LSRA later on will use it to determine whether
3039 // or not to track the FP register set.
3041 compFloatingPointUsed = true;
3045 CORINFO_CLASS_HANDLE copyBlkClass = NULL;
3046 bool isRegArg = false;
3048 fgArgTabEntryPtr argEntry = NULL;
3050 if (lateArgsComputed)
3052 argEntry = gtArgEntryByArgNum(call, argIndex);
3057 bool passUsingIntRegs;
3058 if (lateArgsComputed)
3060 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3061 passUsingIntRegs = isValidIntArgReg(argEntry->regNum);
3065 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP;
3066 passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
3069 GenTreePtr curArg = argx;
3070 // If late args have already been computed, use the node in the argument table.
3071 if (argEntry != NULL && argEntry->isTmp)
3073 curArg = argEntry->node;
3076 // We don't use the "size" return value from InferOpSizeAlign().
3077 codeGen->InferOpSizeAlign(curArg, &argAlign);
3079 argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
3080 argAlign /= TARGET_POINTER_SIZE;
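// For example, a TYP_DOUBLE argument on ARM has 8-byte alignment, so with
// TARGET_POINTER_SIZE == 4 we get argAlign == 2 (in pointer-sized slots),
// which triggers the even-numbered register/slot alignment below.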
3084 if (passUsingFloatRegs)
3086 if (fltArgRegNum % 2 == 1)
3088 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3092 else if (passUsingIntRegs)
3094 if (intArgRegNum % 2 == 1)
3096 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3101 if (argSlots % 2 == 1)
3107 #elif defined(_TARGET_ARM64_)
3109 if (lateArgsComputed)
3111 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3115 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
3118 #elif defined(_TARGET_AMD64_)
3119 #if defined(UNIX_AMD64_ABI)
3120 if (lateArgsComputed)
3122 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3126 passUsingFloatRegs = varTypeIsFloating(argx);
3128 bool passUsingIntRegs;
3129 passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
3130 #else // !UNIX_AMD64_ABI
3131 passUsingFloatRegs = varTypeIsFloating(argx);
3132 #endif // !UNIX_AMD64_ABI
3133 #elif defined(_TARGET_X86_)
3135 passUsingFloatRegs = false;
3138 #error Unsupported or unset target architecture
3141 bool isBackFilled = false;
3142 unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
3143 var_types structBaseType = TYP_STRUCT;
3144 unsigned structSize = 0;
3146 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3147 unsigned int structFloatRegs = 0;
3148 unsigned int structIntRegs = 0;
3149 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3150 bool isStructArg = varTypeIsStruct(argx);
3152 if (lateArgsComputed)
3154 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3155 // Get the struct description for the already completed struct argument.
3156 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, argx);
3157 assert(fgEntryPtr != nullptr);
3159 // As described in a few other places, this can happen when argx was morphed
3160 // into an arg setup node - a COPYBLK. The COPYBLK always has type TYP_VOID.
3161 // In such a case the fgArgTabEntry keeps track of whether the original node (before morphing)
3162 // was a struct, and of the struct classification.
3163 isStructArg = fgEntryPtr->isStruct;
3167 structDesc.CopyFrom(fgEntryPtr->structDesc);
3169 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3171 assert(argEntry != NULL);
3172 if (argEntry->IsBackFilled())
3175 size = argEntry->numRegs;
3176 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(argEntry->regNum);
3178 isBackFilled = true;
3180 else if (argEntry->regNum == REG_STK)
3183 assert(argEntry->numRegs == 0);
3184 size = argEntry->numSlots;
3189 assert(argEntry->numRegs > 0);
3190 size = argEntry->numRegs + argEntry->numSlots;
3193 // This size has now been computed
3196 else // !lateArgsComputed
3199 // Figure out the size of the argument. This is either in number of registers, or number of TARGET_POINTER_SIZE
3200 // stack slots, or the sum of these if the argument is split between the registers and the stack.
3202 if (argx->IsArgPlaceHolderNode() || (!isStructArg))
3204 #if defined(_TARGET_AMD64_)
3205 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3208 size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3212 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
3213 eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
3216 hasMultiregStructArgs = true;
3219 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3220 size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3221 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3222 #elif defined(_TARGET_ARM64_)
3227 size = GetHfaCount(argx);
3228 // HFA structs are passed by value in multiple registers
3229 hasMultiregStructArgs = true;
3233 // Structs are either passed in 1 or 2 (64-bit) slots
3234 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
3238 // Structs that are the size of 2 pointers are passed by value in multiple registers
3239 hasMultiregStructArgs = true;
3243 size = 1; // Structs that are larger that 2 pointers (except for HFAs) are passed by reference (to a copy)
3246 // Note that there are some additional rules for multireg structs.
3247 // (i.e. they cannot be split between registers and the stack)
3251 size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot'
3253 #elif defined(_TARGET_ARM_)
3256 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
3261 size = genTypeStSz(argx->gtType);
3263 #elif defined(_TARGET_X86_)
3264 size = genTypeStSz(argx->gtType);
3266 #error Unsupported or unset target architecture
3267 #endif // _TARGET_XXX_
3272 size = GetHfaCount(argx);
3274 #endif // _TARGET_ARM_
3277 // We handle two opcodes: GT_MKREFANY and GT_OBJ
3278 if (argx->gtOper == GT_MKREFANY)
3280 if (varTypeIsStruct(argx))
3284 #ifdef _TARGET_AMD64_
3285 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3286 if (varTypeIsStruct(argx))
3288 size = info.compCompHnd->getClassSize(impGetRefAnyClass());
3289 unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE);
3290 size = roundupSize / TARGET_POINTER_SIZE;
3291 eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc);
3294 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3302 else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be a child of a GT_COMMA
3304 GenTreePtr argObj = argx;
3305 GenTreePtr* parentOfArgObj = parentArgx;
3307 assert(args->IsList());
3308 assert(argx == args->Current());
3310 /* The GT_OBJ may be a child of a GT_COMMA */
3311 while (argObj->gtOper == GT_COMMA)
3313 parentOfArgObj = &argObj->gtOp.gtOp2;
3314 argObj = argObj->gtOp.gtOp2;
3317 if (argObj->gtOper != GT_OBJ)
3318 BADCODE("illegal argument tree in fgMorphArgs");
3320 CORINFO_CLASS_HANDLE objClass = argObj->gtObj.gtClass;
3321 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3322 eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
3323 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3325 unsigned originalSize = info.compCompHnd->getClassSize(objClass);
3326 originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
3327 unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
3329 structSize = originalSize;
3331 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3332 // On System V OS-es a struct is never passed by reference.
3333 // It is either passed by value on the stack or in registers.
3334 bool passStructInRegisters = false;
3335 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3336 bool passStructByRef = false;
3337 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3339 // The following if-then-else needs to be carefully refactored.
3340 // Basically the else portion wants to turn a struct load (a GT_OBJ)
3341 // into a GT_IND of the appropriate size.
3342 // It can do this with struct sizes that are 1, 2, 4, or 8 bytes.
3343 // It can't do this when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined (Why?)
3344 // TODO-Cleanup: Remove the #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING below.
3345 // It also can't do this if we have a HFA arg,
3346 // unless we have a 1-elem HFA in which case we want to do the optimization.
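// Illustrative example of that else-portion optimization (hypothetical struct):
//   struct S { int x; };   // 4 bytes
// an argument GT_OBJ(GT_ADDR(GT_LCL_VAR V03)) becomes
// GT_IND(TYP_INT, GT_ADDR(GT_LCL_VAR V03)), and the *(&X) fold further below
// then reduces it to V03 itself, retyped (or accessed via GT_LCL_FLD) as TYP_INT.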
3348 #ifndef _TARGET_X86_
3349 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3350 // Check for struct argument with size 1, 2, 4 or 8 bytes
3351 // As we can optimize these by turning them into a GT_IND of the correct type
3352 if ((originalSize > TARGET_POINTER_SIZE) || ((originalSize & (originalSize - 1)) != 0) || (isHfaArg && (hfaSlots != 1)))
3353 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3355 // Normalize 'size' to the number of pointer sized items
3356 // 'size' is the number of register slots that we will use to pass the argument
3357 size = roundupSize / TARGET_POINTER_SIZE;
3358 #if defined(_TARGET_AMD64_)
3359 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3360 size = 1; // This must be copied to a temp and passed by address
3361 passStructByRef = true;
3362 copyBlkClass = objClass;
3363 #else // FEATURE_UNIX_AMD64_STRUCT_PASSING
3364 if (!structDesc.passedInRegisters)
3366 passStructInRegisters = false;
3367 copyBlkClass = NO_CLASS_HANDLE;
3371 // The objClass is used to materialize the struct on stack.
3372 passStructInRegisters = true;
3373 copyBlkClass = objClass;
3375 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3376 #elif defined(_TARGET_ARM64_)
3377 if ((size > 2) && !isHfaArg)
3379 size = 1; // This must be copied to a temp and passed by address
3380 passStructByRef = true;
3381 copyBlkClass = objClass;
3386 // If we're passing a promoted struct local var,
3387 // we may need to skip some registers due to alignment; record those.
3388 GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
3391 LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3392 if (varDsc->lvPromoted)
3394 assert(argObj->OperGet() == GT_OBJ);
3395 if (lvaGetPromotionType(varDsc) == PROMOTION_TYPE_INDEPENDENT)
3397 fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3401 #endif // _TARGET_ARM_
3403 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3404 else // We have a struct argument with size 1, 2, 4 or 8 bytes
3406 // change our GT_OBJ into a GT_IND of the correct type.
3407 // We've already ensured above that size is a power of 2, and less than or equal to pointer size.
3408 structBaseType = argOrReturnTypeForStruct(originalSize, objClass, false /* forReturn */);
3411 // If we reach here with an HFA arg it has to be a one element HFA
3412 assert(hfaSlots == 1);
3413 structBaseType = hfaType; // change the indirection type to a floating point type
3415 noway_assert(structBaseType != TYP_UNKNOWN);
3417 argObj->ChangeOper(GT_IND);
3419 // Now see if we can fold *(&X) into X
3420 if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
3422 GenTreePtr temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
3424 // Keep the DONT_CSE flag in sync
3425 // (as the addr always marks it for its op1)
3426 temp->gtFlags &= ~GTF_DONT_CSE;
3427 temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
3428 DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
3429 DEBUG_DESTROY_NODE(argObj); // GT_IND
3432 *parentOfArgObj = temp;
3434 // If the OBJ had been the top level node, we've now changed argx.
3435 if (parentOfArgObj == parentArgx)
3438 if (argObj->gtOper == GT_LCL_VAR)
3440 unsigned lclNum = argObj->gtLclVarCommon.gtLclNum;
3441 LclVarDsc * varDsc = &lvaTable[lclNum];
3443 if (varDsc->lvPromoted)
3445 if (varDsc->lvFieldCnt == 1)
3447 // get the first and only promoted field
3448 LclVarDsc * fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
3449 if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
3451 // we will use the first and only promoted field
3452 argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);
3454 if (varTypeCanReg(fieldVarDsc->TypeGet()) && (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
3456 // Just use the existing field's type
3457 argObj->gtType = fieldVarDsc->TypeGet();
3461 // Can't use the existing field's type, so use GT_LCL_FLD to swizzle
3463 argObj->ChangeOper(GT_LCL_FLD);
3464 argObj->gtType = structBaseType;
3466 assert(varTypeCanReg(argObj->TypeGet()));
3467 assert(copyBlkClass == NO_CLASS_HANDLE);
3471 // use GT_LCL_FLD to swizzle the single field struct to a new type
3472 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
3473 argObj->ChangeOper(GT_LCL_FLD);
3474 argObj->gtType = structBaseType;
3479 // The struct fits into a single register, but it has been promoted into its
3480 // constituent fields, and so we have to re-assemble it
3481 copyBlkClass = objClass;
3483 // Alignment constraints may cause us not to use (to "skip") some argument registers.
3484 // Add those, if any, to the skipped (int) arg reg mask.
3485 fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3486 #endif // _TARGET_ARM_
3489 else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
3491 // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
3492 argObj->ChangeOper(GT_LCL_FLD);
3493 argObj->gtType = structBaseType;
3498 // Not a GT_LCL_VAR, so we can just change the type on the node
3499 argObj->gtType = structBaseType;
3501 assert(varTypeCanReg(argObj->TypeGet()) ||
3502 ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsIntegral(structBaseType)));
3506 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3508 #endif // not _TARGET_X86_
3509 // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
3510 if ((structBaseType == TYP_STRUCT) &&
3511 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3512 !passStructInRegisters
3513 #else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3515 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3518 if (isHfaArg && passUsingFloatRegs)
3520 size = GetHfaCount(argx); // GetHfaCount returns number of elements in the HFA
3524 // if the valuetype size is not a multiple of sizeof(void*),
3525 // we must copyblk to a temp before doing the obj to avoid
3526 // the obj reading memory past the end of the valuetype
3527 #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
3528 // TODO-X86-CQ: [1091733] Revisit for small structs, we should use push instruction
3529 copyBlkClass = objClass;
3530 size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
3531 #else // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND)
3532 if (roundupSize > originalSize)
3534 copyBlkClass = objClass;
3536 // There are a few special cases where we can omit using a CopyBlk
3537 // where we normally would need to use one.
3539 GenTreePtr objAddr = argObj->gtObj.gtOp1;
3540 if (objAddr->gtOper == GT_ADDR)
3542 // exception : no need to use CopyBlk if the valuetype is on the stack
3543 if (objAddr->gtFlags & GTF_ADDR_ONSTACK)
3545 copyBlkClass = NO_CLASS_HANDLE;
3547 // exception : no need to use CopyBlk if the valuetype is already a struct local
3548 else if (objAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR)
3550 copyBlkClass = NO_CLASS_HANDLE;
3555 size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
3556 #endif // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND)
3562 hasMultiregStructArgs = true;
3566 // The 'size' value must now have been set. (The original value of zero is an invalid value.)
3570 // Figure out if the argument will be passed in a register.
3572 bool passedInRegisters = true;
3573 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3574 passedInRegisters = !isStructArg;
3575 if (!passedInRegisters)
3577 if (structDesc.passedInRegisters)
3579 passedInRegisters = true;
3583 passedInRegisters = false;
3588 if (passedInRegisters && isRegParamType(genActualType(argx->TypeGet())))
3591 if (passUsingFloatRegs)
3593 // First, see if it can be back-filled
3594 if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
3595 (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot?
3596 (size == 1)) // The size to back-fill is one float register
3598 // Back-fill the register.
3599 isBackFilled = true;
3600 regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
3601 fltArgSkippedRegMask &= ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
3602 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
3603 assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG);
3606 // Does the entire float, double, or HFA fit in the FP arg registers?
3607 // Check if the last register needed is still in the argument register range.
3608 isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG;
3612 anyFloatStackArgs = true;
3617 isRegArg = intArgRegNum < MAX_REG_ARG;
3619 #elif _TARGET_ARM64_
3620 if (passUsingFloatRegs)
3622 // Check if the last register needed is still in the fp argument register range.
3623 isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG;
3625 // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers?
3626 if (isHfaArg && !isRegArg)
3628 // recompute the 'size' so that it represents the number of stack slots rather than the number of registers
3630 unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
3631 size = roundupSize / TARGET_POINTER_SIZE;
3636 // Check if the last register needed is still in the int argument register range.
3637 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
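// For example (illustrative, with maxRegArgs == 8 as on ARM64): if intArgRegNum == 7,
// a one-slot argument still fits (7 + 0 < 8), while a two-slot struct does not
// (7 + 1 is not < 8) and is passed on the stack instead.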
3639 #else // not _TARGET_ARM_ or _TARGET_ARM64_
3641 #if defined(UNIX_AMD64_ABI)
3643 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3644 // Here a struct can be passed in registers following the classification of its members and size.
3645 // Now make sure there are actually enough registers to do so.
3648 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
3650 if (structDesc.IsIntegralSlot(i))
3654 else if (structDesc.IsSseSlot(i))
3660 if (((nextFltArgRegNum + structFloatRegs) > MAX_FLOAT_REG_ARG) ||
3661 ((intArgRegNum + structIntRegs) > MAX_REG_ARG))
3664 nonRegPassableStruct = true;
3669 nonRegPassableStruct = false;
3673 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
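// Sketch of the check above for a hypothetical struct { long l; double d; }:
// classification yields eightByteCount == 2 with one INTEGER and one SSE slot,
// so structIntRegs == 1 and structFloatRegs == 1; the struct is passed in
// registers only if both counts still fit in the remaining int and float
// argument registers, otherwise it is a non-reg-passable struct.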
3675 if (passUsingFloatRegs)
3677 isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
3681 isRegArg = intArgRegNum < MAX_REG_ARG;
3684 #else // !defined(UNIX_AMD64_ABI)
3685 isRegArg = (intArgRegNum+(size-1)) < maxRegArgs;
3686 #endif // !defined(UNIX_AMD64_ABI)
3687 #endif // _TARGET_ARM_
3693 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3694 nonRegPassableStruct = true;
3695 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3700 // Now we know whether the argument goes in registers and how big it is,
3701 // whether we just computed that above or this is a re-morph call and we looked it up.
3706 // If we ever allocate a floating point argument to the stack, then all
3707 // subsequent HFA/float/double arguments go on the stack.
3708 if (!isRegArg && passUsingFloatRegs)
3710 for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum)
3712 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3716 // If we think we're going to split a struct between integer registers and the stack, check to
3717 // see if we've already assigned a floating-point arg to the stack.
3718 if (isRegArg && // We decided above to use a register for the argument
3719 !passUsingFloatRegs && // We're using integer registers
3720 (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack
3721 anyFloatStackArgs) // We've already used the stack for a floating-point argument
3723 isRegArg = false; // Change our mind; don't pass this struct partially in registers
3725 // Skip the rest of the integer argument registers
3726 for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum)
3728 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3732 #endif // _TARGET_ARM_
3735 regNumber nextRegNum = REG_STK;
3736 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3737 regNumber nextOtherRegNum = REG_STK;
3739 if (isStructArg && structDesc.passedInRegisters)
3741 // It is a struct passed in registers. Assign the next available register.
3742 unsigned int curIntReg = intArgRegNum;
3743 unsigned int curFloatReg = nextFltArgRegNum;
3744 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
3746 if (structDesc.IsIntegralSlot(i))
3750 nextRegNum = genMapIntRegArgNumToRegNum(curIntReg);
3752 // For non-completed args the counters are incremented already
3753 // in the !lateArgsComputed case above.
3754 if (lateArgsComputed)
3761 nextOtherRegNum = genMapIntRegArgNumToRegNum(curIntReg);
3763 if (lateArgsComputed)
3770 assert(false && "fgMorphArgs Invalid index for int classification.");
3775 else if (structDesc.IsSseSlot(i))
3779 nextRegNum = genMapFloatRegArgNumToRegNum(curFloatReg);
3781 if (lateArgsComputed)
3788 nextOtherRegNum = genMapFloatRegArgNumToRegNum(curFloatReg);
3790 if (lateArgsComputed)
3797 assert(false && "fgMorphArgs Invalid index for SSE classification.");
3805 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3807 // fill in or update the argInfo table
3808 nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum) : genMapIntRegArgNumToRegNum(intArgRegNum);
3811 #ifdef _TARGET_AMD64_
3812 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3817 #ifndef LEGACY_BACKEND
3818 // If there are nonstandard args (outside the calling convention) they were inserted above
3819 // and noted in a table so we can recognize them here and build their argInfo.
3821 // They should not affect the placement of any other args or stack space required.
3822 // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
3823 bool nonStandardFound = false;
3824 for (int i=0; i<nonStandardArgs.Height(); i++)
3826 hasNonStandardArg = true;
3827 if (argx == nonStandardArgs.Index(i).node)
3829 fgArgTabEntry* argEntry = call->fgArgInfo->AddRegArg(argIndex,
3832 nonStandardArgs.Index(i).reg,
3835 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3839 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3841 argEntry->isNonStandard = true;
3843 nonStandardFound = true;
3847 if (nonStandardFound)
3849 #endif // !LEGACY_BACKEND
3851 if (!lateArgsComputed)
3853 // This is a register argument - put it in the table
3854 fgArgTabEntryPtr newArg = call->fgArgInfo->AddRegArg(argIndex,
3860 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3864 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3866 (void)newArg; //prevent "unused variable" error from GCC
3868 newArg->SetIsHfaRegArg(passUsingFloatRegs && isHfaArg); // Note on Arm32 a HFA is passed in int regs for varargs
3871 newArg->SetIsBackFilled(isBackFilled);
3872 #endif // _TARGET_ARM_
3876 // This is a register argument - possibly update it in the table
3877 fgArgTabEntryPtr entry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
3878 if (entry->isNonStandard)
3885 // Setup the next argRegNum value
3888 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3891 intArgRegNum += structIntRegs;
3892 fltArgRegNum += structFloatRegs;
3895 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3897 if (passUsingFloatRegs)
3899 fltArgRegNum += size;
3901 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
3902 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3903 intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
3904 #endif // _TARGET_AMD64_
3906 if (fltArgRegNum > MAX_FLOAT_REG_ARG)
3908 // This indicates a partial enregistration of a struct type
3909 assert(varTypeIsStruct(argx));
3910 unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
3911 assert((unsigned char)numRegsPartial == numRegsPartial);
3912 call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
3913 fltArgRegNum = MAX_FLOAT_REG_ARG;
3915 #endif // _TARGET_ARM_
3919 intArgRegNum += size;
3921 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
3922 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_DOUBLE);
3923 fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
3924 #endif // _TARGET_AMD64_
3926 if (intArgRegNum > MAX_REG_ARG)
3928 // This indicates a partial enregistration of a struct type
3929 assert((isStructArg) || argx->OperIsCopyBlkOp() ||
3930 (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
3931 unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
3932 assert((unsigned char)numRegsPartial == numRegsPartial);
3933 call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
3934 intArgRegNum = MAX_REG_ARG;
3935 fgPtrArgCntCur += size - numRegsPartial;
3937 #endif // _TARGET_ARM_
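// Worked example of the ARM split above (illustrative): with intArgRegNum == 3
// before the increment and a struct of size == 4 slots, intArgRegNum becomes 7,
// so numRegsPartial == 4 - (7 - 4) == 1: one slot is passed in r3 and the
// remaining three slots go to the stack (fgPtrArgCntCur += 3).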
3942 else // We have an argument that is not passed in a register
3944 fgPtrArgCntCur += size;
3946 // If the register arguments have not been determined then we must fill in the argInfo
3948 if (!lateArgsComputed)
3950 // This is a stack argument - put it in the table
3951 call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg));
3956 // This is a stack argument - possibly update it in the table
3957 call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
3960 if (copyBlkClass != NO_CLASS_HANDLE)
3962 noway_assert(!lateArgsComputed);
3963 fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
3966 #ifndef LEGACY_BACKEND
3967 if (argx->gtOper == GT_MKREFANY)
3969 NYI_X86("MKREFANY");
3971 // 'Lower' the MKREFANY tree and insert it.
3972 noway_assert(!lateArgsComputed);
3975 // Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany
3976 unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
3977 lvaSetStruct(tmp, impGetRefAnyClass(), false);
3980 // Build the mkrefany as a comma node:
3981 // (tmp.ptr=argx),(tmp.type=handle)
3982 GenTreeLclFld* destPtrSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, dataPtr));
3983 GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, type));
3984 destPtrSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField());
3985 destPtrSlot->gtFlags |= GTF_VAR_DEF;
3986 destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
3987 destTypeSlot->gtFlags |= GTF_VAR_DEF;
3989 GenTreePtr asgPtrSlot = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1);
3990 GenTreePtr asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2);
3991 GenTreePtr asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot);
3993 // Change the expression to "(tmp=val)"
3994 args->gtOp.gtOp1 = asg;
3996 // EvalArgsToTemps will cause tmp to actually get loaded as the argument
3997 call->fgArgInfo->EvalToTmp(argIndex, tmp, asg);
3998 lvaSetVarAddrExposed(tmp);
4000 #endif // !LEGACY_BACKEND
4003 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4004 if (nonRegPassableStruct)
4006 nonRegPassedStructSlots += size;
4009 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4013 } // end foreach argument loop
4015 if (!lateArgsComputed)
4017 call->fgArgInfo->ArgsComplete();
4019 call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum) & ~argSkippedRegMask;
4020 if (fltArgRegNum > 0)
4022 #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
4023 call->gtCallRegUsedMask |= genFltAllRegArgMask(fltArgRegNum) & ~fltArgSkippedRegMask;
4028 if (call->gtCallArgs)
4030 UpdateGT_LISTFlags(call->gtCallArgs);
4033 /* Process the function address, if indirect call */
4035 if (call->gtCallType == CT_INDIRECT)
4036 call->gtCallAddr = fgMorphTree(call->gtCallAddr);
4038 call->fgArgInfo->RecordStkLevel(fgPtrArgCntCur);
4040 if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != NULL))
4043 /* Remember the maximum value we ever see */
4045 if (fgPtrArgCntMax < fgPtrArgCntCur)
4046 fgPtrArgCntMax = fgPtrArgCntCur;
4048 /* The call will pop all the arguments we pushed */
4050 fgPtrArgCntCur = genPtrArgCntSav;
4052 #if FEATURE_FIXED_OUT_ARGS
4054 // Update the outgoing argument size.
4055 // If the call is a fast tail call, it will set up its arguments in the incoming arg
4056 // area instead of the outgoing arg area; therefore, fast tail calls do not
4057 // update lvaOutgoingArgSpaceSize.
4058 if (!call->IsFastTailCall())
4060 unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
4062 #if defined(UNIX_AMD64_ABI)
4063 opts.compNeedToAlignFrame = true; // this is currently required for the UNIX ABI to work correctly
4065 // ToDo: Remove this re-calculation of preallocatedArgCount and use the value assigned above.
4067 // First slots go in registers only, no stack needed.
4068 // TODO-Amd64-Unix-CQ This calculation is only accurate for integer arguments,
4069 // and ignores floating point args (it is overly conservative in that case).
4070 if (argSlots <= MAX_REG_ARG)
4072 preallocatedArgCount = nonRegPassedStructSlots;
4076 preallocatedArgCount = argSlots + nonRegPassedStructSlots - MAX_REG_ARG;
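// Worked example (illustrative values, not from the original source): with
// argSlots = 8, nonRegPassedStructSlots = 2 and MAX_REG_ARG = 6, the first
// six slots travel in registers, so preallocatedArgCount = 8 + 2 - 6 = 4.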
4078 #endif // UNIX_AMD64_ABI
4080 // Check if we need to increase the size of our Outgoing Arg Space
4081 if (preallocatedArgCount * REGSIZE_BYTES > lvaOutgoingArgSpaceSize)
4083 lvaOutgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
4085 // If a function has localloc, we will need to move the outgoing arg space when the
4086 // localloc happens. When we do this, we need to maintain stack alignment. To avoid
4087 // leaving alignment-related holes when doing this move, make sure the outgoing
4088 // argument space size is a multiple of the stack alignment by aligning up to the next
4089 // stack alignment boundary.
4090 if (compLocallocUsed)
4092 lvaOutgoingArgSpaceSize = (unsigned) roundUp(lvaOutgoingArgSpaceSize, STACK_ALIGN);
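// For illustration (assuming STACK_ALIGN == 16, as on AMD64): an outgoing
// arg space of 40 bytes rounds up to 48, so moving it for a localloc cannot
// leave an alignment hole.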
4098 printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, lvaOutgoingArgSpaceSize=%d\n",
4099 argSlots, preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), lvaOutgoingArgSpaceSize);
4103 #endif // FEATURE_FIXED_OUT_ARGS
4105 /* Update the 'side effect' flags value for the call */
4107 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4109 // If the register arguments have already been determined
4110 // or we have no register arguments then we are done.
4112 bool needEvalArgsToTemps = true;
4114 if (lateArgsComputed || (intArgRegNum == 0 && fltArgRegNum == 0 && !hasNonStandardArg && !hasStructArgument))
4116 needEvalArgsToTemps = false;
4119 if (needEvalArgsToTemps)
4121 // This is the first time that we morph this call AND it has register arguments.
4122 // Follow into the code below and do the 'defer or eval to temp' analysis.
4124 call->fgArgInfo->SortArgs();
4126 call->fgArgInfo->EvalArgsToTemps();
4128 // We may have updated the arguments
4129 if (call->gtCallArgs)
4131 UpdateGT_LISTFlags(call->gtCallArgs);
4135 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4137 // Rewrite the struct args to be passed by value on stack or in registers.
4138 fgMorphSystemVStructArgs(call, hasStructArgument);
4140 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4142 // In the future we can migrate UNIX_AMD64 to use this
4143 // method instead of fgMorphSystemVStructArgs
4144 #ifndef LEGACY_BACKEND
4145 // We only build GT_LISTs for MultiReg structs for the RyuJIT backend
4146 if (hasMultiregStructArgs)
4148 fgMorphMultiregStructArgs(call);
4150 #endif // LEGACY_BACKEND
4152 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4157 fgArgInfoPtr argInfo = call->fgArgInfo;
4159 for (unsigned curInx = 0; curInx < argInfo->ArgCount(); curInx++)
4161 fgArgTabEntryPtr curArgEntry = argInfo->ArgTable()[curInx];
4162 curArgEntry->Dump();
4170 #pragma warning(pop)
4173 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4174 // fgMorphSystemVStructArgs:
4175 // Rewrite the struct args to be passed by value on stack or in registers.
4178 // call: The call whose arguments need to be morphed.
4179 // hasStructArgument: Whether this call has struct arguments.
4181 void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
4183 unsigned flagsSummary = 0;
4187 if (hasStructArgument)
4189 fgArgInfoPtr allArgInfo = call->fgArgInfo;
4191 for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4193 // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
4194 // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
4195 // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
4196 // between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to the
4197 // arg itself; otherwise it points to the arg's node in the late args list.
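// Sketch of the mapping (hypothetical temp V07, for illustration only):
//   gtCallArgs:     ... -> ASG(V07, <arg expr>)    // the setup node
//   gtCallLateArgs: ... -> LCL_VAR V07             // the node actually passed
// and the fgArgEntry->node for this arg refers to the LCL_VAR in the late list.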
4198 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4199 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4200 assert(fgEntryPtr != nullptr);
4201 GenTreePtr argx = fgEntryPtr->node;
4202 GenTreePtr lateList = nullptr;
4203 GenTreePtr lateNode = nullptr;
4207 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4209 assert(list->IsList());
4211 GenTreePtr argNode = list->Current();
4212 if (argx == argNode)
4219 assert(lateList != nullptr && lateNode != nullptr);
4221 GenTreePtr arg = argx;
4222 bool argListCreated = false;
4224 var_types type = arg->TypeGet();
4226 if (varTypeIsStruct(type))
4228 var_types originalType = type;
4229 // If we have already processed the arg...
4230 if (arg->OperGet() == GT_LIST && varTypeIsStruct(arg))
4235 // If it is already an OBJ, it is already set up properly.
4236 if (arg->OperGet() == GT_OBJ)
4238 assert(!fgEntryPtr->structDesc.passedInRegisters);
4243 arg->OperGet() == GT_LCL_VAR ||
4244 arg->OperGet() == GT_LCL_FLD ||
4245 (arg->OperGet() == GT_ADDR &&
4246 (arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD ||
4247 arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR)));
4249 GenTreeLclVarCommon* lclCommon = arg->OperGet() == GT_ADDR ?
4250 arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
4251 if (fgEntryPtr->structDesc.passedInRegisters)
4253 if (fgEntryPtr->structDesc.eightByteCount == 1)
4255 // Change the type; the code below will change the LclVar to a LCL_FLD
4256 type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0], fgEntryPtr->structDesc.eightByteSizes[0]);
4258 else if (fgEntryPtr->structDesc.eightByteCount == 2)
4260 // Create LCL_FLD for each eightbyte.
4261 argListCreated = true;
4263 // Second eightbyte.
4264 GenTreeLclFld* newLclField = new(this, GT_LCL_FLD) GenTreeLclFld(
4265 GetTypeFromClassificationAndSizes(
4266 fgEntryPtr->structDesc.eightByteClassifications[1],
4267 fgEntryPtr->structDesc.eightByteSizes[1]),
4268 lclCommon->gtLclNum,
4269 fgEntryPtr->structDesc.eightByteOffsets[1]);
4270 // Note this should actually be: secondNode = gtNewArgList(newLclField)
4271 GenTreeArgList* secondNode = gtNewListNode(newLclField, nullptr);
4272 secondNode->gtType = originalType; // Preserve the type. It is a special case.
4273 newLclField->gtFieldSeq = FieldSeqStore::NotAField();
4276 arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
4277 arg->gtType = GetTypeFromClassificationAndSizes(
4278 fgEntryPtr->structDesc.eightByteClassifications[0],
4279 fgEntryPtr->structDesc.eightByteSizes[0]);
4280 arg = gtNewListNode(arg, secondNode);
4281 arg->gtType = type; // Preserve the type. It is a special case.
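// Illustrative result (hypothetical 16-byte struct local V02 = { object o; long l; }):
//   LIST(LCL_FLD ref V02 [+0], LIST(LCL_FLD long V02 [+8], nullptr))
// where each LCL_FLD type comes from the corresponding eightbyte classification.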
4285 assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes for the CLR.
4289 // If we didn't change the type of the struct, it means
4290 // its classification doesn't allow it to be passed directly
4291 // through a register, so we need to pass a pointer to the destination
4292 // where we copied the struct to.
4293 if (!argListCreated)
4295 if (fgEntryPtr->structDesc.passedInRegisters)
4301 // Make sure this is an addr node.
4302 if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
4304 arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
4307 assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);
4309 // Create an Obj of the temp to use it as a call argument.
4310 arg = new (this, GT_OBJ) GenTreeObj(originalType, arg, lvaGetStruct(lclCommon->gtLclNum));
4311 arg->gtFlags |= GTF_EXCEPT;
4312 flagsSummary |= GTF_EXCEPT;
4319 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4320 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4321 assert(fgEntryPtr != nullptr);
4322 GenTreePtr argx = fgEntryPtr->node;
4323 GenTreePtr lateList = nullptr;
4324 GenTreePtr lateNode = nullptr;
4327 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4329 assert(list->IsList());
4331 GenTreePtr argNode = list->Current();
4332 if (argx == argNode)
4339 assert(lateList != nullptr && lateNode != nullptr);
4342 fgEntryPtr->node = arg;
4345 lateList->gtOp.gtOp1 = arg;
4349 args->gtOp.gtOp1 = arg;
4356 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4358 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4360 //-----------------------------------------------------------------------------
4361 // fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and
4362 // call fgMorphMultiregStructArg on each of them.
4365 // call: a GenTreeCall node that has one or more TYP_STRUCT arguments
4368 // We only call fgMorphMultiregStructArg for the register passed TYP_STRUCT arguments.
4369 // The call to fgMorphMultiregStructArg will mutate the argument into the GT_LIST form
4370 // which is only used for register arguments.
4371 // If this method fails to find any TYP_STRUCT arguments it will assert.
4373 void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
4377 bool foundStructArg = false;
4378 unsigned initialFlags = call->gtFlags;
4379 unsigned flagsSummary = 0;
4380 fgArgInfoPtr allArgInfo = call->fgArgInfo;
4382 // Currently only ARM64 uses this method to morph the MultiReg struct args;
4383 // in the future AMD64_UNIX and (for HFAs) ARM32 will also use it.
4386 NYI_ARM("fgMorphMultiregStructArgs");
4389 assert(!"Logic error: no MultiregStructArgs for X86");
4391 #ifdef _TARGET_AMD64_
4392 #if defined(UNIX_AMD64_ABI)
4393 NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)");
4396 assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
4399 for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4401 // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
4402 // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
4403 // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
4404 // between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to the
4405 // arg itself; otherwise it points to the arg's node in the late args list.
4406 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4407 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4408 assert(fgEntryPtr != nullptr);
4409 GenTreePtr argx = fgEntryPtr->node;
4410 GenTreePtr lateList = nullptr;
4411 GenTreePtr lateNode = nullptr;
4415 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4417 assert(list->IsList());
4419 GenTreePtr argNode = list->Current();
4420 if (argx == argNode)
4427 assert(lateList != nullptr && lateNode != nullptr);
4430 GenTreePtr arg = argx;
4432 if (arg->TypeGet() == TYP_STRUCT)
4434 foundStructArg = true;
4436 arg = fgMorphMultiregStructArg(arg, fgEntryPtr);
4438 // Did we replace 'argx' with a new tree?
4441 fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node
4443 // link the new arg node into either the late arg list or the gtCallArgs list
4446 lateList->gtOp.gtOp1 = arg;
4450 args->gtOp.gtOp1 = arg;
4456 // We should only call this method when we actually have one or more multireg struct args
4457 assert(foundStructArg);
4460 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4464 //-----------------------------------------------------------------------------
4465 // fgMorphMultiregStructArg: Given a multireg TYP_STRUCT arg from a call argument list
4466 // Morph the argument into a set of GT_LIST nodes.
4469 // arg - A GenTree node containing a TYP_STRUCT arg that
4470 // is to be passed in multiple registers
4471 // fgEntryPtr - the fgArgTabEntry information for the current 'arg'
4474 // arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT that is suitable
4475 // for passing in multiple registers.
4476 // If arg is a LclVar we check if it is struct promoted and has the right number of fields
4477 // and if they are at the appropriate offsets we will use the struct promoted fields
4478 // in the GT_LIST nodes that we create.
4479 // If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements,
4480 // we will use a set of GT_LCL_FLD nodes to access the various portions of the struct;
4481 // this also forces the struct to be stack-allocated in the local frame.
4482 // For the GT_OBJ case we will clone the address expression and generate two (or more) indirections.
4484 // Currently the implementation only handles ARM64 and will NYI for other architectures.
4486 GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr fgEntryPtr)
4488 assert(arg->TypeGet() == TYP_STRUCT);
4490 #ifndef _TARGET_ARM64_
4491 NYI("fgMorphMultiregStructArg requires implementation for this target");
4494 #if FEATURE_MULTIREG_ARGS
4495 // Examine 'arg' and set up argValue, objClass and structSize
4497 CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE;
4498 GenTreePtr argValue = arg; // normally argValue will be arg, but see right below
4499 unsigned structSize = 0;
4501 if (arg->OperGet() == GT_OBJ)
4503 GenTreeObj* argObj = arg->AsObj();
4504 objClass = argObj->gtClass;
4505 structSize = info.compCompHnd->getClassSize(objClass);
4507 // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR
4509 if (argObj->gtOp1->OperGet() == GT_ADDR)
4511 argValue = argObj->gtOp1->gtOp.gtOp1;
4514 else if (arg->OperGet() == GT_LCL_VAR)
4516 GenTreeLclVarCommon* varNode = arg->AsLclVarCommon();
4517 unsigned varNum = varNode->gtLclNum;
4518 assert(varNum < lvaCount);
4519 LclVarDsc* varDsc = &lvaTable[varNum];
4521 objClass = lvaGetStruct(varNum);
4522 structSize = varDsc->lvExactSize;
4524 noway_assert(objClass != nullptr);
4526 var_types hfaType = TYP_UNDEF;
4527 var_types elemType = TYP_UNDEF;
4528 unsigned elemCount = 0;
4529 unsigned elemSize = 0;
4530 var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0
4532 hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
4533 if (varTypeIsFloating(hfaType))
4536 elemSize = genTypeSize(elemType);
4537 elemCount = structSize / elemSize;
4538 assert(elemSize*elemCount == structSize);
4539 for (unsigned inx = 0; inx<elemCount; inx++)
4541 type[inx] = elemType;
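// For illustration: struct { float x, y, z, w; } is an HFA of four floats, so
// elemType = TYP_FLOAT, elemSize = 4, elemCount = 4, and every type[inx] is TYP_FLOAT.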
4546 assert(structSize <= 2 * TARGET_POINTER_SIZE);
4547 BYTE gcPtrs[2] = { TYPE_GC_NONE, TYPE_GC_NONE };
4548 info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
4550 type[0] = getJitGCType(gcPtrs[0]);
4551 type[1] = getJitGCType(gcPtrs[1]);
4553 if ((argValue->OperGet() == GT_LCL_FLD) ||
4554 (argValue->OperGet() == GT_LCL_VAR))
4556 // We can safely widen this to 16 bytes since we are loading from
4557 // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and
4558 // lives in the stack frame or will be a promoted field.
4560 elemSize = TARGET_POINTER_SIZE;
4561 structSize = 2 * TARGET_POINTER_SIZE;
4563 else // we must have a GT_OBJ
4565 assert(argValue->OperGet() == GT_OBJ);
4567 // We need to load the struct from an arbitrary address
4568 // and we can't read past the end of the structSize
4569 // We adjust the second load type here
4571 if (structSize < 2 * TARGET_POINTER_SIZE)
4573 switch (structSize - TARGET_POINTER_SIZE) {
4578 type[1] = TYP_SHORT;
4584 noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
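// For illustration: a 10-byte struct read through a GT_OBJ leaves
// structSize - TARGET_POINTER_SIZE == 2 bytes for the second load, so
// type[1] becomes TYP_SHORT and we never read past the end of the struct.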
4590 // We should still have a TYP_STRUCT
4591 assert(argValue->TypeGet() == TYP_STRUCT);
4593 GenTreeArgList* newArg = nullptr;
4595 // Are we passing a struct LclVar?
4597 if (argValue->OperGet() == GT_LCL_VAR)
4599 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
4600 unsigned varNum = varNode->gtLclNum;
4601 assert(varNum < lvaCount);
4602 LclVarDsc* varDsc = &lvaTable[varNum];
4604 // At this point any TYP_STRUCT LclVar must be a 16-byte struct
4605 // or an HFA struct, both of which are passed by value.
4607 assert((varDsc->lvSize() == 2*TARGET_POINTER_SIZE) || varDsc->lvIsHfa());
4609 varDsc->lvIsMultiRegArgOrRet = true;
4614 JITDUMP("Multireg struct argument V%02u : ", varNum);
4619 // This local variable must match the layout of the 'objClass' type exactly
4620 if (varDsc->lvIsHfa())
4622 // We have a HFA struct
4623 noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE));
4624 noway_assert(elemSize == genTypeSize(elemType));
4625 noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
4626 noway_assert(elemSize*elemCount == varDsc->lvExactSize);
4628 for (unsigned inx = 0; (inx < elemCount); inx++)
4630 noway_assert(type[inx] == elemType);
4635 // We must have a 16-byte struct (non-HFA)
4636 noway_assert(elemCount == 2);
4638 for (unsigned inx = 0; inx < elemCount; inx++)
4640 CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx];
4642 // We set up the type[inx] value above using the GC info from 'objClass'
4643 // This GT_LCL_VAR must have the same GC layout info
4645 if (currentGcLayoutType != TYPE_GC_NONE)
4647 noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType));
4651 // We may have used a small type when we set up the type[inx] values above
4652 // We can safely widen this to TYP_I_IMPL
4653 type[inx] = TYP_I_IMPL;
4658 // Is this LclVar a promoted struct with exactly 2 fields?
4659 // TODO-ARM64-CQ: Support struct promoted HFA types here
4660 if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2))
4662 // See if we have two promoted fields, one starting at offset 0 and one at offset TARGET_POINTER_SIZE.
4663 unsigned loVarNum = lvaGetFieldLocal(varDsc, 0);
4664 unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE);
4666 // Did we find the promoted fields at the necessary offsets?
4667 if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM))
4669 LclVarDsc* loVarDsc = &lvaTable[loVarNum];
4670 LclVarDsc* hiVarDsc = &lvaTable[hiVarNum];
4672 var_types loType = loVarDsc->lvType;
4673 var_types hiType = hiVarDsc->lvType;
4675 if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
4677 // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer registers
4678 // So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
4680 JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n", varNum);
4682 // we call lvaSetVarDoNotEnregister and do the proper transformation below.
4687 // We can use the struct promoted field as the two arguments
4689 GenTreePtr loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum);
4690 GenTreePtr hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum);
4692 // Create a new tree for 'arg'
4693 // replace the existing LDOBJ(ADDR(LCLVAR))
4694 // with a LIST(LCLVAR-LO, LIST(LCLVAR-HI, nullptr))
4696 newArg = gtNewListNode(loLclVar, gtNewArgList(hiLclVar));
4703 // We will create a list of GT_LCL_FLD nodes to pass this struct
4705 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
4709 // If we didn't set newArg to a new List Node tree
4711 if (newArg == nullptr)
4713 if (fgEntryPtr->regNum == REG_STK)
4715 // We leave this stack passed argument alone
4719 // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted)?
4720 // A GT_LCL_FLD could also contain a 16-byte struct or an HFA struct inside it.
4722 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
4724 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
4725 unsigned varNum = varNode->gtLclNum;
4726 assert(varNum < lvaCount);
4727 LclVarDsc* varDsc = &lvaTable[varNum];
4729 unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
4730 unsigned lastOffset = baseOffset + (elemCount * elemSize);
4732 // The allocated size of our LocalVar must be at least as big as lastOffset
4733 assert(varDsc->lvSize() >= lastOffset);
4735 if (varDsc->lvStructGcCount > 0)
4737 // alignment of the baseOffset is required
4738 noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
4739 noway_assert(elemSize == TARGET_POINTER_SIZE);
4740 unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE;
4741 const BYTE * gcPtrs = varDsc->lvGcLayout; // Get the GC layout for the local variable
4742 for (unsigned inx = 0; (inx < elemCount); inx++)
4744 // The GC information must match what we set up using 'objClass'
4745 noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
4748 else // this varDsc contains no GC pointers
4750 for (unsigned inx = 0; inx<elemCount; inx++)
4752 // The GC information must match what we set up using 'objClass'
4753 noway_assert(!varTypeIsGC(type[inx]));
4758 // We create a list of GT_LCL_FLD nodes to pass this struct
4760 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
4762 // Start building our list from the last element
4763 unsigned offset = lastOffset;
4764 unsigned inx = elemCount;
4766 // Create a new tree for 'arg'
4767 // replace the existing LDOBJ(ADDR(LCLVAR))
4768 // with a LIST(LCLFLD-LO, LIST(LCLFLD-HI, nullptr) ...)
4774 GenTreePtr nextLclFld = gtNewLclFldNode(varNum, type[inx], offset);
4775 if (newArg == nullptr)
4777 newArg = gtNewArgList(nextLclFld);
4781 newArg = gtNewListNode(nextLclFld, newArg);
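// Illustrative result for elemCount == 2, baseOffset == 0 and 8-byte elements
// (the list is built back to front, so the low field ends up first; V09 is a
// hypothetical local number):
//   LIST(LCL_FLD type[0] V09 [+0], LIST(LCL_FLD type[1] V09 [+8], nullptr))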
4785 // Are we passing a GT_OBJ struct?
4787 else if (argValue->OperGet() == GT_OBJ)
4789 GenTreeObj* argObj = argValue->AsObj();
4790 GenTreePtr baseAddr = argObj->gtOp1;
4791 var_types addrType = baseAddr->TypeGet();
4793 // Create a new tree for 'arg'
4794 // replace the existing LDOBJ(EXPR)
4795 // with a LIST(IND(EXPR), LIST(IND(EXPR+8), nullptr) ...)
4798 // Start building our list from the last element
4799 unsigned offset = structSize;
4800 unsigned inx = elemCount;
4804 elemSize = genTypeSize(type[inx]);
4806 GenTreePtr curAddr = baseAddr;
4809 GenTreePtr baseAddrDup = gtCloneExpr(baseAddr);
4810 noway_assert(baseAddrDup != nullptr);
4811 curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL));
4817 GenTreePtr curItem = gtNewOperNode(GT_IND, type[inx], curAddr);
4818 if (newArg == nullptr)
4820 newArg = gtNewArgList(curItem);
4824 newArg = gtNewListNode(curItem, newArg);
4830 // If we reach here we should have set newArg to something
4831 if (newArg == nullptr)
4834 gtDispTree(argValue);
4836 assert(!"Missing case in fgMorphMultiregStructArg");
4842 printf("fgMorphMultiregStructArg created tree:\n");
4847 arg = newArg; // consider calling fgMorphTree(newArg);
4849 #endif // FEATURE_MULTIREG_ARGS
4855 // Make a copy of a struct variable if necessary, to pass to a callee.
4856 // returns: tree that computes address of the outgoing arg
4858 Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call,
4861 CORINFO_CLASS_HANDLE copyBlkClass
4862 FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
4864 GenTree* argx = args->Current();
4865 noway_assert(argx->gtOper != GT_MKREFANY);
4866 // See if we need to insert a copy at all
4867 // Case 1: don't need a copy if it is the last use of a local. We can't determine that all of the time
4868 // but if there is only one use and no loops, the use must be last.
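// For illustration: when a struct local that is an implicit-byref parameter
// is passed on, its only use is this call, and the method has no loops, we
// can pass the local directly instead of copying it to a temp first.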
4869 if (argx->gtOper == GT_OBJ)
4871 GenTree* lcl = argx->gtOp.gtOp1;
4872 if (lcl->OperIsLocal())
4874 unsigned varNum = lcl->AsLclVarCommon()->GetLclNum();
4875 if (lvaIsImplicitByRefLocal(varNum))
4877 LclVarDsc* varDsc = &lvaTable[varNum];
4878 if (varDsc->lvRefCnt == 1 && !fgMightHaveLoop())
4880 varDsc->lvRefCnt = 0;
4881 args->gtOp.gtOp1 = lcl;
4882 fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
4885 JITDUMP("did not have to make outgoing copy for V%2d", varNum);
4886 varDsc->lvRefCnt = 0;
4891 varDsc->lvRefCnt = 0;
4897 if (fgOutgoingArgTemps == nullptr)
4898 fgOutgoingArgTemps = hashBv::Create(this);
4903 // Attempt to find a local we have already used for an outgoing struct and reuse it.
4904 // We do not reuse within a statement.
4905 if (!opts.MinOpts())
4908 FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps)
4910 LclVarDsc* varDsc = &lvaTable[lclNum];
4911 if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass))
4912 && !fgCurrentlyInUseArgTemps->testBit(lclNum))
4914 tmp = (unsigned) lclNum;
4916 JITDUMP("reusing outgoing struct arg");
4923 // Create the CopyBlk tree and insert it.
4927 // Here we don't need the unsafe value class check, since the address of this temp is used only in the copyblk.
4928 tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
4929 lvaSetStruct(tmp, copyBlkClass, false);
4930 fgOutgoingArgTemps->setBit(tmp);
4933 fgCurrentlyInUseArgTemps->setBit(tmp);
4935 // TYP_SIMD structs should not be enregistered, since the ABI requires them to be
4936 // allocated on the stack and their address to be passed.
4937 if (lclVarIsSIMDType(tmp))
4939 lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct));
4942 // Create a reference to the temp
4943 GenTreePtr dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType);
4944 dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction.
4945 dest = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
4946 lvaTable[tmp].incRefCnts(compCurBB->getBBWeight(this), this);
4949 if (argx->gtOper == GT_OBJ)
4951 src = argx->gtOp.gtOp1;
4955 argx->gtFlags |= GTF_DONT_CSE;
4956 src = gtNewOperNode(GT_ADDR, TYP_BYREF, argx);
4959 // Copy the valuetype to the temp
4960 GenTreePtr copyBlk = gtNewCpObjNode(dest, src, copyBlkClass, false);
4961 copyBlk = fgMorphCopyBlock(copyBlk);
4963 #if FEATURE_FIXED_OUT_ARGS
4965 // Do the copy early, and evaluate the temp later (see EvalArgsToTemps)
4966 // When on Unix, create a LCL_FLD for structs passed in more than one register. See fgMakeTmpArgNode
4967 GenTreePtr arg = copyBlk;
4969 #else // FEATURE_FIXED_OUT_ARGS
4971 // Structs are always on the stack, and thus never need temps
4972 // so we have to put the copy and temp all into one expression
4973 GenTreePtr arg = fgMakeTmpArgNode(
4975 FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters));
4977 // Change the expression to "(tmp=val),tmp"
4978 arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
4980 #endif // FEATURE_FIXED_OUT_ARGS
4982 args->gtOp.gtOp1 = arg;
4983 call->fgArgInfo->EvalToTmp(argIndex, tmp, arg);
4989 // See declaration for specification comment.
4990 void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
4991 unsigned firstArgRegNum,
4992 regMaskTP* pArgSkippedRegMask)
4994 assert(varDsc->lvPromoted);
4995 // There's no way to do these calculations without breaking abstraction and assuming that
4996 // integer register arguments are consecutive ints. They are on ARM.
4998 // To start, figure out which register contains the last byte of the first field.
4999 LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
5000 unsigned lastFldRegOfLastByte = (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5002 // Now we're keeping track of the register that the last field ended in; see what registers
5003 // subsequent fields start in, and whether any are skipped.
5004 // (We assume here the invariant that the fields are sorted in offset order.)
5005 for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++)
5007 unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset;
5008 LclVarDsc* fldVarDsc = &lvaTable[fldVarNum];
5009 unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
5010 assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields.
5011 // This loop should enumerate the offsets of any registers skipped:
5012 // find which reg contains the last byte of the previous field, start at the
5013 // first register after that, and stop just before the first reg of the current field.
5014 for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset; skippedRegOffsets++)
5016 // If the register number would not be an arg reg, we're done.
5017 if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG) return;
5018 *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets));
5020 lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
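// Worked example (illustrative, ARM with 4-byte registers and firstArgRegNum == 0):
// for a promoted struct { int a; <4 bytes of padding> long long b; } with fields
// at offsets 0 and 8, field 'a' ends in r0 and field 'b' starts in r2, so r1 is
// recorded in *pArgSkippedRegMask.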
5024 #endif // _TARGET_ARM_
5027 /*****************************************************************************
5029 * The companion to impFixupCallStructReturn. Now that the importer is done
5030 * and we no longer care as much about the declared return type, change to the
5031 * precomputed native return type (at least for architectures that don't
5032 * always use return buffers for structs).
5035 void Compiler::fgFixupStructReturn(GenTreePtr callNode)
5037 GenTreeCall* call = callNode->AsCall();
5038 bool callHasRetBuffArg = call->HasRetBufArg();
5040 if (!callHasRetBuffArg && varTypeIsStruct(call))
5043 if (call->gtCall.IsVarargs() || !IsHfa(call))
5046 // Now that we are past the importer, re-type this node so the register predictor does the right thing
5048 call->gtType = genActualType((var_types)call->gtCall.gtReturnType);
5053 // Either we no longer have a struct, or if we do, it is an HFA returned in regs.
5054 assert(!varTypeIsStruct(call) || (IsHfa(call) && !callHasRetBuffArg));
5055 #elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5056 // Either we no longer have a struct, or if we do, it is a struct returned in regs or via a return buffer.
5057 assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg);
5059 // No more struct returns
5060 assert(call->TypeGet() != TYP_STRUCT);
5063 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5064 // If it was a struct return, it has been transformed into a call
5065 // with a return buffer (that returns TYP_VOID) or into a return
5066 // of a primitive/enregisterable type
5067 assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
5072 /*****************************************************************************
5074 * A little helper used to rearrange nested commutative operations. The
5075 * effect is that nested associative, commutative operations are transformed
5076 * into a 'left-deep' tree, i.e. into something like this:
5078 * (((a op b) op c) op d) op...
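 *  For example (illustrative): the right-leaning "a + (b + (c + d))" is
 *  rewritten into "((a + b) + c) + d", one node at a time, top-down.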
5083 void Compiler::fgMoveOpsLeft(GenTreePtr tree)
5091 op1 = tree->gtOp.gtOp1;
5092 op2 = tree->gtOp.gtOp2;
5093 oper = tree->OperGet();
5095 noway_assert(GenTree::OperIsCommutative(oper));
5096 noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR ||
5097 oper == GT_AND || oper == GT_MUL);
5098 noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
5099 noway_assert(oper == op2->gtOper);
5101 // Commutativity doesn't hold if overflow checks are needed
5103 if (tree->gtOverflowEx() || op2->gtOverflowEx())
5106 if (gtIsActiveCSE_Candidate(op2))
5108 // If we have marked op2 as a CSE candidate,
5109 // we can't perform a commutative reordering
5110 // because any value numbers that we computed for op2
5111 // will be incorrect after performing a commutative reordering
5116 if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
5119 // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
5120 if ( ((oper == GT_ADD) || (oper == GT_MUL))
5121 && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0) )
5126 if ( (tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN )
5128 // We could deal with this, but we were always broken and just hit the assert
5129 // below regarding flags, which means it's not frequent, so we will just bail out.
5134 noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());
5136 GenTreePtr ad1 = op2->gtOp.gtOp1;
5137 GenTreePtr ad2 = op2->gtOp.gtOp2;
5139 // Compiler::optOptimizeBools() can create a GT_OR of two GC pointers yielding a TYP_INT result
5140 // We cannot reorder such GT_OR trees
5142 if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
5145 /* Change "(x op (y op z))" to "(x op y) op z" */
5146 /* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */
5148 GenTreePtr new_op1 = op2;
5150 new_op1->gtOp.gtOp1 = op1;
5151 new_op1->gtOp.gtOp2 = ad1;
5153 /* Change the flags. */
5155 // Make sure we aren't throwing away any flags
5156 noway_assert((new_op1->gtFlags & ~(
5158 GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
5159 GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated
5160 GTF_NODE_MASK|GTF_ALL_EFFECT|GTF_UNSIGNED)) == 0);
5162 new_op1->gtFlags = (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
5163 (op1->gtFlags & GTF_ALL_EFFECT) |
5164 (ad1->gtFlags & GTF_ALL_EFFECT);
5166 /* Retype new_op1 if it is, or has become, a GC pointer */
5168 if (varTypeIsGC(op1->TypeGet()))
5170 noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL && oper == GT_ADD) || // byref(ref + (int+int))
5171 (varTypeIsI (tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL && oper == GT_OR)); // int(gcref | int(gcref|intval))
5173 new_op1->gtType = tree->gtType;
5175 else if (varTypeIsGC(ad2->TypeGet()))
5177 // Neither ad1 nor op1 is GC, so new_op1 isn't either
5178 noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
5179 new_op1->gtType = TYP_I_IMPL;
5182 // If new_op1 is a new expression, assign it a new unique value number.
5183 // vnStore is null before the ValueNumber phase has run
5184 if (vnStore != nullptr)
5186 // We can only keep the old value number on new_op1 if both op1 and ad2
5187 // have the same non-NoVN value numbers. Since op is commutative, comparing
5188 // only ad2 and op1 is enough.
5189 if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5190 (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5191 (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal()))
5193 new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(new_op1->TypeGet()));
5197 tree->gtOp.gtOp1 = new_op1;
5198 tree->gtOp.gtOp2 = ad2;
5200 /* If 'new_op1' is now the same nested op, process it recursively */
5202 if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
5203 fgMoveOpsLeft(new_op1);
5205 /* If 'ad2' is now the same nested op, process it
5206 * Instead of recursion, we set up op1 and op2 for the next loop.
5212 while ((op2->gtOper == oper) && !op2->gtOverflowEx());
5219 /*****************************************************************************/
5221 void Compiler::fgSetRngChkTarget(GenTreePtr tree,
5224 GenTreeBoundsChk* bndsChk = nullptr;
5225 SpecialCodeKind kind = SCK_RNGCHK_FAIL;
5228 if ((tree->gtOper == GT_ARR_BOUNDS_CHECK) || (tree->gtOper == GT_SIMD_CHK))
5229 #else // FEATURE_SIMD
5230 if (tree->gtOper == GT_ARR_BOUNDS_CHECK)
5231 #endif // FEATURE_SIMD
5233 bndsChk = tree->AsBoundsChk();
5234 kind = tree->gtBoundsChk.gtThrowKind;
5238 noway_assert((tree->gtOper == GT_ARR_ELEM) || (tree->gtOper == GT_ARR_INDEX));
5242 unsigned callStkDepth = fgPtrArgCntCur;
5244 // only x86 pushes args
5245 const unsigned callStkDepth = 0;
5252 // we need to initialize this field
5253 if (fgGlobalMorph && bndsChk != nullptr)
5255 bndsChk->gtStkDepth = callStkDepth;
5259 if (!opts.compDbgCode)
5261 if (delay || compIsForInlining())
5263 /* We delay this until after loop-oriented range check
5264 analysis. For now we merely store the current stack
5265 level in the tree node.
5267 if (bndsChk != nullptr)
5269 noway_assert(!bndsChk->gtIndRngFailBB || previousCompletedPhase >= PHASE_OPTIMIZE_LOOPS);
5270 bndsChk->gtStkDepth = callStkDepth;
5275 /* Create/find the appropriate "range-fail" label */
5277 // fgPtrArgCntCur is only valid for global morph or if we walk full stmt.
5278 noway_assert((bndsChk != nullptr) || fgGlobalMorph);
5280 unsigned stkDepth = (bndsChk != nullptr) ? bndsChk->gtStkDepth
5283 BasicBlock * rngErrBlk = fgRngChkTarget(compCurBB, stkDepth, kind);
5285 /* Add the label to the indirection node */
5287 if (bndsChk != nullptr)
5289 bndsChk->gtIndRngFailBB = gtNewCodeRef(rngErrBlk);
5295 /*****************************************************************************
5297 * Expand a GT_INDEX node and fully morph the child operands
5299 * The original GT_INDEX node is bashed into the GT_IND node that accesses
5300 * the array element. We expand the GT_INDEX node into a larger tree that
5301 * evaluates the array base and index. The simplest expansion is a GT_COMMA
5302 * with a GT_ARR_BOUND_CHK and a GT_IND with a GTF_INX_RNGCHK flag.
5303 * For complex array or index expressions one or more GT_COMMA assignments
5304 * are inserted so that we only evaluate the array or index expressions once.
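 *  For example (illustrative): an int[] access "a[i]" expands roughly into:
 *
 *      COMMA(ARR_BOUNDS_CHECK(ARR_LENGTH(a), i),
 *            IND(ADD(ADD(a, MUL(i, 4)), elemOffs)))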
5306 * The fully expanded tree is then morphed. This causes gtFoldExpr to
5307 * perform local constant prop and reorder the constants in the tree.
5310 * We then parse the resulting array element expression in order to locate
5311 * and label the constants and variables that occur in the tree.
5314 const int MAX_ARR_COMPLEXITY = 4;
5315 const int MAX_INDEX_COMPLEXITY = 4;
5317 GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
5319 noway_assert(tree->gtOper == GT_INDEX);
5320 GenTreeIndex* asIndex = tree->AsIndex();
5322 var_types elemTyp = tree->TypeGet();
5323 unsigned elemSize = tree->gtIndex.gtIndElemSize;
5324 CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass;
5326 noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);
5329 if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= getSIMDVectorRegisterByteLength())
5331 // If this is a SIMD type, this is the point at which we lose the type information,
5332 // so we need to set the correct type on the GT_IND.
5333 // (We don't care about the base type here, so we only check, but don't retain, the return value).
5334 unsigned simdElemSize = 0;
5335 if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN)
5337 assert(simdElemSize == elemSize);
5338 elemTyp = getSIMDTypeForSize(elemSize);
5339 // This is the new type of the node.
5340 tree->gtType = elemTyp;
5341 // Now set elemStructType to null so that we don't confuse value numbering.
5342 elemStructType = nullptr;
5345 #endif // FEATURE_SIMD
5347 GenTreePtr arrRef = asIndex->Arr();
5348 GenTreePtr index = asIndex->Index();
5350 // Set up the array length's offset into lenOffs
5351 // and the first element's offset into elemOffs
5354 if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
5356 lenOffs = offsetof(CORINFO_String, stringLen);
5357 elemOffs = offsetof(CORINFO_String, chars);
5358 tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE
5360 else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
5362 lenOffs = offsetof(CORINFO_RefArray, length);
5363 elemOffs = eeGetEEInfo()->offsetOfObjArrayData;
5365 else // We have a standard array
5367 lenOffs = offsetof(CORINFO_Array, length);
5368 elemOffs = offsetof(CORINFO_Array, u1Elems);
5371 bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
5372 bool nCSE = ((tree->gtFlags & GTF_DONT_CSE ) != 0);
5374 GenTreePtr arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
5375 GenTreePtr indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression
5376 GenTreePtr bndsChk = nullptr;
5378 // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
5381 GenTreePtr arrRef2 = nullptr; // The second copy will be used in array address expression
5382 GenTreePtr index2 = nullptr;
5384 // If the arrRef expression involves an assignment, a call or reads from global memory,
5385 // then we *must* allocate a temporary in which to "localize" those values,
5386 // to ensure that the same values are used in the bounds check and the actual dereference.
5388 // Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
5390 if ((arrRef->gtFlags & (GTF_ASG|GTF_CALL|GTF_GLOB_REF)) || gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY))
5392 unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5393 arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef);
5394 arrRef = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5395 arrRef2 = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5399 arrRef2 = gtCloneExpr(arrRef);
5400 noway_assert(arrRef2 != nullptr);
5403 // If the index expression involves an assignment, a call or reads from global memory,
5404 // we *must* allocate a temporary in which to "localize" those values,
5405 // to ensure that the same values are used in the bounds check and the actual dereference.
5407 // Also we allocate the temporary when the index is sufficiently complex/expensive.
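// For example (illustrative): in "a[i++]" the index expression has a side
// effect, so it is evaluated once into a temp and both the bounds check and
// the address computation read back that same temp.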
5409 if ((index->gtFlags & (GTF_ASG|GTF_CALL|GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY))
5411 unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5412 indexDefn = gtNewTempAssign(indexTmpNum, index);
5413 index = gtNewLclvNode(indexTmpNum, index->TypeGet());
5414 index2 = gtNewLclvNode(indexTmpNum, index->TypeGet());
5418 index2 = gtCloneExpr(index);
5419 noway_assert(index2 != nullptr);
5422 // Next introduce a GT_ARR_BOUNDS_CHECK node
5423 var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.
5425 #ifdef _TARGET_64BIT_
5426 // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case
5427 // of a 64 bit architecture this means the array index can potentially be a TYP_LONG, so for this case,
5428 // the comparison will have to be widened to 64 bits.
5429 if (index->TypeGet() == TYP_I_IMPL)
5431 bndsChkType = TYP_I_IMPL;
5433 #endif // _TARGET_64BIT_
5435 GenTree* arrLen = new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arrRef, (int)lenOffs);
5437 if (bndsChkType != TYP_INT)
5439 arrLen = gtNewCastNode(bndsChkType, arrLen, bndsChkType);
5442 GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK) GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, index, SCK_RNGCHK_FAIL);
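// For illustration: on a 64-bit target with a TYP_I_IMPL index, the length is
// widened as well, so the bounds check compares like-sized operands:
//     ARR_BOUNDS_CHECK(CAST(long <- ARR_LENGTH(a)), i)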
5444 bndsChk = arrBndsChk;
5446 // Make sure to increment ref-counts if already ref-counted.
5447 if (lvaLocalVarRefCounted)
5449 lvaRecursiveIncRefCounts(index);
5450 lvaRecursiveIncRefCounts(arrRef);
5453 // Now we'll switch to using the second copies for arrRef and index
5454 // to compute the address expression
5460 // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"
5464 // Widen 'index' on 64-bit targets
5465 #ifdef _TARGET_64BIT_
5466 if (index->TypeGet() != TYP_I_IMPL)
5468 if (index->OperGet() == GT_CNS_INT)
5470 index->gtType = TYP_I_IMPL;
5474 index = gtNewCastNode(TYP_I_IMPL, index, TYP_I_IMPL);
5477 #endif // _TARGET_64BIT_
5479 /* Scale the index value if necessary */
5482 GenTreePtr size = gtNewIconNode(elemSize, TYP_I_IMPL);
5484 // Fix 392756 WP7 Crossgen
5486 // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
5487 // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
5488 // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE.
5490 size->gtFlags |= GTF_DONT_CSE;
5492 /* Multiply by the array element size */
5493 addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
5500 /* Add the object ref to the element's offset */
5502 addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
5504 /* Add the first element's offset */
5506 GenTreePtr cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
5508 addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, cns);
5510 #if SMALL_TREE_NODES
5511 assert(tree->gtFlags & GTF_NODE_LARGE);
5514 // Change the original GT_INDEX node into a GT_IND node
5515 tree->SetOper(GT_IND);
5517 // If the index node is a floating-point type, notify the compiler
5518 // we'll potentially use floating point registers at the time of codegen.
5519 if (varTypeIsFloating(tree->gtType))
5521 this->compFloatingPointUsed = true;
5524 // We've now consumed the GTF_INX_RNGCHK, and the node
5525 // is no longer a GT_INDEX node.
5526 tree->gtFlags &= ~GTF_INX_RNGCHK;
5528 tree->gtOp.gtOp1 = addr;
5530 // This is an array index expression.
5531 tree->gtFlags |= GTF_IND_ARR_INDEX;
5533 /* An indirection will cause a GPF if the address is null */
5534 tree->gtFlags |= GTF_EXCEPT;
5537 tree->gtFlags |= GTF_DONT_CSE;
5539 // Record the element type, size and offset info for this array index expression.
5540 GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int) elemOffs, elemStructType));
5542 // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it.
5544 GenTreePtr indTree = tree;
5546 // Did we create a bndsChk tree?
5549 // Use a GT_COMMA node to prepend the array bound check
5551 tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree);
5553 /* Mark the indirection node as needing a range check */
5554 fgSetRngChkTarget(bndsChk);
5557 if (indexDefn != nullptr)
5559 // Use a GT_COMMA node to prepend the index assignment
5561 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
5563 if (arrRefDefn != nullptr)
5565 // Use a GT_COMMA node to prepend the arrRef assignment
5567 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
5570 // Currently we morph the tree to perform some folding operations prior
5571 // to attaching fieldSeq info and labeling constant array index contributions
5575 // Ideally we just want to proceed to attaching fieldSeq info and labeling the
5576 // constant array index contributions, but the morphing operation may have changed
5577 // the 'tree' into something that now unconditionally throws an exception.
5579 // In such case the gtEffectiveVal could be a new tree or it's gtOper could be modified
5580 // or it could be left unchanged. If it is unchanged then we should not return,
5581 // instead we should proceed to attaching fieldSeq info, etc...
5583 GenTreePtr arrElem = tree->gtEffectiveVal();
5585 if (fgIsCommaThrow(tree))
5587 if ((arrElem != indTree) || // A new tree node may have been created
5588 (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
5590 return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
5594 assert(!fgGlobalMorph || (arrElem->gtFlags & GTF_MORPHED));
5596 addr = arrElem->gtOp.gtOp1;
5598 assert(addr->TypeGet() == TYP_BYREF);
5600 GenTreePtr cnsOff = nullptr;
5601 if (addr->OperGet() == GT_ADD)
5603 if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
5605 cnsOff = addr->gtOp.gtOp2;
5606 addr = addr->gtOp.gtOp1;
5609 while ((addr->OperGet() == GT_ADD) || (addr->OperGet() == GT_SUB))
5611 assert(addr->TypeGet() == TYP_BYREF);
5612 GenTreePtr index = addr->gtOp.gtOp2;
5614 // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
5615 index->LabelIndex(this);
5617 addr = addr->gtOp.gtOp1;
5619 assert(addr->TypeGet() == TYP_REF);
5621 else if (addr->OperGet() == GT_CNS_INT)
5626 FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
5628 if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs))
5630 // Assign it the [#FirstElem] field sequence
5632 cnsOff->gtIntCon.gtFieldSeq = firstElemFseq;
5634 else // We have folded the first element's offset with the index expression
5636 // Build the [#ConstantIndex, #FirstElem] field sequence
5638 FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
5639 FieldSeqNode* fieldSeq = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq);
5641 if (cnsOff == nullptr) // It must have folded into a zero offset
5643 // Record in the general zero-offset map.
5644 GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
5648 cnsOff->gtIntCon.gtFieldSeq = fieldSeq;
5656 /*****************************************************************************
5658 * Wrap fixed stack arguments for varargs functions to go through the varargs
5659 * cookie to access them, except for the cookie itself.
5661 * Non-x86 platforms are allowed to access all arguments directly
5662 * so we don't need this code.
5665 GenTreePtr Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs)
5667 /* For the fixed stack arguments of a varargs function, we need to go
5668 through the varargs cookies to access them, except for the cookie itself */
5671 LclVarDsc * varDsc = &lvaTable[lclNum];
5673 if (varDsc->lvIsParam && !varDsc->lvIsRegArg &&
5674 lclNum != lvaVarargsHandleArg)
5676 // Create a node representing the local pointing to the base of the args
5677 GenTreePtr ptrArg = gtNewOperNode(GT_SUB, TYP_I_IMPL,
5678 gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL),
5679 gtNewIconNode(varDsc->lvStkOffs
5680 - codeGen->intRegState.rsCalleeRegArgNum*sizeof(void*)
5683 // Access the argument through the local
5684 GenTreePtr tree = gtNewOperNode(GT_IND, varType, ptrArg);
5685 tree->gtFlags |= GTF_IND_TGTANYWHERE;
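// Sketch of the resulting access (for illustration): the fixed stack arg is
// reached as IND(varType, LCL_VAR(lvaVarargsBaseOfStkArgs) - <adjusted offset>),
// i.e. relative to the varargs cookie rather than by its own frame offset.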
5687 if (varDsc->lvAddrExposed)
5689 tree->gtFlags |= GTF_GLOB_REF;
5692 return fgMorphTree(tree);
5699 /*****************************************************************************
5701 * Transform the given GT_LCL_VAR tree for code generation.
5704 GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree)
5706 noway_assert(tree->gtOper == GT_LCL_VAR);
5708 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
5709 var_types varType = lvaGetRealType(lclNum);
5710 LclVarDsc * varDsc = &lvaTable[lclNum];
5712 if (varDsc->lvAddrExposed)
5714 tree->gtFlags |= GTF_GLOB_REF;
5718 if (info.compIsVarArgs)
5720 GenTreePtr newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0);
5721 if (newTree != NULL)
5724 #endif // _TARGET_X86_
5726 /* If not during the global morphing phase bail */
5731 bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0;
5733 noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr
5736 varTypeIsSmall(varDsc->TypeGet()) &&
5737 varDsc->lvNormalizeOnLoad())
5739 #if LOCAL_ASSERTION_PROP
5740 /* Assertion prop can tell us to omit adding a cast here */
5741 if (optLocalAssertionProp &&
5742 optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX)
5747 /* Small-typed arguments and aliased locals are normalized on load.
5748 Other small-typed locals are normalized on store.
5749 Also, locals are normalized on load under the debugger, as the debugger could write to the variable.
5750 If this is one of the former, insert a narrowing cast on the load,
5751 ie. Convert: var-short --> cast-short(var-int) */
5753 tree->gtType = TYP_INT;
5754 fgMorphTreeDone(tree);
5755 tree = gtNewCastNode(TYP_INT, tree, varType);
5756 fgMorphTreeDone(tree);
5764 /*****************************************************************************
5765 Grab a temp for big offset morphing.
5766 This method will grab a new temp if no temp of this "type" has been created yet,
5767 or it will return the same cached one if it has.
5769 unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type)
5771 unsigned lclNum = fgBigOffsetMorphingTemps[type];
5773 if (lclNum == BAD_VAR_NUM) {
5774 // We haven't created a temp for this kind of type. Create one now.
5775 lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing"));
5776 fgBigOffsetMorphingTemps[type] = lclNum;
5779 // We better get the right type.
5780 noway_assert(lvaTable[lclNum].TypeGet() == type);
5783 noway_assert(lclNum != BAD_VAR_NUM);
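// Usage note (illustrative): two different big-offset field accesses off
// TYP_REF object references in the same method will share the single cached
// temp in fgBigOffsetMorphingTemps[TYP_REF] rather than each grabbing a new local.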
5788 /*****************************************************************************
5790 * Transform the given GT_FIELD tree for code generation.
5793 GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
5795 assert(tree->gtOper == GT_FIELD);
5797 noway_assert(tree->gtFlags & GTF_GLOB_REF);
5799 CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd;
5800 unsigned fldOffset = tree->gtField.gtFldOffset;
5801 GenTreePtr objRef = tree->gtField.gtFldObj;
5802 bool fieldMayOverlap = false;
5803 if (tree->gtField.gtFldMayOverlap)
5805 fieldMayOverlap = true;
5806 // Reset the flag because we may reuse the node.
5807 tree->gtField.gtFldMayOverlap = false;
5811 // If this field belongs to a SIMD struct, translate it to a SIMD intrinsic.
5812 if (mac == nullptr || mac->m_kind != MACK_Addr)
5814 GenTreePtr newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
5815 if (newTree != tree)
5817 newTree = fgMorphSmpOp(newTree);
5821 else if (objRef != nullptr && objRef->OperGet() == GT_ADDR && objRef->OperIsSIMD())
5823 // We have a field of a SIMD intrinsic in an address-taken context.
5824 // We need to copy the SIMD result to a temp, and take the field of that.
5825 GenTree* copy = fgCopySIMDNode(objRef->gtOp.gtOp1->AsSIMD());
5826 objRef->gtOp.gtOp1 = copy;
5830 /* Is this an instance data member? */
5836 if (tree->gtFlags & GTF_IND_TLS_REF)
5837 NO_WAY("instance field can not be a TLS ref.");
5839 /* We'll create the expression "*(objRef + mem_offs)" */
5841 noway_assert(varTypeIsGC(objRef->TypeGet()) ||
5842 objRef->TypeGet() == TYP_I_IMPL);
5844 // An optimization for Contextful classes:
5845 // we unwrap the proxy when we have a 'this reference'
5846 if (info.compIsContextful &&
5847 info.compUnwrapContextful &&
5850 objRef = fgUnwrapProxy(objRef);
5854 Now we have a tree like this:
5856 +--------------------+
5858 +----------+---------+
5860 +--------------+-------------+
5861 | tree->gtField.gtFldObj |
5862 +--------------+-------------+
5865 We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
5867 +--------------------+
5868 | GT_IND/GT_OBJ | tree
5869 +---------+----------+
5872 +---------+----------+
5874 +---------+----------+
5879 +-------------------+ +----------------------+
5880 | objRef | | fldOffset |
5881 | | | (when fldOffset !=0) |
5882 +-------------------+ +----------------------+
5885 or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
5888 +--------------------+
5889 | GT_IND/GT_OBJ | tree
5890 +----------+---------+
5892 +----------+---------+
5894 +----------+---------+
5900 +---------+----------+ +---------+----------+
5901 comma | GT_COMMA | | "+" (i.e. GT_ADD) | addr
5902 +---------+----------+ +---------+----------+
5907 +-----+-----+ +-----+-----+ +---------+ +-----------+
5908 asg | GT_ASG | ind | GT_IND | | tmpLcl | | fldOffset |
5909 +-----+-----+ +-----+-----+ +---------+ +-----------+
5914 +-----+-----+ +-----+-----+ +-----------+
5915 | tmpLcl | | objRef | | tmpLcl |
5916 +-----------+ +-----------+ +-----------+
5921 var_types objRefType = objRef->TypeGet();
5923 GenTreePtr comma = NULL;
5925 bool addedExplicitNullCheck = false;
5927 // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field,
5928 // and thus is equivalent to a MACK_Ind with zero offset.
5929 MorphAddrContext defMAC(MACK_Ind);
5930 if (mac == NULL) mac = &defMAC;
5932 // This flag is set to enable the "conservative" style of explicit null-check insertion.
5933 // This means that we insert an explicit null check whenever we create byref by adding a
5934 // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately
5935 // dereferenced). The alternative is "aggressive", which would not insert such checks (for
5936 // small offsets); in this plan, we would transfer some null-checking responsibility to
5937 // callees of methods taking byref parameters. They would have to add explicit null checks
5938 // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in
5939 // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
5940 // large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
5941 // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
5942 /// This is left here to point out how to implement it.
5943 #define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1
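// Example (illustrative): when morphing "&obj.f" for a small nonzero field
// offset (a MACK_Addr context), the conservative scheme still inserts an
// explicit null check of 'obj', because the resulting byref is not immediately
// dereferenced; the aggressive scheme would instead leave that responsibility
// to whoever later dereferences the derived byref.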
5945 // If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression
5946 // whose address is being taken is either a local or static variable, whose address is necessarily
5947 // non-null, or else it is a field dereference, which will do its own bounds checking if necessary.
5948 if (objRef->gtOper != GT_ADDR
5949 && ((mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind)
5950 && (!mac->m_allConstantOffsets
5951 || fgIsBigOffset(mac->m_totalOffset + fldOffset)
5952 #if CONSERVATIVE_NULL_CHECK_BYREF_CREATION
5953 || (mac->m_kind == MACK_Addr && (mac->m_totalOffset + fldOffset > 0))
5955 || (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr && (mac->m_totalOffset + fldOffset > 0))
5962 printf("Before explicit null check morphing:\n");
5968 // Create the "comma" subtree
5970 GenTreePtr asg = NULL;
5975 if (objRef->gtOper != GT_LCL_VAR)
5977 lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet()));
5979 // Create the "asg" node
5980 asg = gtNewTempAssign(lclNum, objRef);
5984 lclNum = objRef->gtLclVarCommon.gtLclNum;
5987 // Create the "nullchk" node
5988 nullchk = gtNewOperNode(GT_NULLCHECK,
5989 TYP_BYTE, // Make it TYP_BYTE so we only dereference it for 1 byte.
5990 gtNewLclvNode(lclNum, objRefType));
5991 nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections
5993 /* An indirection will cause a GPF if the address is null */
5994 nullchk->gtFlags |= GTF_EXCEPT;
5998 // Create the "comma" node.
5999 comma = gtNewOperNode(GT_COMMA,
6000 TYP_VOID, // We don't want to return anything from this "comma" node.
6001 // Set the type to TYP_VOID, so we can select "cmp" instruction
6002 // instead of "mov" instruction later on.
6011 addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node.
6013 addedExplicitNullCheck = true;
6015 else if (fldOffset == 0)
6017 // Generate the "addr" node.
6018 addr = objRef;
6019 FieldSeqNode* fieldSeq = fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6020 GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
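// Editorial note (an assumption about the code above, not original text): when the field
// lives at offset 0 there is no GT_ADD/GT_CNS_INT pair to carry the field sequence --
// the field's address *is* objRef -- so the sequence is recorded in a side map keyed by
// the address node instead, e.g. for "s.firstField" where addr == &s.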
6027 #ifdef FEATURE_READYTORUN_COMPILER
6028 if (tree->gtField.gtFieldLookup.addr != nullptr)
6030 GenTreePtr baseOffset = gtNewIconEmbHndNode(tree->gtField.gtFieldLookup.addr, nullptr, GTF_ICON_FIELD_HDL);
6032 if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE)
6033 baseOffset = gtNewOperNode(GT_IND, TYP_I_IMPL, baseOffset);
6035 addr = gtNewOperNode(GT_ADD,
6036 (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL
6037 : TYP_BYREF),
6038 addr, baseOffset);
6043 if (fldOffset != 0)
6045 // Generate the "addr" node.
6046 /* Add the member offset to the object's address */
6047 FieldSeqNode* fieldSeq = fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6048 addr = gtNewOperNode(GT_ADD,
6049 (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL
6050 : TYP_BYREF),
6051 addr,
6052 gtNewIconHandleNode(fldOffset,
6053 GTF_ICON_FIELD_OFF,
6054 fieldSeq));
6057 // Now let's set the "tree" as a GT_IND tree.
6059 tree->SetOper(GT_IND);
6060 tree->gtOp.gtOp1 = addr;
6062 if (fgAddrCouldBeNull(addr))
6064 // This indirection can cause a GPF if the address could be null.
6065 tree->gtFlags |= GTF_EXCEPT;
6068 if (addedExplicitNullCheck)
6071 // Create "comma2" node and link it to "tree".
6073 GenTreePtr comma2;
6074 comma2 = gtNewOperNode(GT_COMMA,
6075 addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node.
6076 comma,
6077 addr);
6078 tree->gtOp.gtOp1 = comma2;
6084 if (addedExplicitNullCheck) {
6085 printf("After adding explicit null check:\n");
6092 else /* This is a static data member */
6094 if (tree->gtFlags & GTF_IND_TLS_REF)
6096 // Thread Local Storage static field reference
6098 // Field ref is a TLS 'Thread-Local-Storage' reference
6100 // Build this tree:   IND(*) #
6101 //                     |
6102 //                    ADD(I_IMPL)
6103 //                    / \
6104 //                   /  CNS(fldOffset)
6105 //                  /
6106 //                 /
6107 //                /
6108 //    IND(I_IMPL) == [Base of this DLL's TLS]
6109 //     |
6110 //    ADD(I_IMPL)
6111 //    / \
6112 //   /   CNS(IdValue*4) or MUL
6113 //  /                       / \
6114 // IND(I_IMPL)             /  CNS(4)
6115 //  |                     /
6116 // CNS(TLS_HDL,0x2C)    IND
6117 //                       |
6118 //                      CNS(pIdAddr)
6120 //   # Denotes the original node
6122 void ** pIdAddr = NULL;
6123 unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**) &pIdAddr);
6126 // If we can access the TLS DLL index ID value directly
6127 // then pIdAddr will be NULL and
6128 // IdValue will be the actual TLS DLL index ID
6130 GenTreePtr dllRef = NULL;
6131 if (pIdAddr == NULL)
6134 dllRef = gtNewIconNode(IdValue*4, TYP_I_IMPL);
6138 dllRef = gtNewIconHandleNode((size_t)pIdAddr, GTF_ICON_STATIC_HDL);
6139 dllRef = gtNewOperNode(GT_IND, TYP_I_IMPL, dllRef);
6140 dllRef->gtFlags |= GTF_IND_INVARIANT;
6144 dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL));
6147 #define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides
6149 // Mark this ICON as a TLS_HDL, codegen will use FS:[cns]
6151 GenTreePtr tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);
6153 tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
6157 /* Add the dllRef */
6158 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
6161 /* indirect to have tlsRef point at the base of the DLL's Thread Local Storage */
6162 tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
6166 FieldSeqNode* fieldSeq = fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6167 GenTreePtr fldOffsetNode = new(this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq);
6169 /* Add the TLS static field offset to the address */
6171 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode);
6174 // Final indirect to get to the actual value of the TLS static field
6176 tree->SetOper(GT_IND);
6177 tree->gtOp.gtOp1 = tlsRef;
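// In flat pseudocode, the address computation built above is roughly (illustrative
// editorial sketch; Win32 x86 TLS layout assumed):
//
//     void** slots      = (void**)fs:[WIN32_TLS_SLOTS]; // TEB's TLS slot array
//     void*  dllTlsBase = slots[IdValue];               // IND(ADD(tlsRef, dllRef))
//     value             = *(dllTlsBase + fldOffset);    // the TLS static itself
//
// where IdValue is either known at JIT time or loaded via *pIdAddr (the MUL-by-4 path).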
6179 noway_assert(tree->gtFlags & GTF_IND_TLS_REF);
6183 // Normal static field reference
6186 // If we can access the static's address directly
6187 // then pFldAddr will be NULL and
6188 // fldAddr will be the actual address of the static field
6190 void ** pFldAddr = NULL;
6191 void * fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**) &pFldAddr);
6193 if (pFldAddr == NULL)
6195 #ifdef _TARGET_64BIT_
6196 if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))
6198 // The address is not directly addressable, so force it into a
6199 // constant so that we handle it properly
6201 GenTreePtr addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL);
6202 addr->gtType = TYP_I_IMPL;
6203 FieldSeqNode* fieldSeq = fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6204 addr->gtIntCon.gtFieldSeq = fieldSeq;
6206 tree->SetOper(GT_IND);
6207 tree->gtOp.gtOp1 = addr;
6209 return fgMorphSmpOp(tree);
6212 #endif // _TARGET_64BIT_
6214 // Only volatile could be set, and it maps over
6215 noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_COMMON_MASK)) == 0);
6216 noway_assert(GTF_FLD_VOLATILE == GTF_IND_VOLATILE);
6217 tree->SetOper(GT_CLS_VAR);
6218 tree->gtClsVar.gtClsVarHnd = symHnd;
6219 FieldSeqNode* fieldSeq = fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6220 tree->gtClsVar.gtFieldSeq = fieldSeq;
6227 GenTreePtr addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL);
6229 // There are two cases here, either the static is RVA based,
6230 // in which case the type of the FIELD node is not a GC type
6231 // and the handle to the RVA is a TYP_I_IMPL. Or the FIELD node is
6232 // a GC type and the handle to it is a TYP_BYREF in the GC heap
6233 // because handles to statics now go into the large object heap
6235 var_types handleTyp = (var_types) (varTypeIsGC(tree->TypeGet()) ? TYP_BYREF
6236 : TYP_I_IMPL);
6237 GenTreePtr op1 = gtNewOperNode(GT_IND, handleTyp, addr);
6238 op1->gtFlags |= GTF_IND_INVARIANT;
6240 tree->SetOper(GT_IND);
6241 tree->gtOp.gtOp1 = op1;
6245 noway_assert(tree->gtOper == GT_IND);
6247 GenTreePtr res = fgMorphSmpOp(tree);
6249 if (fldOffset == 0 && res->OperGet() == GT_IND)
6251 GenTreePtr addr = res->gtOp.gtOp1;
6252 // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node.
6253 FieldSeqNode* fieldSeq = fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6254 fgAddFieldSeqForZeroOffset(addr, fieldSeq);
6262 //------------------------------------------------------------------------------
6263 // fgMorphCallInline: attempt to inline a call
6266 // call - call expression to inline, inline candidate
6267 // inlineResult - result tracking and reporting
6270 // Attempts to inline the call.
6272 // If successful, callee's IR is inserted in place of the call, and
6273 // is marked with an InlineContext.
6275 // If unsuccessful, the transformations done in anticipation of a
6276 // possible inline are undone, and the candidate flag on the call
6277 // is cleared.
6279 void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
6281 // The call must be a candidate for inlining.
6282 assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0);
6284 // Attempt the inline
6285 fgMorphCallInlineHelper(call, inlineResult);
6287 // We should have made up our minds one way or another....
6288 assert(inlineResult->IsDecided());
6290 // If we failed to inline, we have a bit of work to do to clean up
6291 if (inlineResult->IsFailure())
6296 // Before we do any cleanup, create a failing InlineContext to
6297 // capture details of the inlining attempt.
6298 m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult);
6302 // It was an inline candidate, but we haven't expanded it.
6303 if (call->gtCall.gtReturnType != TYP_VOID)
6305 // Detach the GT_CALL tree from the original statement by
6306 // hanging a "nothing" node to it. Later the "nothing" node will be removed
6307 // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.
6309 noway_assert(fgMorphStmt->gtStmt.gtStmtExpr == call);
6310 fgMorphStmt->gtStmt.gtStmtExpr = gtNewNothingNode();
6313 // Clear the Inline Candidate flag so we can ensure later we tried
6314 // inlining all candidates.
6316 call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE;
6320 /*****************************************************************************
6321 * Helper to attempt to inline a call
6322 * Sets success/failure in the inline result
6323 * On success, modifies the current method's IR with the inlinee's IR
6324 * On failure, undoes any speculative modifications to the current method
6327 void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
6329 // Don't expect any surprises here.
6330 assert(result->IsCandidate());
6332 if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING)
6334 // For now, attributing this to the call site, though it's really
6335 // more of a budget issue (lvaCount currently includes all
6336 // caller and prospective callee locals). We still might be
6337 // able to inline other callees into this caller, or inline
6338 // this callee in other callers.
6339 result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
6343 if (call->IsVirtual())
6345 result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL);
6349 // impMarkInlineCandidate() is expected not to mark tail prefixed calls
6350 // and recursive tail calls as inline candidates.
6351 noway_assert(!call->IsTailPrefixedCall());
6352 noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call));
6354 /* If the caller's stack frame is marked, then we can't do any inlining. Period.
6355 Although we have checked this in impCanInline, it is possible that later IL instructions
6356 might cause compNeedSecurityCheck to be set. Therefore we need to check it here again.
6359 if (opts.compNeedSecurityCheck)
6361 result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
6366 // Calling inlinee's compiler to inline the method.
6369 unsigned startVars = lvaCount;
6374 printf("Expanding INLINE_CANDIDATE in statement ");
6375 printTreeID(fgMorphStmt);
6376 printf(" in BB%02u:\n", compCurBB->bbNum);
6377 gtDispTree(fgMorphStmt);
6379 // printf("startVars=%d.\n", startVars);
6384 // Invoke the compiler to inline the call.
6387 fgInvokeInlineeCompiler(call, result);
6389 if (result->IsFailure())
6391 // Undo some changes made in anticipation of inlining...
6393 // Zero out the used locals
6394 memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable));
6395 for (unsigned i = startVars; i < lvaCount; i++)
6397 new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
6400 lvaCount = startVars;
6405 // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount);
6417 // printf("After inlining lvaCount=%d.\n", lvaCount);
6422 /*****************************************************************************
6424 * Performs checks to see if this tail call can be optimized as epilog+jmp.
6426 bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
6428 #if FEATURE_FASTTAILCALL
6429 // Reaching here means that the return types of caller and callee are tail call compatible.
6430 // In case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
6432 // In an implicit tail call case callSig may not be available, but it is guaranteed to be available
6433 // for explicit tail call cases. The reason callSig may not be available in the implicit tail call case is that
6434 // a call node might be marked as an inline candidate and could fail to be inlined. In that case
6435 // fgInline() will replace the return value placeholder with the call node using gtCloneExpr(), which is
6436 // currently not copying/setting callSig.
6438 if (callee->IsTailPrefixedCall())
6440 assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass,
6441 (var_types)callee->gtReturnType, callee->callSig->retTypeClass));
6445 // Note on vararg methods:
6446 // If the caller is a vararg method, we don't know the number of arguments passed by the caller's caller.
6447 // But we can be sure that the in-coming arg area of the vararg caller would be sufficient to hold its
6448 // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as the
6449 // out-going area required for the callee is bounded by the caller's fixed argument space.
6451 // Note that the callee being a vararg method is not a problem since we can account for the params being passed.
6453 // Count of caller args including implicit and hidden (i.e. thisPtr, RetBuf, GenericContext, VarargCookie)
6454 unsigned nCallerArgs = info.compArgsCount;
6456 // Count the callee args including implicit and hidden.
6457 // Note that GenericContext and VarargCookie are added by importer while
6458 // importing the call to gtCallArgs list along with explicit user args.
6459 unsigned nCalleeArgs = 0;
6460 if (callee->gtCallObjp) // thisPtr
6465 if (callee->HasRetBufArg()) // RetBuf
6469 // If the callee has a RetBuf param, the caller must have one too.
6470 // Otherwise go the slow route.
6471 if (info.compRetBuffArg == BAD_VAR_NUM)
6477 // Count user args while tracking whether any of them is a multi-byte param
6478 // that cannot be passed in a register. Note that we don't need to count
6479 // non-standard and secret params passed in registers (e.g. R10, R11) since
6480 // these won't contribute to out-going arg size.
6481 bool hasMultiByteArgs = false;
6482 for (GenTreePtr args = callee->gtCallArgs; (args != nullptr) && !hasMultiByteArgs; args = args->gtOp.gtOp2)
6486 assert(args->IsList());
6487 GenTreePtr argx = args->gtOp.gtOp1;
6489 if (varTypeIsStruct(argx))
6491 // Actual arg may be a child of a GT_COMMA. Skip over comma opers.
6492 while (argx->gtOper == GT_COMMA)
6494 argx = argx->gtOp.gtOp2;
6497 // Get the size of the struct and see if it is register passable.
6498 if (argx->OperGet() == GT_OBJ)
6500 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
6502 unsigned typeSize = 0;
6503 hasMultiByteArgs = !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), argx->gtObj.gtClass, &typeSize, false);
6505 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
6506 // On System V/arm64 the args could be a two-eightbyte struct that is passed in two registers.
6507 // Account for the second eightbyte in the nCalleeArgs.
6508 // https://github.com/dotnet/coreclr/issues/2666
6509 // TODO-CQ-Amd64-Unix/arm64: Structs of size between 9 and 16 bytes are conservatively estimated
6510 // as two args, since they need two registers whereas nCallerArgs is
6511 // counting such an arg as one. This means we will not be optimizing
6512 // certain calls even though doing so is technically possible.
6514 if (typeSize > TARGET_POINTER_SIZE)
6516 unsigned extraArgRegsToAdd = (typeSize / TARGET_POINTER_SIZE);
6517 nCalleeArgs += extraArgRegsToAdd;
6519 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_
6522 assert(!"Target platform ABI rules regarding passing struct type args in registers");
6524 #endif //_TARGET_AMD64_ || _TARGET_ARM64_
6529 hasMultiByteArgs = true;
6534 // Go the slow route if the callee has multi-byte params
6535 if (hasMultiByteArgs)
6540 // Reaching here means that the callee has only argument types that can be passed in
6541 // a register and that, if passed on the stack, will occupy exactly one stack slot in the out-going arg area.
6542 // If we are passing args on the stack for the callee and it has more args passed on the stack than
6543 // the caller, then a fast tail call cannot be performed.
6545 // Note that the GC'ness of on stack args need not match since the arg setup area is marked
6546 // as non-interruptible for fast tail calls.
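// Worked example (editorial, illustrative; x64 with MAX_REG_ARG == 4 assumed): a caller
// f(int, int) has no incoming stack args. If it tail calls g(int, int, int, int, int),
// the callee needs one out-going stack slot, so nCalleeArgs(5) > MAX_REG_ARG(4) while
// nCallerArgs(2) < nCalleeArgs(5), and the fast tail call is rejected below.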
6547 if ((nCalleeArgs > MAX_REG_ARG) && (nCallerArgs < nCalleeArgs))
6559 /*****************************************************************************
6561 * Transform the given GT_CALL tree for tail call code generation.
6563 void Compiler::fgMorphTailCall(GenTreeCall* call)
6565 // x86 classic codegen doesn't require any morphing
6566 #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
6567 NYI_X86("Tail call morphing");
6568 #elif defined(_TARGET_ARM_)
6569 // For the helper-assisted tail calls, we need to push all the arguments
6570 // into a single list, and then add a few extra at the beginning
6572 // Check for PInvoke call types that we don't handle in codegen yet.
6573 assert(!call->IsUnmanaged());
6574 assert(call->IsVirtual() ||
6575 (call->gtCallType != CT_INDIRECT) ||
6576 (call->gtCallCookie == NULL));
6578 // First move the this pointer (if any) onto the regular arg list
6579 GenTreePtr thisPtr = NULL;
6580 if (call->gtCallObjp)
6582 GenTreePtr objp = call->gtCallObjp;
6583 call->gtCallObjp = NULL;
6585 if ((call->gtFlags & GTF_CALL_NULLCHECK) ||
6586 call->IsVirtualVtable())
6588 thisPtr = gtClone(objp, true);
6589 var_types vt = objp->TypeGet();
6590 if (thisPtr == NULL)
6592 // Too complex, so use a temp
6593 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
6594 GenTreePtr asg = gtNewTempAssign(lclNum, objp);
6595 if (!call->IsVirtualVtable())
6597 // Add an indirection to get the nullcheck
6598 GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
6599 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
6600 asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
6602 objp = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
6603 thisPtr = gtNewLclvNode(lclNum, vt);
6605 else if (!call->IsVirtualVtable())
6607 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
6608 objp = gtNewOperNode(GT_COMMA, vt, ind, objp);
6609 thisPtr = gtClone(thisPtr, true);
6612 call->gtFlags &= ~GTF_CALL_NULLCHECK;
6615 GenTreeArgList** pList = &call->gtCallArgs;
6616 #if RETBUFARG_PRECEDES_THIS
6617 if (call->HasRetBufArg()) {
6618 pList = &(*pList)->Rest();
6620 #endif // RETBUFARG_PRECEDES_THIS
6621 *pList = gtNewListNode(objp, *pList);
6624 // Add the extra VSD parameter if needed
6625 CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
6626 if (call->IsVirtualStub())
6628 flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
6631 if (call->gtCallType == CT_INDIRECT) {
6632 arg = gtClone(call->gtCallAddr, true);
6633 noway_assert(arg != NULL);
6636 noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
6637 ssize_t addr = ssize_t(call->gtStubCallStubAddr);
6638 arg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
6640 // Change the call type, so we can add the extra indirection here, rather than in codegen
6641 call->gtCallAddr = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
6642 call->gtStubCallStubAddr = NULL;
6643 call->gtCallType = CT_INDIRECT;
6645 // Add the extra indirection to generate the real target
6646 call->gtCallAddr = gtNewOperNode(GT_IND, TYP_I_IMPL, call->gtCallAddr);
6647 call->gtFlags |= GTF_EXCEPT;
6649 // And push the stub address onto the list of arguments
6650 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
6652 else if (call->IsVirtualVtable())
6654 // TODO-ARM-NYI: for x64 handle CORINFO_TAILCALL_THIS_IN_SECRET_REGISTER
6656 noway_assert(thisPtr != NULL);
6658 GenTreePtr add = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL));
6659 GenTreePtr vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
6660 vtbl->gtFlags |= GTF_EXCEPT;
6662 unsigned vtabOffsOfIndirection;
6663 unsigned vtabOffsAfterIndirection;
6664 info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection);
6666 /* Get the appropriate vtable chunk */
6668 add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
6669 vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
6671 /* Now the appropriate vtable slot */
6673 add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL));
6674 vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
6676 // Switch this to a plain indirect call
6677 call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
6678 assert(!call->IsVirtual());
6679 call->gtCallType = CT_INDIRECT;
6681 call->gtCallAddr = vtbl;
6682 call->gtCallCookie = NULL;
6683 call->gtFlags |= GTF_EXCEPT;
6686 // Now inject a placeholder for the real call target that codegen
6687 // will generate.
6688 GenTreePtr arg = new (this, GT_NOP) GenTreeOp(GT_NOP, TYP_I_IMPL);
6689 codeGen->genMarkTreeInReg(arg, REG_TAILCALL_ADDR);
6690 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
6692 // Lastly inject the pointer for the copy routine
6693 noway_assert(call->callSig != NULL);
6694 void * pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
6695 arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
6696 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
6698 // It is now a varargs tail call
6699 call->gtCallMoreFlags = GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL;
6700 call->gtFlags &= ~GTF_CALL_POP_ARGS;
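// Illustrative summary (editorial assumption about the net effect, not original text):
// for a virtual call "this.Foo(a)" the argument list now reads roughly
//     ARGS(pfnCopyArgs, <placeholder in REG_TAILCALL_ADDR>, [stubAddr,] this, a)
// so codegen can invoke the tail call helper, which copies the args and jumps to the
// real target patched into the placeholder.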
6702 #elif defined(_TARGET_AMD64_)
6703 // For the helper-assisted tail calls, we need to push all the arguments
6704 // into a single list, and then add a few extra at the beginning.
6706 // TailCallHelper(void *copyRoutine, void *callTarget, ....) - i.e We need to add
6707 // copyRoutine and callTarget extra params at the beginning. But callTarget is
6708 // determined by Lower phase. Therefore, we add a place holder arg for callTarget
6709 // here which will be later replaced with callTarget in tail call lowering.
6711 // Check for PInvoke call types that we don't handle in codegen yet.
6712 assert(!call->IsUnmanaged());
6713 assert(call->IsVirtual() ||
6714 (call->gtCallType != CT_INDIRECT) ||
6715 (call->gtCallCookie == NULL));
6717 // Don't support tail calling helper methods
6718 assert(call->gtCallType != CT_HELPER);
6720 // We come this route only for tail prefixed calls that cannot be dispatched as
6721 // fast tail calls.
6722 assert(!call->IsImplicitTailCall());
6723 assert(!fgCanFastTailCall(call));
6725 // First move the this pointer (if any) onto the regular arg list
6726 if (call->gtCallObjp)
6728 GenTreePtr thisPtr = nullptr;
6729 GenTreePtr objp = call->gtCallObjp;
6730 call->gtCallObjp = nullptr;
6732 if (call->NeedsNullCheck())
6734 // clone "this" if "this" has no side effects.
6735 if (!(objp->gtFlags & GTF_SIDE_EFFECT))
6737 thisPtr = gtClone(objp, true);
6740 var_types vt = objp->TypeGet();
6741 if (thisPtr == nullptr)
6743 // create a temp if either "this" has side effects or "this" is too complex to clone.
6746 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
6747 GenTreePtr asg = gtNewTempAssign(lclNum, objp);
6749 // COMMA(tmp = "this", deref(tmp))
6750 GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
6751 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
6752 asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
6754 // COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
6755 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
6759 // thisPtr = COMMA(deref("this"), "this")
6760 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
6761 thisPtr = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true));
6764 call->gtFlags &= ~GTF_CALL_NULLCHECK;
6771 GenTreeArgList** pList = &call->gtCallArgs;
6772 #if RETBUFARG_PRECEDES_THIS
6773 if (call->HasRetBufArg()) {
6774 pList = &(*pList)->Rest();
6776 #endif // RETBUFARG_PRECEDES_THIS
6778 // During rationalization tmp="this" and the null check will
6779 // materialize as embedded stmts in the right execution order.
6780 assert(thisPtr != nullptr);
6781 *pList = gtNewListNode(thisPtr, *pList);
6784 // Add the extra VSD parameter to arg list in case of VSD calls.
6785 // Tail call arg copying thunk will move this extra VSD parameter
6786 // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk()
6787 // in Stublinkerx86.cpp for more details.
6788 CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
6789 if (call->IsVirtualStub())
6791 GenTreePtr stubAddrArg;
6793 flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
6795 if (call->gtCallType == CT_INDIRECT)
6797 stubAddrArg = gtClone(call->gtCallAddr, true);
6798 noway_assert(stubAddrArg != nullptr);
6802 noway_assert((call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0);
6804 ssize_t addr = ssize_t(call->gtStubCallStubAddr);
6805 stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
6808 // Push the stub address onto the list of arguments
6809 call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
6812 // Now inject a placeholder for the real call target that Lower phase will generate.
6813 GenTreePtr arg = gtNewIconNode(0, TYP_I_IMPL);
6814 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
6816 // Inject the pointer for the copy routine to be used for struct copying
6817 noway_assert(call->callSig != nullptr);
6818 void * pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
6819 arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
6820 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
6822 // It is now a varargs tail call dispatched via helper.
6823 call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
6824 call->gtFlags &= ~GTF_CALL_POP_ARGS;
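// Illustrative summary (editorial assumption, not original text): for "this.Foo(a)" the
// argument list now reads roughly
//     ARGS(pfnCopyArgs, <0 placeholder for callTarget>, [stubAddrArg,] this, a)
// matching the TailCallHelper(copyRoutine, callTarget, ....) shape described above;
// the Lower phase later replaces the zero placeholder with the actual call target.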
6826 #endif //_TARGET_AMD64_
6830 //------------------------------------------------------------------------------
6831 // fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop.
6835 // block - basic block ending with a recursive fast tail call
6836 // recursiveTailCall - recursive tail call to transform
6839 // The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop.
6841 void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall)
6843 assert(recursiveTailCall->IsTailCallConvertibleToLoop());
6844 GenTreePtr last = fgGetLastTopLevelStmt(block);
6845 assert(recursiveTailCall == last->gtStmt.gtStmtExpr);
6847 // Transform recursive tail call into a loop.
6849 GenTreePtr earlyArgInsertionPoint = last;
6850 IL_OFFSETX callILOffset = last->gtStmt.gtStmtILoffsx;
6852 // Hoist arg setup statement for the 'this' argument.
6853 GenTreePtr thisArg = recursiveTailCall->gtCallObjp;
6854 if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode())
6856 GenTreePtr thisArgStmt = gtNewStmt(thisArg, callILOffset);
6857 fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt);
6861 // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first;
6862 // then the temps need to be assigned to the method parameters. This is done so that the caller
6863 // parameters are not re-assigned before call arguments depending on them are evaluated.
6864 // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of
6865 // where the next temp or parameter assignment should be inserted.
6867 // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first
6868 // while the second call argument (const 1) doesn't.
6869 // Basic block before tail recursion elimination:
6870 //  ***** BB04, stmt 1 (top level)
6871 //  [000037] ------------               *  stmtExpr  void  (top level) (IL 0x00A...0x013)
6872 //  [000033] --C-G------                \--*  call      void   RecursiveMethod
6873 //  [000030] ------------                  |     /--*  const     int    -1
6874 //  [000031] ------------ arg0 in rcx      +--*  +         int
6875 //  [000029] ------------                  |     \--*  lclVar    int    V00 arg1
6876 //  [000032] ------------ arg1 in rdx      \--*  const     int    1
6877 //
6878 //
6879 // Basic block after tail recursion elimination:
6880 //  ***** BB04, stmt 1 (top level)
6881 //  [000051] ------------               *  stmtExpr  void  (top level) (IL 0x00A...???)
6882 //  [000030] ------------                  |     /--*  const     int    -1
6883 //  [000031] ------------                  |  /--*  +         int
6884 //  [000029] ------------                  |  |  \--*  lclVar    int    V00 arg1
6885 //  [000050] -A----------                  \--*  =         int
6886 //  [000049] D------N----                     \--*  lclVar    int    V02 tmp0
6887 //
6888 //  ***** BB04, stmt 2 (top level)
6889 //  [000055] ------------               *  stmtExpr  void  (top level) (IL 0x00A...???)
6890 //  [000052] ------------                  |  /--*  lclVar    int    V02 tmp0
6891 //  [000054] -A----------                  \--*  =         int
6892 //  [000053] D------N----                     \--*  lclVar    int    V00 arg0
6893 //
6894 //  ***** BB04, stmt 3 (top level)
6895 //  [000058] ------------               *  stmtExpr  void  (top level) (IL 0x00A...???)
6896 //  [000032] ------------                  |  /--*  const     int    1
6897 //  [000057] -A----------                  \--*  =         int
6898 //  [000056] D------N----                     \--*  lclVar    int    V01 arg1
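// In C# terms the example above corresponds roughly to (editorial illustration; names
// assumed):
//
//     static void RecursiveMethod(int arg0, int arg1)
//     {
//         // ...
//         RecursiveMethod(arg1 - 1, 1);  // becomes: tmp0 = arg1 - 1; arg0 = tmp0; arg1 = 1; loop
//     }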
6900 GenTreePtr tmpAssignmentInsertionPoint = last;
6901 GenTreePtr paramAssignmentInsertionPoint = last;
6903 // Process early args. They may contain both setup statements for late args and actual args.
6904 // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum
6905 // below has the correct second argument.
6906 int earlyArgIndex = (thisArg == nullptr) ? 0 : 1;
6907 for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs;
6908 earlyArgs != nullptr;
6909 (earlyArgIndex++, earlyArgs = earlyArgs->Rest()))
6911 GenTreePtr earlyArg = earlyArgs->Current();
6912 if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode())
6914 if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0)
6916 // This is a setup node so we need to hoist it.
6917 GenTreePtr earlyArgStmt = gtNewStmt(earlyArg, callILOffset);
6918 fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt);
6922 // This is an actual argument that needs to be assigned to the corresponding caller parameter.
6923 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex);
6924 GenTreePtr paramAssignStmt = fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset,
6925 tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
6926 if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
6928 // All temp assignments will happen before the first param assignment.
6929 tmpAssignmentInsertionPoint = paramAssignStmt;
6935 // Process late args.
6936 int lateArgIndex = 0;
6937 for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs;
6938 lateArgs != nullptr;
6939 (lateArgIndex++, lateArgs = lateArgs->Rest()))
6941 // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter.
6942 GenTreePtr lateArg = lateArgs->Current();
6943 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex);
6944 GenTreePtr paramAssignStmt = fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset,
6945 tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
6947 if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
6949 // All temp assignments will happen before the first param assignment.
6950 tmpAssignmentInsertionPoint = paramAssignStmt;
6954 // If the method has starg.s 0 or ldarga.s 0 a special local (lvaArg0Var) is created so that
6955 // compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that
6956 // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here.
6957 if (!info.compIsStatic && (lvaArg0Var != info.compThisArg))
6959 var_types thisType = lvaTable[info.compThisArg].TypeGet();
6960 GenTreePtr arg0 = gtNewLclvNode(lvaArg0Var, thisType);
6961 GenTreePtr arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType));
6962 GenTreePtr arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset);
6963 fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt);
6967 fgRemoveStmt(block, last);
6969 // Set the loop edge.
6970 block->bbJumpKind = BBJ_ALWAYS;
6971 block->bbJumpDest = fgFirstBBisScratch() ? fgFirstBB->bbNext : fgFirstBB;
6972 fgAddRefPred(block->bbJumpDest, block);
6973 block->bbFlags &= ~BBF_HAS_JMP;
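// Net effect (editorial summary): the block that ended in the recursive call is now a
// BBJ_ALWAYS block that reassigns the parameters and jumps back to the start of the
// method body, so the recursion runs as a loop without pushing a new frame.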
6976 //------------------------------------------------------------------------------
6977 // fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter.
6981 // arg - argument to assign
6982 // argTabEntry - argument table entry corresponding to arg
6983 // block - basic block the call is in
6984 // callILOffset - IL offset of the call
6985 // tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary)
6986 // paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted
6989 // parameter assignment statement if one was inserted; nullptr otherwise.
6991 GenTreePtr Compiler::fgAssignRecursiveCallArgToCallerParam(GenTreePtr arg,
6992 fgArgTabEntryPtr argTabEntry,
6994 IL_OFFSETX callILOffset,
6995 GenTreePtr tmpAssignmentInsertionPoint,
6996 GenTreePtr paramAssignmentInsertionPoint)
6998 // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because
6999 // some argument trees may reference parameters directly.
7001 GenTreePtr argInTemp = nullptr;
7002 unsigned originalArgNum = argTabEntry->argNum;
7003 bool needToAssignParameter = true;
7005 // TODO-CQ: enable calls with struct arguments passed in registers.
7006 noway_assert(!varTypeIsStruct(arg->TypeGet()));
7008 if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl())
7010 // The argument is already assigned to a temp or is a const.
7013 else if (arg->OperGet() == GT_LCL_VAR)
7015 unsigned lclNum = arg->AsLclVar()->gtLclNum;
7016 LclVarDsc * varDsc = &lvaTable[lclNum];
7017 if (!varDsc->lvIsParam)
7019 // The argument is a non-parameter local so it doesn't need to be assigned to a temp.
7022 else if (lclNum == originalArgNum)
7024 // The argument is the same parameter local that we were about to assign so
7025 // we can skip the assignment.
7026 needToAssignParameter = false;
7030 // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve
7031 // any caller parameters. Some common cases are handled above but we may be able to eliminate
7032 // more temp assignments.
7034 GenTreePtr paramAssignStmt = nullptr;
7035 if (needToAssignParameter)
7037 if (argInTemp == nullptr)
7039 // The argument is not assigned to a temp. We need to create a new temp and insert an assignment.
7040 // TODO: we can avoid a temp assignment if we can prove that the argument tree
7041 // doesn't involve any caller parameters.
7042 unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp"));
7043 GenTreePtr tempSrc = arg;
7044 GenTreePtr tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType);
7045 GenTreePtr tmpAssignNode = gtNewAssignNode(tempDest, tempSrc);
7046 GenTreePtr tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset);
7047 fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt);
7048 argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType);
7051 // Now assign the temp to the parameter.
7052 LclVarDsc *paramDsc = lvaTable + originalArgNum;
7053 assert(paramDsc->lvIsParam);
7054 GenTreePtr paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType);
7055 GenTreePtr paramAssignNode = gtNewAssignNode(paramDest, argInTemp);
7056 paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset);
7058 fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt);
7060 return paramAssignStmt;
7063 /*****************************************************************************
7065 * Transform the given GT_CALL tree for code generation.
7068 GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
7070 if (call->CanTailCall())
7072 // It should be either an explicit (i.e. tail prefixed) or an implicit tail call
7073 assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall());
7075 // It cannot be an inline candidate
7076 assert(!call->IsInlineCandidate());
7078 const char * szFailReason = nullptr;
7079 bool hasStructParam = false;
7080 if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
7082 szFailReason = "Might turn into an intrinsic";
7085 if (opts.compNeedSecurityCheck)
7087 szFailReason = "Needs security check";
7089 else if (compLocallocUsed)
7091 szFailReason = "Localloc used";
7093 #ifdef _TARGET_AMD64_
7094 // Needed for Jit64 compat.
7095 // In future, enabling tail calls from methods that need GS cookie check
7096 // would require codegen side work to emit GS cookie check before a tail
7098 else if (getNeedsGSSecurityCookie())
7100 szFailReason = "GS Security cookie check";
7104 // DDB 99324: Just disable tailcall under compGcChecks stress mode.
7105 else if (opts.compGcChecks)
7107 szFailReason = "GcChecks";
7110 #if FEATURE_TAILCALL_OPT
7113 // We are still not sure whether it can be a tail call, because when converting
7114 // a call to an implicit tail call we must check that there are no locals with
7115 // their address taken. If this is the case, we have to assume that the address
7116 // has been leaked and the current stack frame must live until after the final
7117 // call.
7119 // Verify that none of the vars has the lvHasLdAddrOp or lvAddrExposed bit set. Note
7120 // that lvHasLdAddrOp is much more conservative. We cannot just base it on
7121 // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs
7122 // during the morph stage. The reason for also checking lvAddrExposed is that in case
7123 // of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp.
7124 // The combination of lvHasLdAddrOp and lvAddrExposed, though conservative, allows us
7125 // never to be incorrect.
7127 // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose
7128 // address is taken. Such a flag could be set whenever lvHasLdAddrOp or LvAddrExposed
7129 // is set. This avoids the need for iterating through all lcl vars of the current
7130 // method. Right now throughout the code base we are not consistently using 'set'
7131 // method to set lvHasLdAddrOp and lvAddrExposed flags.
7134 bool hasAddrExposedVars = false;
7135 bool hasStructPromotedParam = false;
7136 bool hasPinnedVars = false;
7138 for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
7140 // If the method is marked as an explicit tail call we will skip the
7141 // following three hazard checks.
7142 // We still must check for any struct parameters and set 'hasStructParam'
7143 // so that we won't transform the recursive tail call into a loop.
7145 if (call->IsImplicitTailCall())
7147 if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed)
7149 hasAddrExposedVars = true;
7152 if (varDsc->lvPromoted && varDsc->lvIsParam)
7154 hasStructPromotedParam = true;
7157 if (varDsc->lvPinned)
7159 // A tail call removes the method from the stack, which means the pinning
7160 // goes away for the callee. We can't allow that.
7161 hasPinnedVars = true;
7165 if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam)
7167 hasStructParam = true;
7168 // This prevents transforming a recursive tail call into a loop
7169 // but doesn't prevent tail call optimization so we need to
7170 // look at the rest of parameters.
7175 if (hasAddrExposedVars)
7177 szFailReason = "Local address taken";
7179 if (hasStructPromotedParam)
7181 szFailReason = "Has Struct Promoted Param";
7185 szFailReason = "Has Pinned Vars";
7188 #endif // FEATURE_TAILCALL_OPT
7190 fgFixupStructReturn(call);
7192 var_types callType = call->TypeGet();
7194 // We have to ensure to pass the incoming retValBuf as the
7195 // outgoing one. Using a temp will not do as this function will
7196 // not regain control to do the copy.
7198 if (info.compRetBuffArg != BAD_VAR_NUM)
7200 noway_assert(callType == TYP_VOID);
7201 GenTreePtr retValBuf = call->gtCallArgs->gtOp.gtOp1;
7202 if (retValBuf->gtOper != GT_LCL_VAR ||
7203 retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg)
7205 szFailReason = "Need to copy return buffer";
7209 // If this is an opportunistic tail call and cannot be dispatched as a
7210 // fast tail call, go the non-tail call route. This is done for perf
7211 // reasons.
7213 // Avoid the cost of determining whether it can be dispatched as a fast tail
7214 // call if we already know that the tail call cannot be honored for other
7215 // reasons.
7216 bool canFastTailCall = false;
7217 if (szFailReason == nullptr)
7219 canFastTailCall = fgCanFastTailCall(call);
7220 if (!canFastTailCall)
7222 // Implicit or opportunistic tail calls are always dispatched via the fast tail call
7223 // mechanism and never via the tail call helper, for perf reasons.
7224 if (call->IsImplicitTailCall())
7226 szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp";
7228 #ifndef LEGACY_BACKEND
7229 else if (!call->IsVirtualStub() && call->HasNonStandardArgs())
7231 // If we are here, it means that the call is an explicitly ".tail" prefixed call that cannot be
7232 // dispatched as a fast tail call.
7234 // Methods with non-standard args will have indirection cell or cookie param passed
7235 // in callee trash register (e.g. R11). Tail call helper doesn't preserve it before
7236 // tail calling the target method and hence ".tail" prefix on such calls needs to be
7237 // ignored.
7239 // Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require an
7240 // extra stub param (e.g. in R11 on Amd64), they can still be called via the tail call helper.
7241 // This is done by adding stubAddr as an additional arg before the original list of
7242 // args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
7243 // in Stublinkerx86.cpp.
7244 szFailReason = "Method with non-standard args passed in callee trash register cannot be tail called via helper";
7246 #ifdef _TARGET_ARM64_
7249 // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER.
7250 // So, bail out if we can't make fast tail call.
7251 szFailReason = "Non-qualified fast tail call";
7254 #endif //LEGACY_BACKEND
7258 // Clear these flags before calling fgMorphCall() to avoid recursion.
7259 bool isTailPrefixed = call->IsTailPrefixedCall();
7260 call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
7262 #if FEATURE_TAILCALL_OPT
7263 call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
7267 if (!canFastTailCall && szFailReason == nullptr)
7269 szFailReason = "Non fast tail calls disabled for PAL based systems.";
7271 #endif // FEATURE_PAL
7273 if (szFailReason != nullptr)
7277 printf("\nRejecting tail call late for call ");
7279 printf(": %s\n", szFailReason);
7283 // for non user funcs, we have no handles to report
7284 info.compCompHnd->reportTailCallDecision(nullptr,
7285 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
7286 isTailPrefixed, TAILCALL_FAIL, szFailReason);
7291 #if !FEATURE_TAILCALL_OPT_SHARED_RETURN
7292 // We enable shared-ret tail call optimization for recursive calls even if
7293 // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined.
7294 if (gtIsRecursiveCall(call))
7295 #endif
7297 // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN,
7298 // but if the call falls through to a ret, and we are doing a tailcall, change it here.
7299 if (compCurBB->bbJumpKind != BBJ_RETURN)
7300 compCurBB->bbJumpKind = BBJ_RETURN;
7304 // Set this flag before calling fgMorphCall() to prevent inlining this call.
7305 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
7307 bool fastTailCallToLoop = false;
7308 #if FEATURE_TAILCALL_OPT
7309 // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register
7310 // or return type is a struct that can be passed in a register.
7312 // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through
7313 // hidden generic context param or through keep alive thisptr), then while transforming a recursive
7314 // call to such a method requires that the generic context stored on stack slot be updated. Right now,
7315 // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming
7316 // a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the
7317 // generic type parameters of both caller and callee generic method are the same.
7318 if (opts.compTailCallLoopOpt &&
7320 gtIsRecursiveCall(call) &&
7321 !lvaReportParamTypeArg() &&
7322 !lvaKeepAliveAndReportThis() &&
7323 !call->IsVirtual() &&
7325 !varTypeIsStruct(call->TypeGet()))
7327 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
7328 fastTailCallToLoop = true;
7332 // Do some target-specific transformations (before we process the args, etc.)
7333 // This is needed only for tail prefixed calls that cannot be dispatched as
7334 // fast tail calls.
7335 if (!canFastTailCall)
7337 fgMorphTailCall(call);
7340 // Implementation note: If we optimize tailcall to do a direct jump
7341 // to the target function (after stomping on the return address, etc),
7342 // without using CORINFO_HELP_TAILCALL, we have to make certain that
7343 // we don't starve the hijacking logic (by stomping on the hijacked
7344 // return address etc).
7346 // At this point, we are committed to do the tailcall.
7347 compTailCallUsed = true;
7349 CorInfoTailCall tailCallResult;
7351 if (fastTailCallToLoop)
7353 tailCallResult = TAILCALL_RECURSIVE;
7355 else if (canFastTailCall)
7357 tailCallResult = TAILCALL_OPTIMIZED;
7361 tailCallResult = TAILCALL_HELPER;
7364 // for non user funcs, we have no handles to report
7365 info.compCompHnd->reportTailCallDecision(nullptr,
7366 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
7371 // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID
7372 // to avoid doing any extra work for the return value.
7373 call->gtType = TYP_VOID;
7378 printf("\nGTF_CALL_M_TAILCALL bit set for call ");
7381 if (fastTailCallToLoop)
7383 printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
7391 GenTreePtr stmtExpr = fgMorphStmt->gtStmt.gtStmtExpr;
7394 // Tail call needs to be in one of the following IR forms
7395 //    Either a call stmt or
7396 //    GT_RETURN(GT_CALL(..)) or
7397 //    var = call
7398 noway_assert((stmtExpr->gtOper == GT_CALL && stmtExpr == call) ||
7399 (stmtExpr->gtOper == GT_RETURN && (stmtExpr->gtOp.gtOp1 == call || stmtExpr->gtOp.gtOp1->gtOp.gtOp1 == call)) ||
7400 (stmtExpr->gtOper == GT_ASG && stmtExpr->gtOp.gtOp2 == call));
7403 // For void calls, we would have created a GT_CALL in the stmt list.
7404 // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)).
7405 // For calls returning structs, we would have a void call, followed by a void return.
7406 // For debuggable code, it would be an assignment of the call to a temp
7407 // We want to get rid of any of these extra trees, and just leave
7408 // the call.
7409 GenTreePtr nextMorphStmt = fgMorphStmt->gtNext;
7411 #ifdef _TARGET_AMD64_
7412 // Legacy Jit64 Compat:
7413 // There could be any number of GT_NOPs between tail call and GT_RETURN.
7414 // That is, the tail call pattern could be one of the following:
7415 // 1) tail.call, nop*, ret
7416 // 2) tail.call, nop*, pop, nop*, ret
7417 // 3) var=tail.call, nop*, ret(var)
7418 // 4) var=tail.call, nop*, pop, ret
7420 // See impIsTailCallILPattern() for details on tail call IL patterns
7421 // that are supported.
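// For instance, pattern 3 ("var=tail.call, nop*, ret(var)") corresponds to IL like
// (editorial illustration):
//     tail. call int32 Foo::Bar()
//     stloc.0
//     nop
//     ldloc.0
//     ret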
7422 if ((stmtExpr->gtOper == GT_CALL) || (stmtExpr->gtOper == GT_ASG))
7424 // First delete all GT_NOPs after the call
7425 GenTreePtr morphStmtToRemove = nullptr;
7426 while (nextMorphStmt != nullptr)
7428 GenTreePtr nextStmtExpr = nextMorphStmt->gtStmt.gtStmtExpr;
7429 if (!nextStmtExpr->IsNothingNode())
7434 morphStmtToRemove = nextMorphStmt;
7435 nextMorphStmt = morphStmtToRemove->gtNext;
7436 fgRemoveStmt(compCurBB, morphStmtToRemove);
7439 // Check to see if there is a pop.
7440 // Since tail call is honored, we can get rid of the stmt corresponding to pop.
7441 if (nextMorphStmt != nullptr && nextMorphStmt->gtStmt.gtStmtExpr->gtOper != GT_RETURN)
7443 // Note that pop opcode may or may not result in a new stmt (for details see
7444 // impImportBlockCode()). Hence, it is not possible to assert about the IR
7445 // form generated by pop but pop tree must be side-effect free so that we can
7446 // delete it safely.
7447 GenTreePtr popStmt = nextMorphStmt;
7448 nextMorphStmt = nextMorphStmt->gtNext;
7450 noway_assert((popStmt->gtStmt.gtStmtExpr->gtFlags & GTF_ALL_EFFECT) == 0);
7451 fgRemoveStmt(compCurBB, popStmt);
7454 // Next delete any GT_NOP nodes after pop
7455 while (nextMorphStmt != nullptr)
7457 GenTreePtr nextStmtExpr = nextMorphStmt->gtStmt.gtStmtExpr;
7458 if (!nextStmtExpr->IsNothingNode())
7463 morphStmtToRemove = nextMorphStmt;
7464 nextMorphStmt = morphStmtToRemove->gtNext;
7465 fgRemoveStmt(compCurBB, morphStmtToRemove);
7468 #endif // _TARGET_AMD64_
7470 // Delete GT_RETURN if any
7471 if (nextMorphStmt != nullptr)
7473 GenTreePtr retExpr = nextMorphStmt->gtStmt.gtStmtExpr;
7474 noway_assert(retExpr->gtOper == GT_RETURN);
7476 // If var=call, then the next stmt must be a GT_RETURN(TYP_VOID) or GT_RETURN(var).
7477 // This can occur if impSpillStackEnsure() has introduced an assignment to a temp.
7478 if (stmtExpr->gtOper == GT_ASG && info.compRetType != TYP_VOID)
7480 noway_assert(stmtExpr->gtGetOp1()->OperIsLocal());
7481 noway_assert(stmtExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum == retExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum);
7484 fgRemoveStmt(compCurBB, nextMorphStmt);
7487 fgMorphStmt->gtStmt.gtStmtExpr = call;
7489 // Tail call via helper: The VM can't use return address hijacking if we're
7490 // not going to return and the helper doesn't have enough info to safely poll,
7491 // so we poll before the tail call, if the block isn't already safe. Since
7492 // tail call via helper is a slow mechanism, it doesn't matter whether we emit a
7493 // GC poll. This is done to be in parity with Jit64. Also this avoids GC info
7494 // size increase if almost all methods are expected to be tail calls (e.g. F#).
7496 // Note that we can avoid emitting GC-poll if we know that the current BB is
7497 // dominated by a Gc-SafePoint block. But we don't have dominator info at this
7498 // point. One option is to just add a place holder node for GC-poll (e.g. GT_GCPOLL)
7499 // here and remove it in lowering if the block is dominated by a GC-SafePoint. For
7500 // now it is not clear whether optimizing slow tail calls is worth the effort. As a
7501 // low cost check, we check whether the first and current basic blocks are
7502 // GC safe points.
7504 // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder()
7505 // is going to mark the method as fully interruptible if the block containing this tail
7506 // call is reachable without executing any call.
7507 if (canFastTailCall ||
7508 (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) ||
7509 (compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
7510 !fgCreateGCPoll(GCPOLL_INLINE, compCurBB))
7512 // We didn't insert a poll block, so we need to morph the call now
7513 // (Normally it will get morphed when we get to the split poll block)
7514 GenTreePtr temp = fgMorphCall(call);
7515 noway_assert(temp == call);
7518 // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to
7519 // the target. So we don't need an epilog - just like CORINFO_HELP_THROW.
7521 // Fast tail call: in case of fast tail calls, we need a jmp epilog and
7522 // hence mark it as BBJ_RETURN with BBF_JMP flag set.
7523 noway_assert(compCurBB->bbJumpKind == BBJ_RETURN);
7525 if (canFastTailCall)
7527 compCurBB->bbFlags |= BBF_HAS_JMP;
7531 compCurBB->bbJumpKind = BBJ_THROW;
7534 // For non-void calls, we return a place holder which will be
7535 // used by the parent GT_RETURN node of this call.
7537 GenTree* result = call;
7538 if (callType != TYP_VOID && info.compRetType != TYP_VOID)
7540 #ifdef _TARGET_ARM_
7541 // Return a dummy node, as the return is already removed.
7542 if (callType == TYP_STRUCT)
7544 // This is a HFA, use float 0.
7545 callType = TYP_FLOAT;
7547 #elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
7548 // Return a dummy node, as the return is already removed.
7549 if (varTypeIsStruct(callType))
7551 // This is a register-returned struct. Return a 0.
7552 // The actual return registers are hacked in lower and the register allocator.
7557 // Return a dummy node, as the return is already removed.
7558 if (varTypeIsSIMD(callType))
7560 callType = TYP_DOUBLE;
7563 result = gtNewZeroConNode(genActualType(callType));
7564 result = fgMorphTree(result);
7572 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 &&
7573 (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR)
7574 #ifdef FEATURE_READYTORUN_COMPILER
7575 || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
7578 (call == fgMorphStmt->gtStmt.gtStmtExpr))
7580 // This is a call to CORINFO_HELP_VIRTUAL_FUNC_PTR with an ignored result.
7581 // Transform it into a null check.
7583 GenTreePtr thisPtr = call->gtCallArgs->gtOp.gtOp1;
7585 GenTreePtr nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr);
7586 nullCheck->gtFlags |= GTF_EXCEPT;
7588 return fgMorphTree(nullCheck);
7591 noway_assert(call->gtOper == GT_CALL);
7594 // Only count calls once (only in the global morph phase)
7598 if (call->gtCallType == CT_INDIRECT)
7601 optIndirectCallCount++;
7603 else if (call->gtCallType == CT_USER_FUNC)
7606 if (call->IsVirtual())
7607 optIndirectCallCount++;
7611 // Couldn't inline - remember that this BB contains method calls
7613 // If this is a 'regular' call, mark the basic block as
7614 // having a call (for computing full interruptibility).
7616 // Amd64 note: If this is a fast tail call then don't count it as a call
7617 // since we don't insert GC-polls but instead make the method fully GC
7619 #ifdef _TARGET_AMD64_
7620 if (!call->IsFastTailCall())
7623 if (call->gtCallType == CT_INDIRECT)
7625 compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
7627 else if (call->gtCallType == CT_USER_FUNC)
7629 if ((call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK) == 0)
7630 compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
7632 // otherwise we have a CT_HELPER
7635 // Morph Type.op_Equality and Type.op_Inequality
7636 // We need to do this before the arguments are morphed
7637 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
7639 CorInfoIntrinsics methodID = info.compCompHnd->getIntrinsicID(call->gtCallMethHnd);
7641 genTreeOps simpleOp = GT_CALL;
7642 if (methodID == CORINFO_INTRINSIC_TypeEQ)
7643 simpleOp = GT_EQ;
7644 else if (methodID == CORINFO_INTRINSIC_TypeNEQ)
7645 simpleOp = GT_NE;
7647 if (simpleOp == GT_EQ || simpleOp == GT_NE)
7649 noway_assert(call->TypeGet() == TYP_INT);
7651 // Check for GetClassFromHandle(handle) and obj.GetType(), both of which will only return RuntimeType objects.
7652 // Then if either operand is one of these two calls we can simplify op_Equality/op_Inequality to GT_EQ/GT_NE:
7653 // One important invariant that should never change is that type equivalency is always equivalent to object
7654 // identity equality for runtime type objects in reflection. This is also reflected in RuntimeTypeHandle::TypeEquals.
7655 // If this invariant is ever broken, we need to remove the optimization below.
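// Illustrative C# (editorial, not from the original source): in
//
//     if (obj.GetType() == typeof(string)) { ... }
//
// both operands are known to be RuntimeType objects, so the op_Equality call can be
// replaced by a simple GT_EQ on the two object references.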
7657 GenTreePtr op1 = call->gtCallArgs->gtOp.gtOp1;
7658 GenTreePtr op2 = call->gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
7660 if (gtCanOptimizeTypeEquality(op1) || gtCanOptimizeTypeEquality(op2))
7662 GenTreePtr compare = gtNewOperNode(simpleOp, TYP_INT, op1, op2);
7664 // fgMorphSmpOp will further optimize the following patterns:
7665 // 1. typeof(...) == typeof(...)
7666 // 2. typeof(...) == obj.GetType()
7667 return fgMorphTree(compare);
7672 // Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack.
7673 GenTreePtr origDest = nullptr; // Will only become non-null if we do the transformation (and thus require copy-back).
7674 unsigned retValTmpNum = BAD_VAR_NUM;
7675 CORINFO_CLASS_HANDLE structHnd = nullptr;
7676 if (call->HasRetBufArg() &&
7677 call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null).
7679 // We're enforcing the invariant that return buffer pointers (at least for
7680 // struct return types containing GC pointers) are never pointers into the heap.
7681 // The large majority of cases are address of local variables, which are OK.
7682 // Otherwise, allocate a local of the given struct type, pass its address,
7683 // then assign from that into the proper destination. (We don't need to do this
7684 // if we're passing the caller's ret buff arg to the callee, since the caller's caller
7685 // will maintain the same invariant.)
7687 GenTreePtr dest = call->gtCallArgs->gtOp.gtOp1;
7688 assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above.
7689 if (dest->gtType == TYP_BYREF
7690 && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR))
7692 // We'll exempt helper calls from this, assuming that the helper implementation
7693 // follows the old convention, and does whatever barrier is required.
7694 if (call->gtCallType != CT_HELPER)
7696 structHnd = call->gtRetClsHnd;
7697 if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd)
7698 && !((dest->OperGet() == GT_LCL_VAR || dest->OperGet() == GT_REG_VAR)
7699 && dest->gtLclVar.gtLclNum == info.compRetBuffArg))
7703 retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
7704 lvaSetStruct(retValTmpNum, structHnd, true);
7705 dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
7710 call->gtCallArgs->gtOp.gtOp1 = dest;
7713 /* Process the "normal" argument list */
7714 call = fgMorphArgs(call);
7716 // Optimize get_ManagedThreadId(get_CurrentThread)
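// Illustrative sketch (editor-added): the recognized source shape is roughly
//
//     int id = Thread.CurrentThread.ManagedThreadId;
//
// which imports as get_ManagedThreadId(get_CurrentThread()); further down this is
// folded into a single CORINFO_HELP_GETCURRENTMANAGEDTHREADID helper call.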
7717 noway_assert(call->gtOper == GT_CALL);
7719 // Morph a stelem.ref helper call that stores a null value into a direct array store that needs no helper.
7720 // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
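// Illustrative sketch (editor-added): for C# code like
//
//     object[] a = ...; a[i] = null;
//
// the importer emits CORINFO_HELP_ARRADDR_ST(a, i, null). A null store needs no
// covariance (array store) check, so it can become a plain 'a[i] = null' assignment.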
7721 if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
7723 GenTreePtr value = gtArgEntryByArgNum(call, 2)->node;
7725 if (value->OperGet() == GT_CNS_INT && value->AsIntConCommon()->IconValue() == 0)
7727 GenTreePtr arr = gtArgEntryByArgNum(call, 0)->node;
7728 GenTreePtr index = gtArgEntryByArgNum(call, 1)->node;
7730 arr = gtClone(arr, true);
7733 index = gtClone(index, true);
7734 if (index != nullptr)
7736 value = gtClone(value);
7737 noway_assert(value != nullptr);
7739 GenTreePtr nullCheckedArr = impCheckForNullPointer(arr);
7740 GenTreePtr arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
7741 GenTreePtr arrStore = gtNewAssignNode(arrIndexNode, value);
7742 arrStore->gtFlags |= GTF_ASG;
7744 return fgMorphTree(arrStore);
7750 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
7751 info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId)
7753 noway_assert(origDest == NULL);
7754 noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != NULL);
7756 GenTreePtr innerCall = call->gtCallLateArgs->gtOp.gtOp1;
7758 if (innerCall->gtOper == GT_CALL &&
7759 (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
7760 info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) == CORINFO_INTRINSIC_GetCurrentManagedThread)
7762 // substitute expression with call to helper
7763 GenTreePtr newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT, 0);
7764 JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n");
7765 return fgMorphTree(newCall);
7769 if (origDest != NULL)
7771 GenTreePtr retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
7772 // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused
7773 // var, which would allow the whole assignment to be optimized away to a NOP. So in that case, make the
7774 // origDest into a comma that uses the var. Note that the var doesn't have to be a temp for this to be correct.
7776 if (origDest->OperGet() == GT_ASG)
7778 if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
7780 GenTreePtr var = origDest->gtOp.gtOp1;
7781 origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest,
7782 gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet()));
7785 GenTreePtr copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false);
7786 copyBlk = fgMorphTree(copyBlk);
7787 GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk);
7789 result->gtFlags |= GTF_MORPHED;
7798 /*****************************************************************************
7800 * Transform the given GTK_CONST tree for code generation.
7803 GenTreePtr Compiler::fgMorphConst(GenTreePtr tree)
7805 noway_assert(tree->OperKind() & GTK_CONST);
7807 /* Clear any exception flags or other unnecessary flags
7808 * that may have been set before folding this node to a constant */
7810 tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
7812 if (tree->OperGet() != GT_CNS_STR)
7815 // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
7816 // guarantee slow performance for that block. Instead cache the return value
7817 // of CORINFO_HELP_STRCNS and go to cache first giving reasonable perf.
7819 if (compCurBB->bbJumpKind == BBJ_THROW)
7821 CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd);
7822 if (helper != CORINFO_HELP_UNDEF)
7824 // For un-important blocks, we want to construct the string lazily
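// Illustrative sketch (editor-added): for a literal in a throw-only block, e.g.
//
//     throw new Exception("rarely reached");
//
// the string is not constructed at jit time; instead a lazy helper call is
// emitted, roughly CORINFO_HELP_STRCNS(<rid of token>, <module handle>).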
7826 GenTreeArgList *args;
7827 if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE)
7829 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT));
7833 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT),
7834 gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd));
7838 tree = gtNewHelperCallNode(helper, TYP_REF, 0, args);
7839 return fgMorphTree(tree);
7843 assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd));
7846 InfoAccessType iat = info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd,
7847 tree->gtStrCon.gtSconCPX,
7850 tree = gtNewStringLiteralNode(iat, pValue);
7852 return fgMorphTree(tree);
7855 /*****************************************************************************
7857 * Transform the given GTK_LEAF tree for code generation.
7860 GenTreePtr Compiler::fgMorphLeaf(GenTreePtr tree)
7862 noway_assert(tree->OperKind() & GTK_LEAF);
7864 if (tree->gtOper == GT_LCL_VAR)
7866 return fgMorphLocalVar(tree);
7869 else if (tree->gtOper == GT_LCL_FLD)
7871 if (info.compIsVarArgs)
7873 GenTreePtr newTree = fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
7874 if (newTree != NULL)
7878 #endif // _TARGET_X86_
7879 else if (tree->gtOper == GT_FTN_ADDR)
7881 CORINFO_CONST_LOOKUP addrInfo;
7883 #ifdef FEATURE_READYTORUN_COMPILER
7884 if (tree->gtFptrVal.gtEntryPoint.addr != nullptr)
7886 addrInfo = tree->gtFptrVal.gtEntryPoint;
7891 info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo);
7894 // Refer to gtNewIconHandleNode() as the template for constructing a constant handle
7896 tree->SetOper(GT_CNS_INT);
7897 tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle));
7898 tree->gtFlags |= GTF_ICON_FTN_ADDR;
7900 switch (addrInfo.accessType)
7903 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
7904 tree->gtFlags |= GTF_IND_INVARIANT;
7909 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
7913 tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding
7917 noway_assert(!"Unknown addrInfo.accessType");
7920 return fgMorphTree(tree);
7927 void Compiler::fgAssignSetVarDef(GenTreePtr tree)
7929 GenTreeLclVarCommon* lclVarCmnTree;
7930 bool isEntire = false;
7931 if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire))
7935 lclVarCmnTree->gtFlags |= GTF_VAR_DEF;
7939 // We consider partial definitions to be modeled as uses followed by definitions.
7940 // This captures the idea that preceding defs are not necessarily made redundant
7941 // by this definition.
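// Illustrative sketch (editor-added): given 'struct S { int a; int b; } S s;',
//
//     s.a = 5;    // writes only part of s
//
// is tagged GTF_VAR_DEF | GTF_VAR_USEASG: a use of the old value of 's'
// followed by a def, so an earlier full def of 's' is not made redundant.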
7942 lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
7947 GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
7949 genTreeOps oper = tree->gtOper;
7951 // Only xxBlk opcodes are possible
7952 noway_assert(tree->OperIsBlkOp());
7954 GenTreePtr dest = tree->gtOp.gtOp1->gtOp.gtOp1; // Dest address
7955 GenTreePtr src = tree->gtOp.gtOp1->gtOp.gtOp2; // Src
7956 GenTreePtr blkShape = tree->gtOp.gtOp2; // [size/clsHnd]
7957 bool volatil = tree->AsBlkOp()->IsVolatile();
7959 GenTreePtr lclVarTree;
7961 // The dest must be an address
7962 noway_assert(genActualType(dest->gtType) == TYP_I_IMPL ||
7963 dest->gtType == TYP_BYREF);
7965 // For COPYBLK the src must be an address
7966 noway_assert(!tree->OperIsCopyBlkOp() ||
7967 (genActualType( src->gtType) == TYP_I_IMPL ||
7968 src->gtType == TYP_BYREF));
7970 // For INITBLK the src must be a TYP_INT
7971 noway_assert(oper != GT_INITBLK ||
7972 (genActualType( src->gtType) == TYP_INT));
7974 // The size must be an integer type
7975 noway_assert(varTypeIsIntegral(blkShape->gtType));
7977 CORINFO_CLASS_HANDLE clsHnd;
7979 var_types type = TYP_UNDEF;
7981 if (blkShape->gtOper != GT_CNS_INT)
7985 // importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD)
7986 // The SIMD type in question could be Vector2f which is 8-bytes in size.
7987 // The below check is to make sure that we don't turn that copyblk
7988 // into an assignment, since rationalizer logic will transform the
7989 // copyblk appropriately. Otherwise, the transformation made in this
7990 // routine will prevent rationalizer logic and we might end up with
7991 // GT_ADDR(GT_SIMD) node post rationalization, leading to a noway assert
7993 if (src->OperGet() == GT_ADDR && src->gtGetOp1()->OperGet() == GT_SIMD)
7997 if (!blkShape->IsIconHandle())
8000 size = blkShape->gtIntCon.gtIconVal;
8002 /* A four byte BLK_COPY can be treated as an integer assignment */
8005 #ifdef _TARGET_64BIT_
8012 clsHnd = (CORINFO_CLASS_HANDLE) blkShape->gtIntCon.gtIconVal;
8013 size = roundUp(info.compCompHnd->getClassSize(clsHnd), sizeof(void*));
8015 // Since we round up, we are not handling the case where we have a
8016 // non-dword sized struct with GC pointers.
8017 // The EE currently does not allow this, but we may change. Lets assert it
8019 noway_assert(info.compCompHnd->getClassSize(clsHnd) == size);
8021 if (size == REGSIZE_BYTES)
8024 info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
8025 type = getJitGCType(gcPtr);
8030 // See if we can do a simple transformation:
8032 // GT_ASG <TYP_size>
8034 // GT_IND GT_IND or CNS_INT
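// Illustrative sketch (editor-added): a fixed four-byte copyBlk such as
//
//     copyBlk(dstAddr, srcAddr, 4)
//
// can become the integer assignment '*(int*)dstAddr = *(int*)srcAddr', i.e.
// GT_ASG(GT_IND<int>(dstAddr), GT_IND<int>(srcAddr)).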
8043 goto ONE_SIMPLE_ASG;
8046 goto ONE_SIMPLE_ASG;
8048 #ifdef _TARGET_64BIT_
8051 goto ONE_SIMPLE_ASG;
8052 #endif // _TARGET_64BIT_
8055 noway_assert(type != TYP_UNDEF);
8059 noway_assert(size <= REGSIZE_BYTES);
8061 // For INITBLK, a non-constant source is not going to allow us to fiddle
8062 // with the bits to create a single assignment.
8064 if ((oper == GT_INITBLK) && (src->gtOper != GT_CNS_INT))
8069 if (impIsAddressInLocal(dest, &lclVarTree))
8071 #if LOCAL_ASSERTION_PROP
8072 // Kill everything about dest
8073 if (optLocalAssertionProp)
8075 if (optAssertionCount > 0)
8077 fgKillDependentAssertions(lclVarTree->gtLclVarCommon.gtLclNum DEBUGARG(tree));
8080 #endif // LOCAL_ASSERTION_PROP
8082 unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
8083 // A previous incarnation of this code also required the local not to be
8084 // address-exposed(=taken). That seems orthogonal to the decision of whether
8085 // to do field-wise assignments: being address-exposed will cause it to be
8086 // "dependently" promoted, so it will be in the right memory location. One possible
8087 // further reason for avoiding field-wise stores is that the struct might have alignment-induced
8088 // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid
8089 // concern, then we could compromise, and say that being address-exposed, plus having fields that do not
8090 // completely cover the memory of the struct, prevents field-wise assignments. The same situation exists for the "src" decision.
8091 if (varTypeIsStruct(lclVarTree) &&
8092 (lvaTable[lclNum].lvPromoted || lclVarIsSIMDType(lclNum)))
8095 // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.)
8099 if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
8100 size == genTypeSize(var_types(lvaTable[lclNum].lvType)))
8102 // Use the dest local var directly.
8104 type = lvaTable[lclNum].lvType; // Make the type used in the GT_IND node match
8106 // If the block operation had been a write to a local var of a small int type,
8107 // of the exact size of the small int type, and the var is NormalizeOnStore,
8108 // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
8109 // have done that normalization. If we're now making it into an assignment,
8110 // the NormalizeOnStore will work, and it can be a full def.
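// Illustrative sketch (editor-added): for a NormalizeOnStore TYP_SHORT local,
//
//     copyBlk(&shortLcl, srcAddr, 2)       // block op: no widening on store
//
// becomes 'shortLcl = *(short*)srcAddr'; the normal store path then performs
// the sign-extension, so the store can be a full (non-USEASG) definition.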
8111 if (lvaTable[lclNum].lvNormalizeOnStore())
8113 dest->gtFlags &= (~GTF_VAR_USEASG);
8120 // Could be a non-promoted struct, or a floating point type local, or
8121 // an int subject to a partial write. Don't enregister.
8122 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
8123 // Fall through to indirect the dest node.
8125 // Mark the local var tree as a definition point of the local.
8126 lclVarTree->gtFlags |= GTF_VAR_DEF;
8127 if (size < lvaTable[lclNum].lvExactSize) // If it's not a full-width assignment....
8128 lclVarTree->gtFlags |= GTF_VAR_USEASG;
8131 // Check to ensure we are not creating a reducible *(& ... )
8132 if (dest->gtOper == GT_ADDR)
8134 GenTreePtr addrOp = dest->gtOp.gtOp1;
8135 // Ignore reinterpret casts between int/gc
8136 if ((addrOp->TypeGet() == type) ||
8137 (varTypeIsIntegralOrI(addrOp) && (genTypeSize(addrOp->TypeGet()) == size)))
8140 type = addrOp->TypeGet();
8145 /* Indirect the dest node */
8147 dest = gtNewOperNode(GT_IND, type, dest);
8149 /* As long as we don't have more information about the destination we
8150 have to assume it could live anywhere (not just in the GC heap). Mark
8151 the GT_IND node so that we use the correct write barrier helper in case
8152 the field is a GC ref.
8155 dest->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
8160 dest->gtFlags |= GTF_DONT_CSE;
8162 if (tree->OperIsCopyBlkOp())
8164 if (impIsAddressInLocal(src, &lclVarTree))
8166 unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
8167 if (varTypeIsStruct(lclVarTree) &&
8168 (lvaTable[lclNum].lvPromoted || lclVarIsSIMDType(lclNum)))
8170 // Let fgMorphCopyBlock handle it.
8174 if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
8175 size == genTypeSize(genActualType(lclVarTree->TypeGet())))
8177 /* Use the src local var directly */
8183 #ifndef LEGACY_BACKEND
8185 // The source argument of the copyblk can potentially
8186 // be accessed only through indir(addr(lclVar))
8187 // or indir(lclVarAddr) in rational form and liveness
8188 // won't account for these uses. That said,
8189 // we have to mark this local as address exposed so
8190 // we don't delete it as a dead store later on.
8191 unsigned lclVarNum = lclVarTree->gtLclVarCommon.gtLclNum;
8192 lvaTable[lclVarNum].lvAddrExposed = true;
8193 lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
8195 #else // LEGACY_BACKEND
8196 lvaSetVarDoNotEnregister(lclVarTree->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
8197 #endif // LEGACY_BACKEND
8199 // Fall through to indirect the src node.
8203 /* Indirect the src node */
8205 src = gtNewOperNode(GT_IND, type, src);
8206 src->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
8211 src->gtFlags |= GTF_DONT_CSE;
8213 else // (oper == GT_INITBLK)
8217 size_t cns = src->gtIntCon.gtIconVal;
8223 #ifdef _TARGET_64BIT_
8228 #endif // _TARGET_64BIT_
8230 src->gtType = type; // Make the type used in the GT_IND node match for TYP_REF
8232 // if we are using a GT_INITBLK on a GC type, the value being assigned has to be zero (null)
8233 assert(!varTypeIsGC(type) || (cns == 0));
8236 src->gtIntCon.gtIconVal = cns;
8240 /* Create the assignment node */
8242 result = gtNewAssignNode(dest, src);
8243 result->gtType = type;
8253 //------------------------------------------------------------------------
8254 // fgMorphInitBlock: Perform the Morphing of a GT_INITBLK node
8257 // tree - a tree node with a gtOper of GT_INITBLK
8258 // the child nodes for tree have already been Morphed
8261 // We can return the original GT_INITBLK unmodified (least desirable, but always correct)
8262 // We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable)
8263 // If we have performed struct promotion of the Dest() then we will try to
8264 // perform a field by field assignment for each of the promoted struct fields
8267 // If we leave it as a GT_INITBLK we will call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp
8268 // if the Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
8269 // cannot use a field by field assignment and must leave the original GT_INITBLK unmodified.
8271 GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
8273 noway_assert(tree->gtOper == GT_INITBLK);
8275 JITDUMP("\nfgMorphInitBlock:");
8277 GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
8280 JITDUMP(" using oneAsgTree.\n");
8285 GenTreeInitBlk* initBlkOp = tree->AsInitBlk();
8287 GenTreePtr destAddr = initBlkOp->Dest();
8288 GenTreePtr initVal = initBlkOp->InitVal();
8289 GenTreePtr blockSize = initBlkOp->Size();
8291 // The dest must be an address
8292 noway_assert(genActualType(destAddr->gtType) == TYP_I_IMPL ||
8293 destAddr->gtType == TYP_BYREF);
8295 // The size must be an integer type
8296 assert(varTypeIsIntegral(blockSize->gtType));
8298 unsigned blockWidth = 0;
8299 bool blockWidthIsConst = false;
8301 if (blockSize->IsCnsIntOrI())
8303 blockWidthIsConst = true;
8304 blockWidth = unsigned(blockSize->gtIntConCommon.IconValue());
8307 GenTreeLclVarCommon* lclVarTree = nullptr;
8309 FieldSeqNode* destFldSeq = nullptr;
8310 unsigned destLclNum = BAD_VAR_NUM;
8311 LclVarDsc * destLclVar = nullptr;
8312 bool destDoFldAsg = false;
8314 if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
8316 destLclNum = lclVarTree->gtLclNum;
8317 destLclVar = &lvaTable[destLclNum];
8319 #if LOCAL_ASSERTION_PROP
8320 // Kill everything about destLclNum (and its field locals)
8321 if (optLocalAssertionProp)
8323 if (optAssertionCount > 0)
8325 fgKillDependentAssertions(destLclNum DEBUGARG(tree));
8328 #endif // LOCAL_ASSERTION_PROP
8330 if (destLclVar->lvPromoted && blockWidthIsConst)
8332 noway_assert(varTypeIsStruct(destLclVar));
8333 noway_assert(!opts.MinOpts());
8334 if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles)
8336 JITDUMP(" dest is address exposed");
8340 if (blockWidth == destLclVar->lvExactSize)
8342 JITDUMP(" (destDoFldAsg=true)");
8343 // We may decide later that a copyblk is required when this struct has holes
8344 destDoFldAsg = true;
8348 JITDUMP(" with mismatched size");
8354 // Can we use field by field assignment for the dest?
8355 if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
8357 JITDUMP(" dest contains holes");
8358 destDoFldAsg = false;
8361 JITDUMP(destDoFldAsg ? " using field by field initialization.\n"
8362 : " this requires an InitBlock.\n");
8364 if (!destDoFldAsg && (destLclVar != nullptr))
8366 // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister.
8367 if (!destLclVar->lvRegStruct)
8369 // Mark it as DoNotEnregister.
8370 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
8374 // Mark the dest struct as DoNotEnreg
8375 // when they are LclVar structs and we are using a CopyBlock
8376 // or the struct is not promoted
8380 #if CPU_USES_BLOCK_MOVE
8381 compBlkOpUsed = true;
8386 // The initVal must be a constant of TYP_INT
8387 noway_assert(initVal->OperGet() == GT_CNS_INT);
8388 noway_assert(genActualType(initVal->gtType) == TYP_INT);
8390 // The dest must be of a struct type.
8391 noway_assert(varTypeIsStruct(destLclVar));
8394 // Now, convert InitBlock to individual assignments
8402 unsigned fieldLclNum;
8403 unsigned fieldCnt = destLclVar->lvFieldCnt;
8405 for (unsigned i=0; i<fieldCnt; ++i)
8407 fieldLclNum = destLclVar->lvFieldLclStart + i;
8408 dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
8410 noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
8411 // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
8412 dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
8414 srcCopy = gtCloneExpr(initVal);
8415 noway_assert(srcCopy != nullptr);
8417 // need type of oper to be same as tree
8418 if (dest->gtType == TYP_LONG)
8420 srcCopy->ChangeOperConst(GT_CNS_NATIVELONG);
8421 // copy and extend the value
8422 srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue());
8423 /* Change the types of srcCopy to TYP_LONG */
8424 srcCopy->gtType = TYP_LONG;
8426 else if (varTypeIsFloating(dest->gtType))
8428 srcCopy->ChangeOperConst(GT_CNS_DBL);
8429 // setup the bit pattern
8430 memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal, sizeof(srcCopy->gtDblCon.gtDconVal));
8431 /* Change the types of srcCopy to TYP_DOUBLE */
8432 srcCopy->gtType = TYP_DOUBLE;
8436 noway_assert(srcCopy->gtOper == GT_CNS_INT);
8437 noway_assert(srcCopy->TypeGet() == TYP_INT);
8438 // setup the bit pattern
8439 memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal, sizeof(srcCopy->gtIntCon.gtIconVal));
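// Illustrative note (editor-added): memset replicates the low byte of the init
// value across the constant, e.g. initVal 0x3F fills a TYP_INT field with
// 0x3F3F3F3F -- the same bytes the original INITBLK would have written.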
8442 srcCopy->gtType = dest->TypeGet();
8444 asg = gtNewAssignNode(dest, srcCopy);
8446 #if LOCAL_ASSERTION_PROP
8447 if (optLocalAssertionProp)
8449 optAssertionGen(asg);
8451 #endif // LOCAL_ASSERTION_PROP
8455 tree = gtNewOperNode(GT_COMMA,
8468 tree->gtFlags |= GTF_MORPHED;
8472 printf("fgMorphInitBlock (after):\n");
8481 //------------------------------------------------------------------------
8482 // fgMorphCopyBlock: Perform the Morphing of a GT_COPYBLK and GT_COPYOBJ nodes
8485 // tree - a tree node with a gtOper of GT_COPYBLK or GT_COPYOBJ
8486 // the child nodes for tree have already been Morphed
8489 // We can return the original GT_COPYBLK or GT_COPYOBJ unmodified (least desirable, but always correct)
8490 // We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable)
8491 // If we have performed struct promotion of the Source() or the Dest() then we will try to
8492 // perform a field by field assignment for each of the promoted struct fields
8495 // If we leave it as a GT_COPYBLK or GT_COPYOBJ we will call lvaSetVarDoNotEnregister() on both Source() and Dest()
8496 // When performing a field by field assignment we can have one of Source() or Dest() treated as a blob of bytes
8497 // and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
8498 // if the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
8499 // cannot use a field by field assignment and must leave the original GT_COPYBLK unmodified.
8501 GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
8503 noway_assert(tree->OperIsCopyBlkOp());
8505 JITDUMP("\nfgMorphCopyBlock:");
8507 bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0;
8509 GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
8513 JITDUMP(" using oneAsgTree.\n");
8518 GenTreePtr destAddr;
8520 GenTreePtr blockSize;
8523 if (tree->OperGet() == GT_COPYBLK)
8525 GenTreeCpBlk* copyBlkOp = tree->AsCpBlk();
8528 destAddr = copyBlkOp->Dest();
8529 srcAddr = copyBlkOp->Source();
8530 blockSize = copyBlkOp->Size();
8534 GenTreeCpObj* copyObjOp = tree->AsCpObj();
8537 destAddr = copyObjOp->Dest();
8538 srcAddr = copyObjOp->Source();
8539 blockSize = copyObjOp->ClsTok();
8542 noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL);
8543 noway_assert(srcAddr->TypeGet() == TYP_BYREF || srcAddr->TypeGet() == TYP_I_IMPL);
8545 unsigned blockWidth = 0;
8546 bool blockWidthIsConst = false;
8548 if (blockSize->IsCnsIntOrI())
8550 blockWidthIsConst = true;
8551 if (blockSize->IsIconHandle(GTF_ICON_CLASS_HDL))
8553 CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE) blockSize->gtIntConCommon.IconValue();
8554 blockWidth = info.compCompHnd->getClassSize(clsHnd);
8558 blockWidth = unsigned(blockSize->gtIntConCommon.IconValue());
8562 GenTreeLclVarCommon* lclVarTree = nullptr;
8564 FieldSeqNode* destFldSeq = nullptr;
8565 unsigned destLclNum = BAD_VAR_NUM;
8566 LclVarDsc* destLclVar = nullptr;
8567 bool destDoFldAsg = false;
8568 bool destOnStack = false;
8570 if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
8573 destLclNum = lclVarTree->gtLclNum;
8574 destLclVar = &lvaTable[destLclNum];
8576 #if LOCAL_ASSERTION_PROP
8577 // Kill everything about destLclNum (and its field locals)
8578 if (optLocalAssertionProp)
8580 if (optAssertionCount > 0)
8582 fgKillDependentAssertions(destLclNum DEBUGARG(tree));
8585 #endif // LOCAL_ASSERTION_PROP
8587 if (destLclVar->lvPromoted && blockWidthIsConst)
8589 noway_assert(varTypeIsStruct(destLclVar));
8590 noway_assert(!opts.MinOpts());
8592 if (blockWidth == destLclVar->lvExactSize)
8594 JITDUMP(" (destDoFldAsg=true)");
8595 // We may decide later that a copyblk is required when this struct has holes
8596 destDoFldAsg = true;
8600 JITDUMP(" with mismatched dest size");
8605 FieldSeqNode* srcFldSeq = nullptr;
8606 unsigned srcLclNum = BAD_VAR_NUM;
8607 LclVarDsc* srcLclVar = nullptr;
8608 bool srcDoFldAsg = false;
8610 if (srcAddr->IsLocalAddrExpr(this, &lclVarTree, &srcFldSeq))
8612 srcLclNum = lclVarTree->gtLclNum;
8613 srcLclVar = &lvaTable[srcLclNum];
8615 if (srcLclVar->lvPromoted && blockWidthIsConst)
8617 noway_assert(varTypeIsStruct(srcLclVar));
8618 noway_assert(!opts.MinOpts());
8620 if (blockWidth == srcLclVar->lvExactSize)
8622 JITDUMP(" (srcDoFldAsg=true)");
8623 // We may decide later that a copyblk is required when this struct has holes
8628 JITDUMP(" with mismatched src size");
8633 // Check to see if we are required to do a copy block because the struct contains holes
8634 // and either the src or dest is externally visible
8636 bool requiresCopyBlock = false;
8637 bool srcSingleLclVarAsg = false;
8639 // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
8640 if ((destLclVar != nullptr && destLclVar->lvRegStruct) ||
8641 (srcLclVar != nullptr && srcLclVar->lvRegStruct))
8643 requiresCopyBlock = true;
8646 // Can we use field by field assignment for the dest?
8647 if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
8649 JITDUMP(" dest contains custom layout and contains holes");
8650 // C++ style CopyBlock with holes
8651 requiresCopyBlock = true;
8654 // Can we use field by field assignment for the src?
8655 if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles)
8657 JITDUMP(" src contains custom layout and contains holes");
8658 // C++ style CopyBlock with holes
8659 requiresCopyBlock = true;
8662 if (tree->OperGet() == GT_COPYBLK && tree->AsCpBlk()->gtBlkOpGcUnsafe)
8664 requiresCopyBlock = true;
8667 // If we passed the above checks, then we will check these two
8668 if (!requiresCopyBlock)
8670 // Are both dest and src promoted structs?
8671 if (destDoFldAsg && srcDoFldAsg)
8673 // Both structs should be of the same type, if not we will use a copy block
8674 if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() != lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
8676 requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
8677 JITDUMP(" with mismatched types");
8680 // Are neither dest or src promoted structs?
8681 else if (!destDoFldAsg && !srcDoFldAsg)
8683 requiresCopyBlock = true; // Leave as a CopyBlock
8684 JITDUMP(" with no promoted structs");
8686 else if (destDoFldAsg)
8688 // Match the following kinds of trees:
8689 // fgMorphTree BB01, stmt 9 (before)
8690 // [000052] ------------ const int 8
8691 // [000053] -A--G------- copyBlk void
8692 // [000051] ------------ addr byref
8693 // [000050] ------------ lclVar long V07 loc5
8694 // [000054] --------R--- <list> void
8695 // [000049] ------------ addr byref
8696 // [000048] ------------ lclVar struct(P) V06 loc4
8697 // long V06.h (offs=0x00) -> V17 tmp9
8698 // Yields this transformation
8699 // fgMorphCopyBlock (after):
8700 // [000050] ------------ lclVar long V07 loc5
8701 // [000085] -A---------- = long
8702 // [000083] D------N---- lclVar long V17 tmp9
8704 if (blockWidthIsConst &&
8705 (destLclVar->lvFieldCnt == 1) &&
8706 (srcLclVar != nullptr) &&
8707 (blockWidth == genTypeSize(srcLclVar->TypeGet())))
8709 // Reject the following tree:
8710 // - seen on x86chk jit\jit64\hfa\main\hfa_sf3E_r.exe
8712 // fgMorphTree BB01, stmt 6 (before)
8713 // [000038] ------------- const int 4
8714 // [000039] -A--G-------- copyBlk void
8715 // [000037] ------------- addr byref
8716 // [000036] ------------- lclVar int V05 loc3
8717 // [000040] --------R---- <list> void
8718 // [000035] ------------- addr byref
8719 // [000034] ------------- lclVar struct(P) V04 loc2
8720 // float V04.f1 (offs=0x00) -> V13 tmp6
8721 // As this would transform into
8722 // float V13 = int V05
8724 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart;
8725 var_types destType = lvaTable[fieldLclNum].TypeGet();
8726 if (srcLclVar->TypeGet() == destType)
8728 srcSingleLclVarAsg = true;
8734 // If we require a copy block, then set both of the field-assign bools to false
8735 if (requiresCopyBlock)
8737 // If a copy block is required then we won't do field by field assignments
8738 destDoFldAsg = false;
8739 srcDoFldAsg = false;
8742 JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n"
8743 : " using field by field assignments.\n");
8745 // Mark the dest/src structs as DoNotEnreg
8746 // when they are not reg-sized non-field-addressed structs and we are using a CopyBlock
8747 // or the struct is not promoted
8749 if (!destDoFldAsg && (destLclVar != nullptr))
8751 if (!destLclVar->lvRegStruct)
8753 // Mark it as DoNotEnregister.
8754 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
8758 if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg)
8760 if (!srcLclVar->lvRegStruct)
8762 lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp));
8766 if (requiresCopyBlock)
8768 #if CPU_USES_BLOCK_MOVE
8769 compBlkOpUsed = true;
8771 // Note that the unrolling of CopyBlk is only implemented on some platforms
8772 // Currently that includes x64 and Arm64 but not x86 or Arm32
8773 #ifdef CPBLK_UNROLL_LIMIT
8774 // If we have a CopyObj with a dest on the stack
8775 // we will convert it into a GC Unsafe CopyBlk that is non-interruptible
8776 // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes)
8778 if (isCopyObj && destOnStack && blockWidthIsConst &&
8779 (blockWidth >= (2*TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT))
8781 tree->SetOper(GT_COPYBLK);
8782 tree->AsCpBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
8783 blockSize->gtIntConCommon.SetIconValue(ssize_t(blockWidth));
8784 blockSize->gtFlags &= ~GTF_ICON_HDL_MASK; // Clear the GTF_ICON_CLASS_HDL flags
8787 // Liveness doesn't consider copyblk arguments of simple types as being
8788 // a use or def, so explicitly mark these variables as address-exposed.
8789 if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar))
8791 JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
8792 lvaTable[srcLclNum].lvAddrExposed = true;
8795 if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar))
8797 JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
8798 lvaTable[destLclNum].lvAddrExposed = true;
8805 // Otherwise we convert this CopyBlock into individual field by field assignments
8812 GenTreePtr addrSpill = nullptr;
8813 unsigned addrSpillTemp = BAD_VAR_NUM;
8814 bool addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame
8816 unsigned fieldCnt = DUMMY_INIT(0);
8818 if (destDoFldAsg && srcDoFldAsg)
8820 // To do fieldwise assignments for both sides, they'd better be the same struct type!
8821 // All of these conditions were checked above...
8822 assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
8823 assert(lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() == lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle());
8824 assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);
8826 fieldCnt = destLclVar->lvFieldCnt;
8827 goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field assignments.
8829 else if (destDoFldAsg)
8831 fieldCnt = destLclVar->lvFieldCnt;
8835 assert(srcDoFldAsg);
8836 fieldCnt = srcLclVar->lvFieldCnt;
8841 noway_assert(!srcDoFldAsg);
8842 if (gtClone(srcAddr))
8844 // srcAddr is simple expression. No need to spill.
8845 noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
8849 // srcAddr is complex expression. Clone and spill it (unless the destination is
8850 // a struct local that only has one field, in which case we'd only use the
8851 // address value once...)
8852 if (destLclVar->lvFieldCnt > 1)
8854 addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr'
8855 noway_assert(addrSpill != nullptr);
8862 noway_assert(!destDoFldAsg);
8864 // If we're doing field-wise stores, to an address within a local, and we copy
8865 // the address into "addrSpill", do *not* declare the original local var node in the
8866 // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the
8867 // field-wise assignments as an "indirect" assignment to the local.
8868 // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before
8870 if (lclVarTree != nullptr)
8872 lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
8875 if (gtClone(destAddr))
8877 // destAddr is simple expression. No need to spill
8878 noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
8882 // destAddr is complex expression. Clone and spill it (unless
8883 // the source is a struct local that only has one field, in which case we'd only
8884 // use the address value once...)
8885 if (srcLclVar->lvFieldCnt > 1)
8887 addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr'
8888 noway_assert(addrSpill != nullptr);
8891 // TODO-CQ: this should be based on a more general
8892 // "BaseAddress" method, that handles fields of structs, before or after
8894 if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR)
8896 if (addrSpill->gtOp.gtOp1->IsLocal())
8898 // We will *not* consider this to define the local, but rather have each individual field assign
8900 addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
8901 assert(lvaTable[addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum].lvLclBlockOpAddr == 1);
8902 addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our local stack frame
8908 if (addrSpill != nullptr)
8910 // Spill the (complex) address to a BYREF temp.
8911 // Note, at most one address may need to be spilled.
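// Illustrative sketch (editor-added, hypothetical names): with a complex source
// address, the fieldwise expansion is roughly
//
//     byref addrTmp = srcAddr;                     // evaluated exactly once
//     dstLcl.f0 = *(f0Type*)(addrTmp + 0);         // one IND per promoted field
//     dstLcl.f1 = *(f1Type*)(addrTmp + offsetOfF1);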
8912 addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
8914 lvaTable[addrSpillTemp].lvType = TYP_BYREF;
8916 if (addrSpillIsStackDest)
8918 lvaTable[addrSpillTemp].lvStackByref = true;
8921 tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF),
8924 #ifndef LEGACY_BACKEND
8925 // If we are assigning the address of a LclVar here
8926 // liveness does not account for this kind of address taken use.
8928 // We have to mark this local as address exposed so
8929 // that we don't delete the definition for this LclVar
8930 // as a dead store later on.
8932 if (addrSpill->OperGet() == GT_ADDR)
8934 GenTreePtr addrOp = addrSpill->gtOp.gtOp1;
8935 if (addrOp->IsLocal())
8937 unsigned lclVarNum = addrOp->gtLclVarCommon.gtLclNum;
8938 lvaTable[lclVarNum].lvAddrExposed = true;
8939 lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
8942 #endif // !LEGACY_BACKEND
8947 for (unsigned i=0; i<fieldCnt; ++i)
8949 FieldSeqNode* curFieldSeq = nullptr;
8952 noway_assert(destLclNum != BAD_VAR_NUM);
8953 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
8954 dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
8956 noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
8957 // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
8958 dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
8962 noway_assert(srcDoFldAsg);
8963 noway_assert(srcLclNum != BAD_VAR_NUM);
8964 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
8968 assert(addrSpillTemp != BAD_VAR_NUM);
8969 dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
8973 dest = gtCloneExpr(destAddr);
8974 noway_assert(dest != nullptr);
8976 // Is the address of a local?
8977 GenTreeLclVarCommon* lclVarTree = nullptr;
8978 bool isEntire = false;
8979 bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr);
8980 if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
8982 lclVarTree->gtFlags |= GTF_VAR_DEF;
8984 lclVarTree->gtFlags |= GTF_VAR_USEASG;
8988 GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
8989 // Have to set the field sequence -- which means we need the field handle.
8990 CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
8991 CORINFO_FIELD_HANDLE fieldHnd = info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
8992 curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
8993 fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
8995 dest = gtNewOperNode(GT_ADD, TYP_BYREF,
8999 dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);
9001 // !!! The destination could be on stack. !!!
9002 // This flag will let us choose the correct write barrier.
9003 dest->gtFlags |= GTF_IND_TGTANYWHERE;
9009 noway_assert(srcLclNum != BAD_VAR_NUM);
9010 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
9011 src = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
9013 noway_assert(srcAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
9014 src->gtFlags |= srcAddr->gtOp.gtOp1->gtFlags & ~GTF_NODE_MASK;
9018 noway_assert(destDoFldAsg);
9019 noway_assert(destLclNum != BAD_VAR_NUM);
9020 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
9022 if (srcSingleLclVarAsg)
9024 noway_assert(fieldCnt == 1);
9025 noway_assert(srcLclVar != nullptr);
9026 noway_assert(addrSpill == nullptr);
9028 src = gtNewLclvNode(srcLclNum, srcLclVar->TypeGet());
9034 assert(addrSpillTemp != BAD_VAR_NUM);
9035 src = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
9039 src = gtCloneExpr(srcAddr);
9040 noway_assert(src != nullptr);
9043 CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle();
9044 CORINFO_FIELD_HANDLE fieldHnd = info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
9045 curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
9047 src = gtNewOperNode(GT_ADD, TYP_BYREF,
9049 new(this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL,
9050 lvaTable[fieldLclNum].lvFldOffset,
9053 src = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), src);
9057 noway_assert(dest->TypeGet() == src->TypeGet());
9059 asg = gtNewAssignNode(dest, src);
9061 // If we spilled the address, and we didn't do individual field assignments to promoted fields,
9062 // and it was of a local, record the assignment as an indirect update of a local.
9063 if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM)
9065 curFieldSeq = GetFieldSeqStore()->Append(destFldSeq, curFieldSeq);
9066 bool isEntire = (genTypeSize(var_types(lvaTable[destLclNum].lvType))
9067 == genTypeSize(dest->TypeGet()));
9068 IndirectAssignmentAnnotation* pIndirAnnot =
9069 new (this, CMK_Unknown) IndirectAssignmentAnnotation(destLclNum, curFieldSeq, isEntire);
9070 GetIndirAssignMap()->Set(asg, pIndirAnnot);
9073 #if LOCAL_ASSERTION_PROP
9074 if (optLocalAssertionProp)
9076 optAssertionGen(asg);
9078 #endif // LOCAL_ASSERTION_PROP
9082 tree = gtNewOperNode(GT_COMMA,
9096 tree->gtFlags |= GTF_LATE_ARG;
9100 tree->gtFlags |= GTF_MORPHED;
9104 printf("\nfgMorphCopyBlock (after):\n");
9113 // insert conversions and normalize to make the tree amenable to a register FP architecture
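// Illustrative sketch (editor-added): for 'double d; float f;',
//
//     d + f        // mixed float/double arithmetic
//
// becomes 'd + (double)f', so both operands of the FP operation (and likewise
// of an FP compare) end up with the same register type.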
9115 GenTree* Compiler::fgMorphForRegisterFP(GenTree *tree)
9117 GenTreePtr op1 = tree->gtOp.gtOp1;
9118 GenTreePtr op2 = tree->gtGetOp2();
9120 if (tree->OperIsArithmetic()
9121 && varTypeIsFloating(tree))
9123 if (op1->TypeGet() != tree->TypeGet())
9125 tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), tree->gtOp.gtOp1, tree->TypeGet());
9127 if (op2->TypeGet() != tree->TypeGet())
9129 tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), tree->gtOp.gtOp2, tree->TypeGet());
9132 else if (tree->OperIsCompare()
9133 && varTypeIsFloating(op1)
9134 && op1->TypeGet() != op2->TypeGet())
9136 // both had better be floating, just one bigger than other
9137 assert (varTypeIsFloating(op2));
9138 if (op1->TypeGet() == TYP_FLOAT)
9140 tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, tree->gtOp.gtOp1, TYP_DOUBLE);
9142 else if (op2->TypeGet() == TYP_FLOAT)
9144 tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, tree->gtOp.gtOp2, TYP_DOUBLE);
9151 GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
9153 GenTree* op1 = compare->gtOp.gtOp1;
9154 GenTree* op2 = compare->gtOp.gtOp2;
9156 GenTreeCall* opCall;
9158 // recognize this pattern:
9160 // stmtExpr void (IL 0x000... ???)
9164 // call help ref HELPER.CORINFO_HELP_BOX_NULLABLE
9165 // const(h) long 0x7fed96836c8 class
9167 // ld.lclVar struct V00 arg0
9170 // which comes from this code (reported by customer as being slow) :
9172 // private static bool IsNull<T>(T arg)
9174 // return arg==null;
9178 if (op1->IsCnsIntOrI() && op2->IsHelperCall())
9181 opCall = op2->AsCall();
9183 else if (op1->IsHelperCall() && op2->IsCnsIntOrI())
9186 opCall = op1->AsCall();
9193 if (opCns->gtIntConCommon.IconValue() != 0)
9196 if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE)
9199 // replace the box with an access of the nullable 'hasValue' field which is at the zero offset
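// Illustrative note (editor-added): 'arg == null' for Nullable<T> arg thus becomes
// a compare of IND<bool>(&arg) (the hasValue field at offset 0) against 0,
// avoiding the boxing allocation entirely.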
9200 GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1);
9203 compare->gtOp.gtOp1 = newOp;
9205 compare->gtOp.gtOp2 = newOp;
9212 //--------------------------------------------------------------------------------------
9213 // fgCopySIMDNode: make a copy of a SIMD intrinsic node, e.g. so that a field can be accessed.
9216 // simdNode - The GenTreeSIMD node to be copied
9219 // A comma node where op1 is the assignment of the simd node to a temp, and op2 is the temp lclVar.
9222 Compiler::fgCopySIMDNode(GenTreeSIMD* simdNode)
9224 // Copy the result of the SIMD intrinsic into a temp.
9225 unsigned lclNum = lvaGrabTemp(true DEBUGARG("Copy of SIMD intrinsic with field access"));
9227 CORINFO_CLASS_HANDLE simdHandle = NO_CLASS_HANDLE;
9228 // We only have fields of the fixed float vectors.
9229 noway_assert(simdNode->gtSIMDBaseType == TYP_FLOAT);
9230 switch(simdNode->gtSIMDSize)
9232 case 8: simdHandle = SIMDVector2Handle; break;
9233 case 12: simdHandle = SIMDVector3Handle; break;
9234 case 16: simdHandle = SIMDVector4Handle; break;
9235 default: noway_assert(!"field of unexpected SIMD type"); break;
9237 assert(simdHandle != NO_CLASS_HANDLE);
9239 lvaSetStruct(lclNum, simdHandle, false, true);
9240 lvaTable[lclNum].lvFieldAccessed = true;
9242 GenTree* asg = gtNewTempAssign(lclNum, simdNode);
9243 GenTree* newLclVarNode = new (this, GT_LCL_VAR) GenTreeLclVar(simdNode->TypeGet(), lclNum, BAD_IL_OFFSET);
9245 GenTree* comma = gtNewOperNode(GT_COMMA, simdNode->TypeGet(), asg, newLclVarNode);
9249 //--------------------------------------------------------------------------------------------------------------
9250 // getSIMDStructFromField:
9251 // Check whether the field belongs to a simd struct. If it does, return the GenTreePtr for
9252 // the struct node, along with the base type, field index and simd size. If it does not, return nullptr.
9253 // Usually, if the tree node is from a simd lclvar which is not used in any SIMD intrinsic, we
9254 // should return nullptr, since in this case we should treat the SIMD struct as a regular struct.
9255 // However, if you want the simd struct node regardless, you can set ignoreUsedInSIMDIntrinsic
9256 // to true. Then there will be no IsUsedInSIMDIntrinsic check, and the SIMD struct node is returned
9257 // whenever the struct is a SIMD struct.
9260 // tree - GenTreePtr. This node will be checked to see whether it is a field that belongs to a simd
9261 // struct used in a simd intrinsic.
9262 // pBaseTypeOut - var_types pointer, if the tree node is the tree we want, we set *pBaseTypeOut
9263 // to simd lclvar's base type.
9264 // indexOut - unsigned pointer, if the tree is used for simd intrinsic, we will set *indexOut
9265 // equals to the index number of this field.
9266 // simdSizeOut - unsigned pointer, if the tree is used for simd intrinsic, set the *simdSizeOut
9267 // equals to the simd struct size which this tree belongs to.
9268 // ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will ignore
9269 // the UsedInSIMDIntrinsic check.
9272 // A GenTreePtr which points to the simd lclvar tree the field belongs to. If the tree is not a simd
9273 // intrinsic related field, return nullptr.
9276 GenTreePtr Compiler::getSIMDStructFromField(GenTreePtr tree, var_types* pBaseTypeOut, unsigned* indexOut, unsigned* simdSizeOut, bool ignoreUsedInSIMDIntrinsic/*false*/)
9278 GenTreePtr ret = nullptr;
9279 if(tree->OperGet() == GT_FIELD)
9281 GenTreePtr objRef = tree->gtField.gtFldObj;
9282 if (objRef != nullptr)
9284 GenTreePtr obj = nullptr;
9285 if (objRef->gtOper == GT_ADDR)
9287 obj = objRef->gtOp.gtOp1;
9289 else if(ignoreUsedInSIMDIntrinsic)
9298 if (isSIMDTypeLocal(obj))
9300 unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
9301 LclVarDsc* varDsc = &lvaTable[lclNum];
9302 if(varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic)
9304 *simdSizeOut = varDsc->lvExactSize;
9305 *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj);
9309 else if (obj->OperGet() == GT_SIMD)
9312 GenTreeSIMD* simdNode = obj->AsSIMD();
9313 *simdSizeOut = simdNode->gtSIMDSize;
9314 *pBaseTypeOut = simdNode->gtSIMDBaseType;
9320 unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut);
9321 *indexOut = tree->gtField.gtFldOffset / BaseTypeSize;
9326 /*****************************************************************************
9327 * If a read operation tries to access a simd struct field, then transform the
9328 * operation into the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
9329 * Otherwise, return the old tree.
9331 * tree - GenTreePtr. If this pointer points to simd struct which is used for simd
9332 * intrinsic. We will morph it as simd intrinsic SIMDIntrinsicGetItem.
9334 * A GenTreePtr which points to the new tree. If the tree is not for a simd intrinsic, return the old tree.
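 *
 * Illustrative sketch (editor-added): a read such as 'float y = v.Y' on a
 * Vector3 local 'v' becomes, roughly,
 *
 *     GT_SIMD<SIMDIntrinsicGetItem>(v, 1)    // index 1 == offset 4 / sizeof(float)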
9338 GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree)
9341 var_types baseType = TYP_UNKNOWN;
9342 unsigned simdSize = 0;
9343 GenTreePtr simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
9344 if(simdStructNode != nullptr)
9347 assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
9348 GenTree* op2 = gtNewIconNode(index);
9349 tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
9351 tree->gtFlags |= GTF_MORPHED;
9357 /*****************************************************************************
9358 * Transform an assignment to a SIMD struct field into the SIMD intrinsic
9359 * SIMDIntrinsicSet*, and return the new tree. If it is not such an assignment,
9360 * then return the old tree.
9362 * tree - GenTreePtr. If this pointer points to simd struct which is used for simd
9363 * intrinsic. We will morph it as simd intrinsic set.
9365 * A GenTreePtr which points to the new tree. If the tree is not for a simd intrinsic, return the old tree.
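 *
 * Illustrative sketch (editor-added): an assignment 'v.Y = f' on a Vector3
 * local 'v' becomes, roughly,
 *
 *     tmp = GT_SIMD<SIMDIntrinsicSetY>(v, f);   // produces the whole new vector
 *     copyBlk(&v, &tmp, simdSize);              // written back as a block copy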
9369 GenTreePtr Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree)
9371 assert(tree->OperGet() == GT_ASG);
9372 GenTreePtr op1 = tree->gtGetOp1();
9373 GenTreePtr op2 = tree->gtGetOp2();
9376 var_types baseType = TYP_UNKNOWN;
9377 unsigned simdSize = 0;
9378 GenTreePtr simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize);
9379 if (simdOp1Struct != nullptr)
9381 //Generate the simd set intrinsic
9382 assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
9384 SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid;
9388 simdIntrinsicID = SIMDIntrinsicSetX;
9391 simdIntrinsicID = SIMDIntrinsicSetY;
9394 simdIntrinsicID = SIMDIntrinsicSetZ;
9397 simdIntrinsicID = SIMDIntrinsicSetW;
9400 noway_assert(!"There is no set intrinsic for index bigger than 3");
9404 GenTreePtr newStruct = gtClone(simdOp1Struct);
9405 assert((newStruct != nullptr) && (varTypeIsSIMD(newStruct)));
9406 GenTreePtr simdTree = gtNewSIMDNode(newStruct->gtType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize);
9407 GenTreePtr copyBlkDst = gtNewOperNode(GT_ADDR, TYP_BYREF, newStruct);
9408 tree = gtNewBlkOpNode(GT_COPYBLK,
9410 gtNewOperNode(GT_ADDR, TYP_BYREF, simdTree),
9411 gtNewIconNode(simdSize),
9414 tree->gtFlags |= GTF_MORPHED;
9422 /*****************************************************************************
9424 * Transform the given GTK_SMPOP tree for code generation.
9428 #pragma warning(push)
9429 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
9431 GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
9433 // this extra scope is a workaround for a gcc bug
9434 // the inline destructor for ALLOCA_CHECK confuses the control
9435 // flow and gcc thinks that the function never returns
9438 noway_assert(tree->OperKind() & GTK_SMPOP);
9440 /* The steps in this function are :
9441 o Perform required preorder processing
9442 o Process the first, then second operand, if any
9443 o Perform required postorder morphing
9444 o Perform optional postorder morphing if optimizing
9447 bool isQmarkColon = false;
9449 #if LOCAL_ASSERTION_PROP
9450 AssertionIndex origAssertionCount = DUMMY_INIT(0);
9451 AssertionDsc * origAssertionTab = DUMMY_INIT(NULL);
9453 AssertionIndex thenAssertionCount = DUMMY_INIT(0);
9454 AssertionDsc * thenAssertionTab = DUMMY_INIT(NULL);
9459 #if !FEATURE_STACK_FP_X87
9460 tree = fgMorphForRegisterFP(tree);
9464 genTreeOps oper = tree->OperGet();
9465 var_types typ = tree->TypeGet();
9466 GenTreePtr op1 = tree->gtOp.gtOp1;
9467 GenTreePtr op2 = tree->gtGetOp2();
9469 /*-------------------------------------------------------------------------
9470 * First do any PRE-ORDER processing
9475 // Some arithmetic operators need to use a helper call to the EE
9479 tree = fgDoNormalizeOnStore(tree);
9480 /* fgDoNormalizeOnStore can change op2 */
9481 noway_assert(op1 == tree->gtOp.gtOp1);
9482 op2 = tree->gtOp.gtOp2;
9486 // We should check whether op2 should be assigned to a SIMD field or not.
9487 // If it is, we should translate the tree into a simd intrinsic.
9488 GenTreePtr newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree);
9489 if (newTree != tree)
9492 oper = tree->OperGet();
9493 typ = tree->TypeGet();
9494 op1 = tree->gtOp.gtOp1;
9495 op2 = tree->gtGetOp2();
9517 /* We can't CSE the LHS of an assignment. Only r-values can be CSEed */
9518 op1->gtFlags |= GTF_DONT_CSE;
9523 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
9524 op1->gtFlags |= GTF_DONT_CSE;
9532 if (op1->OperKind() & GTK_RELOP)
9534 noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK));
9535 /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does
9536 not need to materialize the result as a 0 or 1. */
9538 /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */
9539 op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
9541 // Request that the codegen for op1 sets the condition flags
9542 // when it generates the code for op1.
9544 // Codegen for op1 must set the condition flags if
9545 // this method returns true.
9547 op1->gtRequestSetFlags();
9551 GenTreePtr effOp1 = op1->gtEffectiveVal();
9553 noway_assert( (effOp1->gtOper == GT_CNS_INT) &&
9554 ((effOp1->gtIntCon.gtIconVal == 0) || (effOp1->gtIntCon.gtIconVal == 1)) );
9559 #if LOCAL_ASSERTION_PROP
9560 if (optLocalAssertionProp)
9562 isQmarkColon = true;
9566 return fgMorphArrayIndex(tree);
9569 return fgMorphCast(tree);
9573 #ifndef _TARGET_64BIT_
9574 if (typ == TYP_LONG)
9576 /* For (long)int1 * (long)int2, we don't actually do the
9577 casts, and just multiply the 32 bit values, which will
9578 give us the 64 bit result in edx:eax */
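// Illustrative sketch (editor-added): for C# code like
//
//     long r = (long)i1 * (long)i2;   // i1, i2 are int
//
// the casts are kept (unfolded) and the node is tagged GTF_MUL_64RSLT, so
// codegen can emit one 32x32->64 multiply (mul/imul) instead of a helper call.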
9581 if ((op1->gtOper == GT_CAST &&
9582 op2->gtOper == GT_CAST &&
9583 genActualType(op1->CastFromType()) == TYP_INT &&
9584 genActualType(op2->CastFromType()) == TYP_INT)&&
9585 !op1->gtOverflow() && !op2->gtOverflow())
9587 // The casts have to be of the same signedness.
9588 if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED))
9590 //We see if we can force an int constant to change its signedness
9592 if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT)
9594 else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT)
9599 if ( ((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000)) )
9600 constOp->gtFlags ^= GTF_UNSIGNED;
9605 // The only combination that can overflow
9606 if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) &&
9607 !( op1->gtFlags & GTF_UNSIGNED))
9610 /* Remaining combinations can never overflow during long mul. */
9612 tree->gtFlags &= ~GTF_OVERFLOW;
9614 /* Do unsigned mul only if the casts were unsigned */
9616 tree->gtFlags &= ~GTF_UNSIGNED;
9617 tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED;
9619 /* Since we are committing to GTF_MUL_64RSLT, we don't want
9620 the casts to be folded away. So morph the castees directly */
9622 op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1);
9623 op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1);
9625 // Propagate side effect flags up the tree
9626 op1->gtFlags &= ~GTF_ALL_EFFECT;
9627 op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
9628 op2->gtFlags &= ~GTF_ALL_EFFECT;
9629 op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
9631 // If the GT_MUL can be altogether folded away, we should do that.
9633 if ((op1->gtCast.CastOp()->OperKind() &
9634 op2->gtCast.CastOp()->OperKind() & GTK_CONST) && opts.OptEnabled(CLFLG_CONSTANTFOLD))
9636 tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1);
9637 tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2);
9638 noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST);
9639 tree = gtFoldExprConst(tree);
9640 noway_assert(tree->OperIsConst());
9644 tree->gtFlags |= GTF_MUL_64RSLT;
9646 // If op1 and op2 are unsigned casts, we need to do an unsigned mult
9647 tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED);
9649 // Insert GT_NOP nodes for the cast operands so that they do not get folded
9650 // And propagate the new flags. We don't want to CSE the casts because
9651 // codegen expects GTF_MUL_64RSLT muls to have a certain layout.
9653 if (op1->gtCast.CastOp()->OperGet() != GT_NOP)
9655 op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
9656 op1->gtFlags &= ~GTF_ALL_EFFECT;
9657 op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
9658 op1->gtFlags |= GTF_DONT_CSE;
9661 if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
9663 op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
9664 op2->gtFlags &= ~GTF_ALL_EFFECT;
9665 op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
9666 op2->gtFlags |= GTF_DONT_CSE;
9669 tree->gtFlags &= ~GTF_ALL_EFFECT;
9670 tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);
9672 goto DONE_MORPHING_CHILDREN;
9674 else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0)
9677 if (tree->gtOverflow())
9678 helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF
9679 : CORINFO_HELP_LMUL_OVF;
9681 helper = CORINFO_HELP_LMUL;
9683 goto USE_HELPER_FOR_ARITH;
9687 /* We are seeing this node again. We have decided to use
9688 GTF_MUL_64RSLT, so leave it alone. */
9690 assert(tree->gtIsValid64RsltMul());
9693 #endif // !_TARGET_64BIT_
9699 #ifndef _TARGET_64BIT_
9700 if (typ == TYP_LONG)
9702 helper = CORINFO_HELP_LDIV;
9703 goto USE_HELPER_FOR_ARITH;
9706 #if USE_HELPERS_FOR_INT_DIV
9707 if (typ == TYP_INT && !fgIsSignedDivOptimizable(op2))
9709 helper = CORINFO_HELP_DIV;
9710 goto USE_HELPER_FOR_ARITH;
9713 #endif // !_TARGET_64BIT_
9715 #ifndef LEGACY_BACKEND
9716 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
9718 op2 = gtFoldExprConst(op2);
9721 if (fgShouldUseMagicNumberDivide(tree->AsOp()))
9723 tree = fgMorphDivByConst(tree->AsOp());
9724 op1 = tree->gtOp.gtOp1;
9725 op2 = tree->gtOp.gtOp2;
9727 #endif // !LEGACY_BACKEND
9733 #ifndef _TARGET_64BIT_
9734 if (typ == TYP_LONG)
9736 helper = CORINFO_HELP_ULDIV;
9737 goto USE_HELPER_FOR_ARITH;
9739 #if USE_HELPERS_FOR_INT_DIV
9740 if (typ == TYP_INT && !fgIsUnsignedDivOptimizable(op2))
9742 helper = CORINFO_HELP_UDIV;
9743 goto USE_HELPER_FOR_ARITH;
9746 #endif // _TARGET_64BIT_
9752 if (varTypeIsFloating(typ))
9754 helper = CORINFO_HELP_DBLREM;
9756 if (op1->TypeGet() == TYP_FLOAT)
9757 if (op2->TypeGet() == TYP_FLOAT)
9758 helper = CORINFO_HELP_FLTREM;
9759 else
9760 tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
9761 else
9762 if (op2->TypeGet() == TYP_FLOAT)
9763 tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
9764 goto USE_HELPER_FOR_ARITH;
9767 // Do not use optimizations for signed mod here (unlike UMOD, whose idiv is optimized during codegen).
9768 // A similar optimization for signed mod will not work for a negative perfectly divisible
9769 // HI-word. To make it correct, we would need to divide without the sign and then flip the
9770 // result sign after the mod. This requires 18 opcodes + flow, making it not worth inlining.
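// (Illustrative example: in two's complement, -3 % 8 must yield -3, but the
// unsigned-style mask trick "-3 & 7" yields 5, so the mask shortcut is unsound
// for signed operands.)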
9771 goto ASSIGN_HELPER_FOR_MOD;
9775 #ifdef _TARGET_ARMARCH_
9777 // Note for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization
9779 #else // _TARGET_XARCH
9780 /* If this is an unsigned long mod whose op2 is a cast to long from a
9781 constant int, then don't morph to a call to the helper. This can be done
9782 faster inline using idiv.
9783 */
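// (Illustrative example, assuming a constant divisor in the 2..0x3fffffff range
// checked below: "ulongVal % 10" can stay inline as a 64-by-32-bit idiv rather
// than becoming a CORINFO_HELP_ULMOD helper call.)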
9786 if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
9787 ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
9788 ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
9790 if (op2->gtOper == GT_CAST &&
9791 op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
9792 op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
9793 op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
9794 (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
9796 tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
9797 noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
9800 if (op2->gtOper == GT_CNS_NATIVELONG &&
9801 op2->gtIntConCommon.LngValue() >= 2 &&
9802 op2->gtIntConCommon.LngValue() <= 0x3fffffff)
9804 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
9805 noway_assert(op1->TypeGet() == TYP_LONG);
9807 // Update flags for op1 morph
9808 tree->gtFlags &= ~GTF_ALL_EFFECT;
9810 tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant
9812 // If op1 is a constant, then do constant folding of the division operator
9813 if (op1->gtOper == GT_CNS_NATIVELONG)
9815 tree = gtFoldExpr(tree);
9820 #endif // _TARGET_XARCH
9822 ASSIGN_HELPER_FOR_MOD:
9824 // For "val % 1", return 0 if op1 doesn't have any side effects
9825 // and we are not in the CSE phase, we cannot discard 'tree'
9826 // because it may contain CSE expressions that we haven't yet examined.
9828 if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase)
9830 if (((op2->gtOper == GT_CNS_INT) && (op2->gtIntConCommon.IconValue() == 1))
9831 || ((op2->gtOper == GT_CNS_LNG) && (op2->gtIntConCommon.LngValue() == 1)))
9833 GenTreePtr zeroNode = gtNewZeroConNode(typ);
9835 zeroNode->gtFlags |= GTF_MORPHED;
9837 DEBUG_DESTROY_NODE(tree);
9838 return zeroNode;
9842 #ifndef _TARGET_64BIT_
9843 if (typ == TYP_LONG)
9845 helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD;
9846 goto USE_HELPER_FOR_ARITH;
9849 #if USE_HELPERS_FOR_INT_DIV
9852 if (oper == GT_UMOD && !fgIsUnsignedModOptimizable(op2))
9854 helper = CORINFO_HELP_UMOD;
9855 goto USE_HELPER_FOR_ARITH;
9857 else if (oper == GT_MOD && !fgIsSignedModOptimizable(op2))
9859 helper = CORINFO_HELP_MOD;
9860 goto USE_HELPER_FOR_ARITH;
9862 #endif // USE_HELPERS_FOR_INT_DIV
9864 #endif // !_TARGET_64BIT_
9866 #ifndef LEGACY_BACKEND
9867 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
9869 op2 = gtFoldExprConst(op2);
9872 #ifdef _TARGET_ARM64_
9874 // For ARM64 we don't have a remainder instruction,
9875 // so the architecture manual suggests the following transformation
9876 // to generate code for this operator:
9878 // a % b = a - (a / b) * b;
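// (Illustrative example: 7 % 3 = 7 - (7 / 3) * 3 = 7 - 6 = 1; with truncated
// division the identity also holds for negative operands, e.g. -7 % 3 = -1.)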
9880 tree = fgMorphModToSubMulDiv(tree->AsOp());
9881 op1 = tree->gtOp.gtOp1;
9882 op2 = tree->gtOp.gtOp2;
9884 #else // !_TARGET_ARM64_
9886 if (oper != GT_UMOD && fgShouldUseMagicNumberDivide(tree->AsOp()))
9888 tree = fgMorphModByConst(tree->AsOp());
9889 op1 = tree->gtOp.gtOp1;
9890 op2 = tree->gtOp.gtOp2;
9893 #endif //_TARGET_ARM64_
9894 #endif // !LEGACY_BACKEND
9897 USE_HELPER_FOR_ARITH:
9899 /* We have to morph these arithmetic operations into helper calls
9900 before morphing the arguments (preorder), else the arguments
9901 won't get correct values of fgPtrArgCntCur.
9902 However, try to fold the tree first in case we end up with a
9903 simple node which won't need a helper call at all */
9905 noway_assert(tree->OperIsBinary());
9907 GenTreePtr oldTree = tree;
9909 tree = gtFoldExpr(tree);
9911 // Were we able to fold it ?
9912 // Note that gtFoldExpr may return a non-leaf even if successful
9913 // e.g. for something like "expr / 1" - see also bug #290853
9914 if (tree->OperIsLeaf() || (oldTree != tree))
9917 return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree);
9920 // Did we fold it into a comma node with throw?
9921 if (tree->gtOper == GT_COMMA)
9923 noway_assert(fgIsCommaThrow(tree));
9924 return fgMorphTree(tree);
9927 return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2));
9930 // normalize small integer return values
9931 if (fgGlobalMorph && varTypeIsSmall(info.compRetType) &&
9932 (op1 != NULL) && (op1->TypeGet() != TYP_VOID) &&
9933 fgCastNeeded(op1, info.compRetType))
9935 // Small-typed return values are normalized by the callee
9936 op1 = gtNewCastNode(TYP_INT, op1, info.compRetType);
9938 // Propagate GTF_COLON_COND
9939 op1->gtFlags|=(tree->gtFlags & GTF_COLON_COND);
9941 tree->gtOp.gtOp1 = fgMorphCast(op1);
9943 // Propagate side effect flags
9944 tree->gtFlags &= ~GTF_ALL_EFFECT;
9945 tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
9954 // Check for typeof(...) == obj.GetType()
9955 // Also check for typeof(...) == typeof(...)
9956 // IMPORTANT NOTE: this optimization relies on a one-to-one mapping between
9957 // type handles and instances of System.Type
9958 // If this invariant is ever broken, the optimization will need updating
9960 #ifdef LEGACY_BACKEND
9961 if ( op1->gtOper == GT_CALL &&
9962 op2->gtOper == GT_CALL &&
9963 ((op1->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) || (op1->gtCall.gtCallType == CT_HELPER)) &&
9964 ((op2->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) || (op2->gtCall.gtCallType == CT_HELPER)))
9965 #else
9966 if ((((op1->gtOper == GT_INTRINSIC) && (op1->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
9967 ((op1->gtOper == GT_CALL) && (op1->gtCall.gtCallType == CT_HELPER))) &&
9968 (((op2->gtOper == GT_INTRINSIC) && (op2->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
9969 ((op2->gtOper == GT_CALL) && (op2->gtCall.gtCallType == CT_HELPER))))
9970 #endif // LEGACY_BACKEND
9972 GenTreePtr pGetClassFromHandle;
9973 GenTreePtr pGetType;
9975 #ifdef LEGACY_BACKEND
9976 bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1);
9977 bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2);
9978 #else
9979 bool bOp1ClassFromHandle = op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1) : false;
9980 bool bOp2ClassFromHandle = op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2) : false;
9981 #endif // LEGACY_BACKEND
9983 // Optimize typeof(...) == typeof(...)
9984 // Typically this occurs in generic code that attempts a type switch
9985 // e.g. typeof(T) == typeof(int)
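// (Sketch of the transform below: "EQ(CALL typeof-helper(h1), CALL typeof-helper(h2))"
// becomes "EQ(h1, h2)", comparing the raw type-handle arguments and dropping both calls.)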
9987 if (bOp1ClassFromHandle && bOp2ClassFromHandle)
9989 GenTreePtr classFromHandleArg1 = tree->gtOp.gtOp1->gtCall.gtCallArgs->gtOp.gtOp1;
9990 GenTreePtr classFromHandleArg2 = tree->gtOp.gtOp2->gtCall.gtCallArgs->gtOp.gtOp1;
9992 GenTreePtr compare = gtNewOperNode(oper, TYP_INT,
9993 classFromHandleArg1,
9994 classFromHandleArg2);
9996 compare->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
9999 return fgMorphTree(compare);
10001 else if (bOp1ClassFromHandle || bOp2ClassFromHandle)
10004 // Now check for GetClassFromHandle(handle) == obj.GetType()
10007 if (bOp1ClassFromHandle)
10009 pGetClassFromHandle = tree->gtOp.gtOp1;
10014 pGetClassFromHandle = tree->gtOp.gtOp2;
10018 GenTreePtr pGetClassFromHandleArgument = pGetClassFromHandle->gtCall.gtCallArgs->gtOp.gtOp1;
10019 GenTreePtr pConstLiteral = pGetClassFromHandleArgument;
10021 // Unwrap GT_NOP node used to prevent constant folding
10022 if (pConstLiteral->gtOper == GT_NOP && pConstLiteral->gtType == TYP_I_IMPL)
10024 pConstLiteral = pConstLiteral->gtOp.gtOp1;
10027 // In the ngen case, we have to go through an indirection to get the right handle.
10028 if (pConstLiteral->gtOper == GT_IND)
10030 pConstLiteral = pConstLiteral->gtOp.gtOp1;
10032 #ifdef LEGACY_BACKEND
10034 if (pGetType->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC &&
10035 info.compCompHnd->getIntrinsicID(pGetType->gtCall.gtCallMethHnd) == CORINFO_INTRINSIC_Object_GetType &&
10036 #else
10037 if ((pGetType->gtOper == GT_INTRINSIC) && (pGetType->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType) &&
10038 #endif // LEGACY_BACKEND
10039 pConstLiteral->gtOper == GT_CNS_INT &&
10040 pConstLiteral->gtType == TYP_I_IMPL)
10042 CORINFO_CLASS_HANDLE clsHnd = CORINFO_CLASS_HANDLE(pConstLiteral->gtIntCon.gtCompileTimeHandle);
10044 if (info.compCompHnd->canInlineTypeCheckWithObjectVTable(clsHnd))
10046 // Method Table tree
10047 #ifdef LEGACY_BACKEND
10048 GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp);
10049 #else
10050 GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtUnOp.gtOp1);
10051 #endif // LEGACY_BACKEND
10052 objMT->gtFlags |= GTF_EXCEPT; // Null ref exception if object is null
10053 compCurBB->bbFlags |= BBF_HAS_VTABREF;
10054 optMethodFlags |= OMF_HAS_VTABLEREF;
10056 // Method table constant
10057 GenTreePtr cnsMT = pGetClassFromHandleArgument;
10059 GenTreePtr compare = gtNewOperNode(oper, TYP_INT,
10060 objMT,
10061 cnsMT);
10063 compare->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
10065 // Morph and return
10066 return fgMorphTree(compare);
10071 fgMorphRecognizeBoxNullable(tree);
10072 op1 = tree->gtOp.gtOp1;
10073 op2 = tree->gtGetOp2();
10077 #ifdef _TARGET_ARM_
10079 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)
10081 switch (tree->TypeGet())
10083 case TYP_DOUBLE:
10084 return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1));
10085 case TYP_FLOAT:
10086 return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1));
10098 #if !CPU_HAS_FP_SUPPORT
10099 tree = fgMorphToEmulatedFP(tree);
10100 #endif // !CPU_HAS_FP_SUPPORT
10102 /* Could this operator throw an exception? */
10103 if (fgGlobalMorph && tree->OperMayThrow())
10105 if ((tree->OperGet() != GT_IND) || fgAddrCouldBeNull(tree->gtOp.gtOp1))
10107 /* Mark the tree node as potentially throwing an exception */
10108 tree->gtFlags |= GTF_EXCEPT;
10112 /*-------------------------------------------------------------------------
10113 * Process the first operand, if any
10119 #if LOCAL_ASSERTION_PROP
10120 // If we are entering the "then" part of a Qmark-Colon we must
10121 // save the state of the current copy assignment table
10122 // so that we can restore this state when entering the "else" part
10125 noway_assert(optLocalAssertionProp);
10126 if (optAssertionCount)
10128 noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
10129 unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
10130 origAssertionTab = (AssertionDsc*) ALLOCA(tabSize);
10131 origAssertionCount = optAssertionCount;
10132 memcpy(origAssertionTab, optAssertionTabPrivate, tabSize);
10136 origAssertionCount = 0;
10137 origAssertionTab = NULL;
10140 #endif // LOCAL_ASSERTION_PROP
10142 // We might need a new MorphAddressContext context. (These are used to convey
10143 // parent context about how addresses being calculated will be used; see the
10144 // specification comment for MorphAddrContext for full details.)
10145 // Assume it's an Ind context to start.
10146 MorphAddrContext subIndMac1(MACK_Ind);
10147 MorphAddrContext* subMac1 = mac;
10148 if (subMac1 == NULL || subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_CopyBlock)
10150 switch (tree->gtOper)
10153 if (subMac1 == NULL)
10155 subMac1 = &subIndMac1;
10156 subMac1->m_kind = MACK_Addr;
10160 // In a comma, the incoming context only applies to the rightmost arg of the
10161 // comma list. The left arg (op1) gets a fresh context.
10166 assert(subMac1 == NULL); // Should only occur at top level, since value is void.
10167 subMac1 = &s_CopyBlockMAC;
10170 // If the list is the first arg of a copy block, its two args should be evaluated as
10171 // IND-context addresses, separately.
10172 if (subMac1 != NULL && subMac1->m_kind == MACK_CopyBlock)
10174 subMac1 = &subIndMac1;
10180 subMac1 = &subIndMac1;
10187 // For additions, if we're in an IND context keep track of whether
10188 // all offsets added to the address are constant, and their sum.
10189 if (tree->gtOper == GT_ADD && subMac1 != NULL)
10191 assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock.
10192 GenTreePtr otherOp = tree->gtOp.gtOp2;
10193 // Is the other operand a constant?
10194 if (otherOp->IsCnsIntOrI())
10196 ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset);
10197 totalOffset += otherOp->gtIntConCommon.IconValue();
10198 if (totalOffset.IsOverflow())
10200 // We will consider an offset so large as to overflow as "not a constant" --
10201 // we will do a null check.
10202 subMac1->m_allConstantOffsets = false;
10206 subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue();
10211 subMac1->m_allConstantOffsets = false;
10215 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1);
10217 #if LOCAL_ASSERTION_PROP
10218 // If we are exiting the "then" part of a Qmark-Colon we must
10219 // save the state of the current copy assignment table
10220 // so that we can merge this state with the "else" part exit
10223 noway_assert(optLocalAssertionProp);
10224 if (optAssertionCount)
10226 noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
10227 unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
10228 thenAssertionTab = (AssertionDsc*) ALLOCA(tabSize);
10229 thenAssertionCount = optAssertionCount;
10230 memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize);
10234 thenAssertionCount = 0;
10235 thenAssertionTab = NULL;
10238 #endif // LOCAL_ASSERTION_PROP
10240 /* Morphing along with folding and inlining may have changed the
10241 * side effect flags, so we have to reset them
10243 * NOTE: Don't reset the exception flags on nodes that may throw */
10245 noway_assert(tree->gtOper != GT_CALL);
10247 if ((tree->gtOper != GT_INTRINSIC) || !IsIntrinsicImplementedByUserCall(tree->gtIntrinsic.gtIntrinsicId))
10249 tree->gtFlags &= ~GTF_CALL;
10252 if (!tree->OperMayThrow())
10253 tree->gtFlags &= ~GTF_EXCEPT;
10255 /* Propagate the new flags */
10256 tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
10258 // &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar itself does;
10259 // similarly for clsVar
10260 if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR))
10261 tree->gtFlags &= ~GTF_GLOB_REF;
10264 /*-------------------------------------------------------------------------
10265 * Process the second operand, if any
10271 #if LOCAL_ASSERTION_PROP
10272 // If we are entering the "else" part of a Qmark-Colon we must
10273 // reset the state of the current copy assignment table
10276 noway_assert(optLocalAssertionProp);
10277 optAssertionReset(0);
10278 if (origAssertionCount)
10280 size_t tabSize = origAssertionCount * sizeof(AssertionDsc);
10281 memcpy(optAssertionTabPrivate, origAssertionTab, tabSize);
10282 optAssertionReset(origAssertionCount);
10285 #endif // LOCAL_ASSERTION_PROP
10287 // We might need a new MorphAddressContext context to use in evaluating op2.
10288 // (These are used to convey parent context about how addresses being calculated
10289 // will be used; see the specification comment for MorphAddrContext for full details.)
10290 // Assume it's an Ind context to start.
10291 MorphAddrContext subIndMac2(MACK_Ind);
10292 switch (tree->gtOper)
10295 if (mac != NULL && mac->m_kind == MACK_Ind)
10297 GenTreePtr otherOp = tree->gtOp.gtOp1;
10298 // Is the other operand a constant?
10299 if (otherOp->IsCnsIntOrI())
10301 mac->m_totalOffset += otherOp->gtIntConCommon.IconValue();
10305 mac->m_allConstantOffsets = false;
10310 if (mac != NULL && mac->m_kind == MACK_CopyBlock)
10318 tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac);
10320 /* Propagate the side effect flags from op2 */
10322 tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
10324 #if LOCAL_ASSERTION_PROP
10325 // If we are exiting the "else" part of a Qmark-Colon we must
10326 // merge the state of the current copy assignment table with
10327 // that of the exit of the "then" part.
10330 noway_assert(optLocalAssertionProp);
10331 // If either exit table has zero entries then
10332 // the merged table also has zero entries
10333 if (optAssertionCount == 0 || thenAssertionCount == 0)
10335 optAssertionReset(0);
10339 size_t tabSize = optAssertionCount * sizeof(AssertionDsc);
10340 if ( (optAssertionCount != thenAssertionCount) ||
10341 (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0) )
10343 // Yes they are different so we have to find the merged set
10344 // Iterate over the copy asgn table removing any entries
10345 // that do not have an exact match in the thenAssertionTab
10346 AssertionIndex index = 1;
10347 while (index <= optAssertionCount)
10349 AssertionDsc* curAssertion = optGetAssertion(index);
10351 for (unsigned j=0; j < thenAssertionCount; j++)
10353 AssertionDsc* thenAssertion = &thenAssertionTab[j];
10355 // Do the left sides match?
10356 if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) &&
10357 (curAssertion->assertionKind == thenAssertion->assertionKind))
10359 // Do the right sides match?
10360 if ((curAssertion->op2.kind == thenAssertion->op2.kind) &&
10361 (curAssertion->op2.lconVal == thenAssertion->op2.lconVal))
10372 // If we fall out of the loop above then we didn't find
10373 // any matching entry in the thenAssertionTab, so it must
10374 // have been killed on that path; remove it here as well
10377 // The data at optAssertionTabPrivate[i] is to be removed
10381 printf("The QMARK-COLON ");
10383 printf(" removes assertion candidate #%d\n", index);
10386 optAssertionRemove(index);
10389 // The data at optAssertionTabPrivate[i] is to be kept
10395 #endif // LOCAL_ASSERTION_PROP
10398 DONE_MORPHING_CHILDREN:
10400 /*-------------------------------------------------------------------------
10401 * Now do POST-ORDER processing
10404 #if FEATURE_FIXED_OUT_ARGS && !defined(_TARGET_64BIT_)
10405 // Variable shifts of a long end up being helper calls, so mark the tree as such. This
10406 // is potentially too conservative, since they'll get treated as having side effects.
10407 // It is important to mark them as calls so if they are part of an argument list,
10408 // they will get sorted and processed properly (for example, it is important to handle
10409 // all nested calls before putting struct arguments in the argument registers). We
10410 // could mark the trees just before argument processing, but it would require a full
10411 // tree walk of the argument tree, so we just do it here, instead, even though we'll
10412 // mark non-argument trees (that will still get converted to calls, anyway).
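// (Illustrative example: on a 32-bit target, "longVal << n" with a non-constant
// 'n' ends up as a shift helper call, so the tree is flagged GTF_CALL here.)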
10413 if (GenTree::OperIsShift(oper) &&
10414 (tree->TypeGet() == TYP_LONG) &&
10415 (op2->OperGet() != GT_CNS_INT))
10417 tree->gtFlags |= GTF_CALL;
10419 #endif // FEATURE_FIXED_OUT_ARGS && !_TARGET_64BIT_
10421 if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet()))
10422 && (op2 && !varTypeIsGC(op2->TypeGet())))
10424 // The tree is really not GC but was marked as such. Now that the
10425 // children have been unmarked, unmark the tree too.
10427 // Remember that GT_COMMA inherits its type only from op2
10428 if (tree->gtOper == GT_COMMA)
10429 tree->gtType = genActualType(op2->TypeGet());
10431 tree->gtType = genActualType(op1->TypeGet());
10434 GenTreePtr oldTree = tree;
10436 GenTreePtr qmarkOp1 = NULL;
10437 GenTreePtr qmarkOp2 = NULL;
10439 if ((tree->OperGet() == GT_QMARK) &&
10440 (tree->gtOp.gtOp2->OperGet() == GT_COLON))
10442 qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1;
10443 qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2;
10446 // Try to fold it, maybe we get lucky,
10447 tree = gtFoldExpr(tree);
10449 if (oldTree != tree)
10451 /* if gtFoldExpr returned op1 or op2 then we are done */
10452 if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
10453 return tree;
10455 /* If we created a comma-throw tree then we need to morph op1 */
10456 if (fgIsCommaThrow(tree))
10458 tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
10459 fgMorphTreeDone(tree);
10460 return tree;
10465 else if (tree->OperKind() & GTK_CONST)
10467 return tree;
10470 /* gtFoldExpr could have used setOper to change the oper */
10471 oper = tree->OperGet();
10472 typ = tree->TypeGet();
10474 /* gtFoldExpr could have changed op1 and op2 */
10475 op1 = tree->gtOp.gtOp1;
10476 op2 = tree->gtGetOp2();
10478 // Do we have an integer compare operation?
10480 if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet()))
10482 // Are we comparing against zero?
10486 // Request that the codegen for op1 sets the condition flags
10487 // when it generates the code for op1.
10489 // Codegen for op1 must set the condition flags if
10490 // this method returns true.
10492 op1->gtRequestSetFlags();
10495 /*-------------------------------------------------------------------------
10496 * Perform the required oper-specific postorder morphing
10500 GenTreePtr cns1, cns2;
10501 GenTreePtr thenNode;
10502 GenTreePtr elseNode;
10503 size_t ival1, ival2;
10504 GenTreePtr lclVarTree;
10505 GenTreeLclVarCommon* lclVarCmnTree;
10506 FieldSeqNode* fieldSeq = NULL;
10512 lclVarTree = fgIsIndirOfAddrOfLocal(op1);
10513 if (lclVarTree != NULL)
10515 lclVarTree->gtFlags |= GTF_VAR_DEF;
10518 /* If we are storing a small type, we might be able to omit a cast */
10519 if ((op1->gtOper == GT_IND) && varTypeIsSmall(op1->TypeGet()))
10521 if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow())
10523 var_types castType = op2->CastToType();
10525 // If we are performing a narrowing cast and
10526 // castType is larger or the same as op1's type
10527 // then we can discard the cast.
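// (Illustrative example: for a byte-sized store "IND(byte) = CAST(short <- x)",
// the store truncates to 8 bits anyway, so the cast to short adds nothing and
// can be dropped.)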
10529 if (varTypeIsSmall(castType) && (castType >= op1->TypeGet()))
10531 tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp();
10534 else if (op2->OperIsCompare() && varTypeIsByte(op1->TypeGet()))
10536 /* We don't need to zero extend the setcc instruction */
10537 op2->gtType = TYP_BYTE;
10540 // If we introduced a CSE we may need to undo the optimization above
10541 // (i.e. " op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type)
10542 // When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
10543 else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR))
10545 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
10546 LclVarDsc * varDsc = &lvaTable[varNum];
10548 /* We again need to zero extend the setcc instruction */
10549 op2->gtType = varDsc->TypeGet();
10557 fgAssignSetVarDef(tree);
10575 /* We can't CSE the LHS of an assignment */
10576 /* We also must set in the pre-morphing phase, otherwise assertionProp doesn't see it */
10577 op1->gtFlags |= GTF_DONT_CSE;
10583 /* Make sure we're allowed to do this */
10585 if (optValnumCSE_phase)
10587 // It is not safe to reorder/delete CSE's
10593 /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */
10595 if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0)
10597 op1 = tree->gtOp.gtOp1;
10599 /* Since this can occur repeatedly we use a while loop */
10601 while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) &&
10602 (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) &&
10603 (op1->gtType == TYP_INT) &&
10604 (op1->gtOverflow() == false))
10606 /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */
10608 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
10609 ival2 = cns2->gtIntCon.gtIconVal;
10611 if (op1->gtOper == GT_ADD)
10613 ival2 -= ival1;
10615 else
10617 ival2 += ival1;
10619 cns2->gtIntCon.gtIconVal = ival2;
10621 #ifdef _TARGET_64BIT_
10622 // we need to properly re-sign-extend or truncate as needed.
10623 cns2->AsIntCon()->TruncateOrSignExtend32();
10624 #endif // _TARGET_64BIT_
10626 op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
10631 // Here we look for the following tree
10637 ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1
10639 // cast to unsigned allows test for both 0 and 1
10640 if ((cns2->gtOper == GT_CNS_INT) && (((size_t) cns2->gtIntConCommon.IconValue()) <= 1U))
10642 ival2 = (size_t) cns2->gtIntConCommon.IconValue();
10644 else // cast to UINT64 allows test for both 0 and 1
10645 if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64) cns2->gtIntConCommon.LngValue()) <= 1ULL))
10647 ival2 = (size_t) cns2->gtIntConCommon.LngValue();
10650 if (ival2 != INT_MAX)
10652 // If we don't have a comma and relop, we can't do this optimization
10654 if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare()))
10656 // Here we look for the following transformation
10658 // EQ/NE Possible REVERSE(RELOP)
10660 // COMMA CNS 0/1 -> COMMA relop_op2
10662 // x RELOP x relop_op1
10664 // relop_op1 relop_op2
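// (Illustrative example: "COMMA(asg, x < y) == 0" becomes "COMMA(asg, x) >= y":
// the relop is pulled above the comma and reversed instead of materializing and
// comparing a 0/1 value.)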
10668 GenTreePtr comma = op1;
10669 GenTreePtr relop = comma->gtOp.gtOp2;
10671 GenTreePtr relop_op1 = relop->gtOp.gtOp1;
10673 bool reverse = ((ival2 == 0) == (oper == GT_EQ));
10675 if (reverse)
10677 gtReverseCond(relop);
10680 relop->gtOp.gtOp1 = comma;
10681 comma->gtOp.gtOp2 = relop_op1;
10683 // Comma now has fewer nodes underneath it, so we need to regenerate its flags
10684 comma->gtFlags &= ~GTF_ALL_EFFECT;
10685 comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT;
10686 comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
10688 noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0);
10689 noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0);
10690 relop->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED|GTF_RELOP_QMARK|GTF_DONT_CSE|GTF_ALL_EFFECT);
10695 if (op1->gtOper == GT_COMMA)
10697 // Here we look for the following tree
10698 // and when the LCL_VAR is a temp we can fold the tree:
10702 // COMMA CNS 0/1 -> RELOP CNS 0/1
10710 GenTreePtr asg = op1->gtOp.gtOp1;
10711 GenTreePtr lcl = op1->gtOp.gtOp2;
10713 /* Make sure that the left side of the comma is the assignment of the LCL_VAR */
10714 if (asg->gtOper != GT_ASG)
10717 /* The right side of the comma must be a LCL_VAR temp */
10718 if (lcl->gtOper != GT_LCL_VAR)
10721 unsigned lclNum = lcl->gtLclVarCommon.gtLclNum; noway_assert(lclNum < lvaCount);
10723 /* If the LCL_VAR is not a temp then bail, a temp has a single def */
10724 if (!lvaTable[lclNum].lvIsTemp)
10728 /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */
10729 // Fix 383856 X86/ARM ILGEN
10730 if (lclNumIsCSE(lclNum))
10734 /* We also must be assigning the result of a RELOP */
10735 if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR)
10738 /* Both of the LCL_VAR must match */
10739 if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum)
10742 /* If right side of asg is not a RELOP then skip */
10743 if (!asg->gtOp.gtOp2->OperIsCompare())
10746 LclVarDsc * varDsc = lvaTable + lclNum;
10748 /* Set op1 to the right side of asg, (i.e. the RELOP) */
10749 op1 = asg->gtOp.gtOp2;
10751 DEBUG_DESTROY_NODE(asg->gtOp.gtOp1);
10752 DEBUG_DESTROY_NODE(lcl);
10754 /* This local variable should never be used again */
10756 // VSW 184221: Set RefCnt to zero to indicate that this local var
10757 // is not used any more. (Keep the lvType as is.)
10758 // Otherwise lvOnFrame will be set to true in Compiler::raMarkStkVars
10759 // and then emitter::emitEndCodeGen will assert in the following line:
10760 // noway_assert( dsc->lvTracked);
10762 noway_assert(varDsc->lvRefCnt == 0 || // lvRefCnt may not have been set yet.
10763 varDsc->lvRefCnt == 2 // Or, we assume this tmp should only be used here,
10764 // and it only shows up twice.
10766 lvaTable[lclNum].lvRefCnt = 0;
10767 lvaTable[lclNum].lvaResetSortAgainFlag(this);
10771 if (op1->OperIsCompare())
10773 // Here we look for the following tree
10775 // EQ/NE -> RELOP/!RELOP
10780 // Note that we will remove/destroy the EQ/NE node and move
10781 // the RELOP up into its location.
10783 /* Here we reverse the RELOP if necessary */
10785 bool reverse = ((ival2 == 0) == (oper == GT_EQ));
10787 if (reverse)
10789 gtReverseCond(op1);
10792 /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */
10793 op1->gtType = tree->gtType;
10795 noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
10796 op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED|GTF_RELOP_QMARK|GTF_DONT_CSE);
10798 DEBUG_DESTROY_NODE(tree);
10804 // Now we check for a compare with the result of an '&' operator
10806 // Here we look for the following transformation:
10810 // AND CNS 0/1 -> AND CNS 0
10812 // RSZ/RSH CNS 1 x CNS (1 << y)
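// (Illustrative example: "((x >> 5) & 1) != 0" becomes "(x & 0x20) != 0",
// testing the bit in place instead of shifting it down first.)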
10816 if (op1->gtOper == GT_AND)
10818 GenTreePtr andOp = op1;
10819 GenTreePtr rshiftOp = andOp->gtOp.gtOp1;
10821 if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH))
10824 if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI())
10827 ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal;
10829 if (shiftAmount < 0)
10832 if (andOp->gtType == TYP_INT)
10834 if (!andOp->gtOp.gtOp2->IsCnsIntOrI())
10837 if (andOp->gtOp.gtOp2->gtIntCon.gtIconVal != 1)
10840 if (shiftAmount > 31)
10843 UINT32 newAndOperand = ((UINT32) 1) << shiftAmount;
10845 andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand;
10847 // Reverse the cond if necessary
10848 if (ival2 == 1)
10850 gtReverseCond(tree);
10851 cns2->gtIntCon.gtIconVal = 0;
10852 oper = tree->gtOper;
10856 else if (andOp->gtType == TYP_LONG)
10858 if (andOp->gtOp.gtOp2->gtOper != GT_CNS_NATIVELONG)
10861 if (andOp->gtOp.gtOp2->gtIntConCommon.LngValue() != 1)
10864 if (shiftAmount > 63)
10867 UINT64 newAndOperand = ((UINT64) 1) << shiftAmount;
10869 andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand);
10871 // Reverse the cond if necessary
10872 if (ival2 == 1)
10874 gtReverseCond(tree);
10875 cns2->gtIntConCommon.SetLngValue(0);
10876 oper = tree->gtOper;
10880 andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1;
10882 DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2);
10883 DEBUG_DESTROY_NODE(rshiftOp);
10885 } // END if (ival2 != INT_MAX)
10888 /* Now check for compares with small constant longs that can be cast to int */
10890 if (!cns2->OperIsConst())
10891 goto COMPARE;
10893 if (cns2->TypeGet() != TYP_LONG)
10894 goto COMPARE;
10896 /* Is the constant 31 bits or smaller? */
10898 if ((cns2->gtIntConCommon.LngValue() >> 31) != 0)
10899 goto COMPARE;
10901 /* Is the first comparand a mask operation of type long? */
10903 if (op1->gtOper != GT_AND)
10905 /* Another interesting case: cast from int */
10907 if (op1->gtOper == GT_CAST &&
10908 op1->CastFromType() == TYP_INT &&
10909 !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate
10910 !op1->gtOverflow()) // cannot be an overflow checking cast
10912 /* Simply make this into an integer comparison */
10914 tree->gtOp.gtOp1 = op1->gtCast.CastOp();
10915 tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT);
10921 noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND);
10923 /* Is the result of the mask effectively an INT ? */
10925 GenTreePtr andMask; andMask = op1->gtOp.gtOp2;
10926 if (andMask->gtOper != GT_CNS_NATIVELONG)
10927 goto COMPARE;
10928 if ((andMask->gtIntConCommon.LngValue() >> 32) != 0)
10929 goto COMPARE;
10931 /* Now we know that we can cast gtOp.gtOp1 of AND to int */
10933 op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT,
10934 op1->gtOp.gtOp1,
10935 TYP_INT);
10937 /* now replace the mask node (gtOp.gtOp2 of AND node) */
10939 noway_assert(andMask == op1->gtOp.gtOp2);
10941 ival1 = (int) andMask->gtIntConCommon.LngValue();
10942 andMask->SetOper(GT_CNS_INT);
10943 andMask->gtType = TYP_INT;
10944 andMask->gtIntCon.gtIconVal = ival1;
10946 /* now change the type of the AND node */
10948 op1->gtType = TYP_INT;
10950 /* finally we replace the comparand */
10952 ival2 = (int) cns2->gtIntConCommon.LngValue();
10953 cns2->SetOper(GT_CNS_INT);
10954 cns2->gtType = TYP_INT;
10956 noway_assert(cns2 == op2);
10957 cns2->gtIntCon.gtIconVal = ival2;
10966 if ((tree->gtFlags & GTF_UNSIGNED) == 0)
10968 if (op2->gtOper == GT_CNS_INT)
10971 /* Check for "expr relop 1" */
10972 if (cns2->gtIntCon.gtIconVal == +1)
10974 /* Check for "expr >= 1" */
10977 /* Change to "expr > 0" */
10981 /* Check for "expr < 1" */
10982 else if (oper == GT_LT)
10984 /* Change to "expr <= 0" */
10989 /* Check for "expr relop -1" */
10990 else if ((cns2->gtIntCon.gtIconVal == -1) && ((oper == GT_LE) || (oper == GT_GT)))
10992 /* Check for "expr <= -1" */
10995 /* Change to "expr < 0" */
10999 /* Check for "expr > -1" */
11000 else if (oper == GT_GT)
11002 /* Change to "expr >= 0" */
11006 // If we get here we should be changing 'oper'
11007 assert(tree->OperGet() != oper);
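// (Illustrative examples: "x >= 1" has just become "x > 0", and "x > -1"
// has become "x >= 0"; cns2 is zeroed accordingly below.)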
11009 // Keep the old ValueNumber for 'tree' as the new expr
11010 // will still compute the same value as before
11011 tree->SetOper(oper, GenTree::PRESERVE_VN);
11012 cns2->gtIntCon.gtIconVal = 0;
11014 // vnStore is null before the ValueNumber phase has run
11015 if (vnStore != nullptr)
11017 // Update the ValueNumber for 'cns2', as we just changed it to 0
11018 fgValueNumberTreeConst(cns2);
11021 op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2);
11029 noway_assert(tree->OperKind() & GTK_RELOP);
11031 /* Check if the result of the comparison is used for a jump.
11032 * If not then only the int (i.e. 32 bit) case is handled in
11033 * the code generator through the (x86) "set" instructions.
11034 * For the rest of the cases, the simplest way is to
11035 * "simulate" the comparison with ?:
11037 * On ARM, we previously used the IT instruction, but the IT instructions
11038 * have mostly been declared obsolete and off-limits, so all cases on ARM
11039 * get converted to ?: */
11041 if (!(tree->gtFlags & GTF_RELOP_JMP_USED) &&
11042 fgMorphRelopToQmark(op1))
11044 /* We convert it to "(CMP_TRUE) ? (1):(0)" */
11046 op1 = tree;
11047 op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
11048 op1->gtRequestSetFlags();
11050 op2 = new (this, GT_COLON) GenTreeColon(TYP_INT, gtNewIconNode(1), gtNewIconNode(0)
11051 );
11052 op2 = fgMorphTree(op2);
11054 tree = gtNewQmarkNode(TYP_INT, op1, op2);
11056 fgMorphTreeDone(tree);
11058 return tree;
11064 /* If op1 is a comma throw node then we won't be keeping op2 */
11065 if (fgIsCommaThrow(op1))
11068 /* Get hold of the two branches */
11070 noway_assert(op2->OperGet() == GT_COLON);
11071 elseNode = op2->AsColon()->ElseNode();
11072 thenNode = op2->AsColon()->ThenNode();
11074 /* Try to hoist assignments out of qmark colon constructs.
11075 i.e. replace (cond?(x=a):(x=b)) with (x=(cond?a:b)). */
11077 if (tree->TypeGet() == TYP_VOID &&
11078 thenNode->OperGet() == GT_ASG &&
11079 elseNode->OperGet() == GT_ASG &&
11080 thenNode->TypeGet() != TYP_LONG &&
11081 GenTree::Compare(thenNode->gtOp.gtOp1, elseNode->gtOp.gtOp1) &&
11082 thenNode->gtOp.gtOp2->TypeGet() == elseNode->gtOp.gtOp2->TypeGet())
11084 noway_assert(thenNode->TypeGet() == elseNode->TypeGet());
11086 GenTreePtr asg = thenNode;
11087 GenTreePtr colon = op2;
11088 colon->gtOp.gtOp1 = thenNode->gtOp.gtOp2;
11089 colon->gtOp.gtOp2 = elseNode->gtOp.gtOp2;
11090 tree->gtType = colon->gtType = asg->gtOp.gtOp2->gtType;
11091 asg->gtOp.gtOp2 = tree;
11093 // Asg will have all the flags that the QMARK had
11094 asg->gtFlags |= (tree->gtFlags & GTF_ALL_EFFECT);
11096 // Colon flag won't have the flags that x had.
11097 colon->gtFlags &= ~GTF_ALL_EFFECT;
11098 colon->gtFlags |= (colon->gtOp.gtOp1->gtFlags |
11099 colon->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
11101 DEBUG_DESTROY_NODE(elseNode->gtOp.gtOp1);
11102 DEBUG_DESTROY_NODE(elseNode);
11108 /* If the 'else' branch is empty, swap the two branches and reverse the condition */
11110 if (elseNode->IsNothingNode())
11112 /* This can only happen for VOID ?: */
11113 noway_assert(op2->gtType == TYP_VOID);
11115 /* If the thenNode and elseNode are both nop nodes then optimize away the QMARK */
11116 if (thenNode->IsNothingNode())
11118 // We may be able to throw away op1 (unless it has side-effects)
11120 if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
11122 /* Just return a Nop Node */
11127 /* Just return the relop, but clear the special flags. Note
11128 that we can't do that for longs and floats (see code under
11129 COMPARE label above) */
11131 if (!fgMorphRelopToQmark(op1->gtOp.gtOp1))
11133 op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
11140 GenTreePtr tmp = elseNode;
11142 op2->AsColon()->ElseNode() = elseNode = thenNode;
11143 op2->AsColon()->ThenNode() = thenNode = tmp;
11144 gtReverseCond(op1);
11148 #if !defined(_TARGET_ARM_)
11149 // If we have (cond)?0:1, then we just return "cond" for TYP_INTs
11151 // Don't do this optimization for ARM: we always require assignment
11152 // to boolean to remain ?:, since we don't have any way to generate
11153 // this with straight-line code, like x86 does using setcc (at least
11154 // after the IT instruction is deprecated).
11156 if (genActualType(op1->gtOp.gtOp1->gtType) == TYP_INT &&
11157 genActualType(typ) == TYP_INT &&
11158 thenNode->gtOper == GT_CNS_INT &&
11159 elseNode->gtOper == GT_CNS_INT)
11161 ival1 = thenNode->gtIntCon.gtIconVal;
11162 ival2 = elseNode->gtIntCon.gtIconVal;
11164 // Is one constant 0 and the other 1?
11165 if ((ival1 | ival2) == 1 && (ival1 & ival2) == 0)
11167 // If the constants are {1, 0}, reverse the condition
11168 if (ival1 == 1)
11169 gtReverseCond(op1);
11171 // Unmark GTF_RELOP_JMP_USED on the condition node so it knows that it
11172 // needs to materialize the result as a 0 or 1.
11173 noway_assert(op1->gtFlags & (GTF_RELOP_QMARK | GTF_RELOP_JMP_USED));
11174 op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
11176 DEBUG_DESTROY_NODE(tree);
11177 DEBUG_DESTROY_NODE(op2);
11179 return op1;
11182 #endif // !_TARGET_ARM_
11184 break; // end case GT_QMARK
11189 #ifndef _TARGET_64BIT_
11190 if (typ == TYP_LONG)
11192 // This must be GTF_MUL_64RSLT
11193 assert(tree->gtIsValid64RsltMul());
11196 #endif // !_TARGET_64BIT_
11201 if (tree->gtOverflow())
11204 /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */
11207 if (op2->IsCnsIntOrI())
11209 /* Negate the constant and change the node to be "+" */
11211 op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
11213 tree->ChangeOper(oper);
11217 /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */
11220 if (op1->IsCnsIntOrI())
11222 noway_assert(varTypeIsIntOrI(tree));
11224 tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, tree->gtType, op2); // The type of the new GT_NEG node should be the same
11225 // as the type of the tree, i.e. tree->gtType.
11226 fgMorphTreeDone(op2);
11229 tree->ChangeOper(oper);
11233 /* No match - exit */
11237 #ifdef _TARGET_ARM64_
11239 if (!varTypeIsFloating(tree->gtType))
11241 // Codegen for this instruction needs to be able to throw two exceptions:
11242 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
11243 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
11247 // Codegen for this instruction needs to be able to throw one exception:
11248 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
11255 if (tree->gtOverflow())
11257 tree->gtRequestSetFlags();
11259 // Add the exception-throwing basic block to jump to on overflow
11261 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
11263 // We can't do any commutative morphing for overflow instructions
11274 /* Commute any non-REF constants to the right */
11277 if (op1->OperIsConst() && (op1->gtType != TYP_REF))
11279 // TODO-Review: We used to assert here that
11280 // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD));
11281 // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation,
11282 // and would sometimes hit this assertion. This may indicate a missed "remorph".
11283 // Task is to re-enable this assertion and investigate.
11285 /* Swap the operands */
11286 tree->gtOp.gtOp1 = op2;
11287 tree->gtOp.gtOp2 = op1;
11290 op2 = tree->gtOp.gtOp2;
11293 /* See if we can fold GT_ADD nodes. */
11295 if (oper == GT_ADD)
11297 /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */
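// (Illustrative example: "(x + 4) + (y + 9)" becomes "(x + y) + 13".)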
11299 if (op1->gtOper == GT_ADD &&
11300 op2->gtOper == GT_ADD &&
11301 !gtIsActiveCSE_Candidate(op2) &&
11302 op1->gtOp.gtOp2->gtOper == GT_CNS_INT &&
11303 op2->gtOp.gtOp2->gtOper == GT_CNS_INT &&
11304 !op1->gtOverflow() &&
11305 !op2->gtOverflow() )
11307 cns1 = op1->gtOp.gtOp2;
11308 cns2 = op2->gtOp.gtOp2;
11309 cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal;
11310 #ifdef _TARGET_64BIT_
11311 if (cns1->TypeGet() == TYP_INT)
11313 // we need to properly re-sign-extend or truncate after adding two int constants above
11314 cns1->AsIntCon()->TruncateOrSignExtend32();
11316 #endif //_TARGET_64BIT_
11318 tree->gtOp.gtOp2 = cns1;
11319 DEBUG_DESTROY_NODE(cns2);
11321 op1->gtOp.gtOp2 = op2->gtOp.gtOp1;
11322 op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT);
11323 DEBUG_DESTROY_NODE(op2);
11324 op2 = tree->gtOp.gtOp2;
11327 if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))
11329 /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */
11331 if (op1->gtOper == GT_ADD &&
11332 !gtIsActiveCSE_Candidate(op1) &&
11333 op1->gtOp.gtOp2->IsCnsIntOrI() &&
11334 !op1->gtOverflow() &&
11335 op1->gtOp.gtOp2->OperGet() == op2->OperGet())
11337 cns1 = op1->gtOp.gtOp2;
11338 op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() + op2->gtIntConCommon.IconValue());
11339 #ifdef _TARGET_64BIT_
11340 if (op2->TypeGet() == TYP_INT)
11342 // we need to properly re-sign-extend or truncate after adding two int constants above
11343 op2->AsIntCon()->TruncateOrSignExtend32();
11345 #endif //_TARGET_64BIT_
11347 if (cns1->OperGet() == GT_CNS_INT)
11349 op2->gtIntCon.gtFieldSeq =
11350 GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq,
11351 op2->gtIntCon.gtFieldSeq);
11353 DEBUG_DESTROY_NODE(cns1);
11355 tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
11356 DEBUG_DESTROY_NODE(op1);
11357 op1 = tree->gtOp.gtOp1;
11362 if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree))
11365 // If this addition is adding an offset to a null pointer,
11366 // avoid the work and yield the null pointer immediately.
11367 // Dereferencing the pointer in either case will have the
11368 // same effect.
11370 if (!gtIsActiveCSE_Candidate(op1) && varTypeIsGC(op2->TypeGet()))
11372 op2->gtType = tree->gtType;
11373 DEBUG_DESTROY_NODE(op1);
11374 DEBUG_DESTROY_NODE(tree);
11378 // Remove the addition iff it won't change the tree type
11381 if (!gtIsActiveCSE_Candidate(op2) &&
11382 ((op1->TypeGet() == tree->TypeGet()) ||
11383 (op1->TypeGet() != TYP_REF)))
11385 if (fgGlobalMorph &&
11386 (op2->OperGet() == GT_CNS_INT) &&
11387 (op2->gtIntCon.gtFieldSeq != NULL) &&
11388 (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField()))
11390 fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq);
11393 DEBUG_DESTROY_NODE(op2);
11394 DEBUG_DESTROY_NODE(tree);
11401 /* See if we can fold GT_MUL by const nodes */
11402 else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase)
11404 #ifndef _TARGET_64BIT_
11405 noway_assert(typ <= TYP_UINT);
11406 #endif // !_TARGET_64BIT_
11407 noway_assert(!tree->gtOverflow());
11409 ssize_t mult = op2->gtIntConCommon.IconValue();
11410 bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT &&
11411 op2->gtIntCon.gtFieldSeq != nullptr &&
11412 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq();
11414 assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr);
11416 if (mult == 0)
11418 // We may be able to throw away op1 (unless it has side-effects)
11420 if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
11422 DEBUG_DESTROY_NODE(op1);
11423 DEBUG_DESTROY_NODE(tree);
11424 return op2; // Just return the "0" node
11427 // We need to keep op1 for the side-effects. Hang it off
11428 // a GT_COMMA node
11430 tree->ChangeOper(GT_COMMA);
11434 size_t abs_mult = (mult >= 0) ? mult : -mult;
11435 size_t lowestBit = genFindLowestBit(abs_mult);
11436 bool changeToShift = false;
11438 // is it a power of two? (positive or negative)
11439 if (abs_mult == lowestBit)
11441 // if negative negate (min-int does not need negation)
11442 if (mult < 0 && mult != SSIZE_T_MIN)
11444 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
11445 fgMorphTreeDone(op1);
11448 // If "op2" is a constant array index, the other multiplicand must be a constant.
11449 // Transfer the annotation to the other one.
11450 if (op2->OperGet() == GT_CNS_INT &&
11451 op2->gtIntCon.gtFieldSeq != nullptr &&
11452 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
11454 assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr);
11455 GenTreePtr otherOp = op1;
11456 if (otherOp->OperGet() == GT_NEG)
11457 otherOp = otherOp->gtOp.gtOp1;
11458 assert(otherOp->OperGet() == GT_CNS_INT);
11459 assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
11460 otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq;
11465 DEBUG_DESTROY_NODE(op2);
11466 DEBUG_DESTROY_NODE(tree);
11470 /* Change the multiplication into a shift by log2(val) bits */
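// (Illustrative example: "x * 8" becomes "x << 3"; for "x * -8" the negation
// inserted above yields "(-x) << 3".)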
11471 op2->gtIntConCommon.SetIconValue(genLog2(abs_mult));
11472 changeToShift = true;
11475 else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
11477 int shift = genLog2(lowestBit);
11478 ssize_t factor = abs_mult >> shift;
11480 if (factor == 3 || factor == 5 || factor == 9)
11482 // if negative negate (min-int does not need negation)
11483 if (mult < 0 && mult != SSIZE_T_MIN)
11485 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
11486 fgMorphTreeDone(op1);
11489 GenTreePtr factorIcon = gtNewIconNode(factor, TYP_I_IMPL);
11490 if (op2IsConstIndex)
11492 factorIcon->AsIntCon()->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
11495 // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift
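// (Illustrative example: "x * 36" becomes "(x * 9) << 2"; the multiply by 9
// can then map onto a scaled-index address mode, e.g. an x86 LEA.)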
11496 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon);
11497 fgMorphTreeDone(op1);
11499 op2->gtIntConCommon.SetIconValue(shift);
11500 changeToShift = true;
11503 #endif // LEA_AVAILABLE
11506 // vnStore is null before the ValueNumber phase has run
11507 if (vnStore != nullptr)
11509 // Update the ValueNumber for 'op2', as we just changed the constant
11510 fgValueNumberTreeConst(op2);
11513 // Keep the old ValueNumber for 'tree' as the new expr
11514 // will still compute the same value as before
11515 tree->ChangeOper(oper, GenTree::PRESERVE_VN);
11517 goto DONE_MORPHING_CHILDREN;
11520 else if (fgOperIsBitwiseRotationRoot(oper))
11522 tree = fgRecognizeAndMorphBitwiseRotation(tree);
11524 // fgRecognizeAndMorphBitwiseRotation may return a new tree
11525 oper = tree->OperGet();
11526 typ = tree->TypeGet();
11527 op1 = tree->gtOp.gtOp1;
11528 op2 = tree->gtOp.gtOp2;
11537 /* Any constant cases should have been folded earlier */
11538 noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase);
11543 noway_assert(varTypeIsFloating(op1->TypeGet()));
11545 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN, fgPtrArgCntCur);
11550 // Can not remove a GT_IND if it is currently a CSE candidate.
11551 if (gtIsActiveCSE_Candidate(tree))
11554 bool foldAndReturnTemp; foldAndReturnTemp = false;
11558 /* Try to Fold *(&X) into X */
11559 if (op1->gtOper == GT_ADDR)
11561 // Can not remove a GT_ADDR if it is currently a CSE candidate.
11562 if (gtIsActiveCSE_Candidate(op1))
11565 temp = op1->gtOp.gtOp1; // X
11567 // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that
11568 // they are the *same* struct type. In fact, they almost certainly aren't. If the
11569 // address has an associated field sequence, that identifies this case; go through
11570 // the "lcl_fld" path rather than this one.
11571 FieldSeqNode* addrFieldSeq = NULL; // This is an unused out parameter below.
11572 if ( typ == temp->TypeGet()
11573 && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq))
11575 foldAndReturnTemp = true;
11577 else if (temp->OperIsLocal())
11579 unsigned lclNum = temp->gtLclVarCommon.gtLclNum;
11580 LclVarDsc * varDsc = &lvaTable[lclNum];
11582 // We will try to optimize when we have a promoted struct with a zero lvFldOffset
11583 if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
11585 noway_assert(varTypeIsStruct(varDsc));
11587 // We will try to optimize when we have a single field struct that is being struct promoted
11588 if (varDsc->lvFieldCnt == 1)
11590 unsigned lclNumFld = varDsc->lvFieldLclStart;
11591 // just grab the promoted field
11592 LclVarDsc * fieldVarDsc = &lvaTable[lclNumFld];
11594 // Also make sure that the tree type matches the fieldVarType and that its lvFldOffset is zero
11595 if (fieldVarDsc->TypeGet() == tree->TypeGet() && (fieldVarDsc->lvFldOffset == 0))
11597 // We can just use the existing promoted field LclNum
11598 temp->gtLclVarCommon.SetLclNum(lclNumFld);
11599 temp->gtType = fieldVarDsc->TypeGet();
11601 foldAndReturnTemp = true;
11605 // If the type of the IND (typ) is a "small int", and the type of the local has the
11606 // same width, then we can reduce to just the local variable -- it will be
11607 // correctly normalized, and signed/unsigned differences won't matter.
11609 // The below transformation cannot be applied if the local var needs to be normalized on load.
11610 else if ( varTypeIsSmall(typ) &&
11611 (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
11612 !lvaTable[lclNum].lvNormalizeOnLoad() )
11614 tree->gtType = temp->gtType;
11615 foldAndReturnTemp = true;
11619 // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e. nullptr)
11620 assert(fieldSeq == nullptr);
11621 bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq);
11622 assert(b || fieldSeq == nullptr);
11624 if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD))
11626 // Append the field sequence, change the type.
11627 temp->AsLclFld()->gtFieldSeq = GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
11628 temp->gtType = tree->TypeGet();
11630 foldAndReturnTemp = true;
11633 // Otherwise we will fold this into a GT_LCL_FLD below
11634 // where we check (temp != nullptr)
11636 else // !temp->OperIsLocal()
11638 // We don't try to fold away the GT_IND/GT_ADDR for this case
11642 else if (op1->OperGet() == GT_ADD)
11644 /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */
11646 if (op1->gtOp.gtOp1->OperGet() == GT_ADDR &&
11647 op1->gtOp.gtOp2->OperGet() == GT_CNS_INT
11648 && (!(opts.MinOpts() || opts.compDbgCode)))
11650 // No overflow arithmetic with pointers
11651 noway_assert(!op1->gtOverflow());
11653 temp = op1->gtOp.gtOp1->gtOp.gtOp1;
11654 if (!temp->OperIsLocal())
11660 // Can not remove the GT_ADDR if it is currently a CSE candidate.
11661 if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1))
11664 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
11665 fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
11667 // Does the address have an associated zero-offset field sequence?
11668 FieldSeqNode* addrFieldSeq = NULL;
11669 if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq))
11671 fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq);
11675 typ == temp->TypeGet() &&
11676 temp->TypeGet() != TYP_STRUCT)
11678 noway_assert(!varTypeIsGC(temp->TypeGet()));
11679 foldAndReturnTemp = true;
11683 // The emitter can't handle large offsets
11684 if (ival1 != (unsigned short)ival1)
11687 // The emitter can get confused by invalid offsets
11688 if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum))
11691 #ifdef _TARGET_ARM_
11692 // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field
11694 if (varTypeIsFloating(tree->TypeGet()))
11696 if ((ival1 % emitTypeSize(tree->TypeGet())) != 0)
11698 tree->gtFlags |= GTF_IND_UNALIGNED;
11704 // Now we can fold this into a GT_LCL_FLD below
11705 // where we check (temp != nullptr)
11710 // If we have decided to fold, then temp cannot be nullptr
11711 if (foldAndReturnTemp)
11713 assert(temp != nullptr);
11717 if (temp != nullptr)
11719 noway_assert(op1->gtOper == GT_ADD || op1->gtOper == GT_ADDR);
11721 // If we haven't already decided to fold this expression
11723 if (!foldAndReturnTemp)
11725 noway_assert(temp->OperIsLocal());
11726 LclVarDsc* varDsc = &(lvaTable[temp->AsLclVarCommon()->gtLclNum]);
11727 // Make sure we don't separately promote the fields of this struct.
11728 if (varDsc->lvRegStruct)
11730 // We can enregister, but can't promote.
11731 varDsc->lvPromoted = false;
11735 lvaSetVarDoNotEnregister(temp->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
11738 // We will turn a GT_LCL_VAR into a GT_LCL_FLD with a gtLclOffs of 'ival'
11739 // or if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival'
11740 // Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
11742 if (temp->OperGet() == GT_LCL_FLD)
11744 temp->AsLclFld()->gtLclOffs += (unsigned short)ival1;
11745 temp->AsLclFld()->gtFieldSeq =
11746 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
11750 temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"...
11751 temp->AsLclFld()->gtLclOffs = (unsigned short)ival1;
11752 if (fieldSeq != NULL) // If it does represent a field, note that.
11753 temp->AsLclFld()->gtFieldSeq = fieldSeq;
11755 temp->gtType = tree->gtType;
11756 foldAndReturnTemp = true;
11759 assert(foldAndReturnTemp == true);
11761 // Keep the DONT_CSE flag in sync
11762 // (i.e. keep the original value of this flag from tree)
11763 // as it can be set for 'temp' because a GT_ADDR always marks it for its op1
11765 temp->gtFlags &= ~GTF_DONT_CSE;
11766 temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
11768 noway_assert(op1->gtOper == GT_ADD || op1->gtOper == GT_ADDR);
11769 noway_assert(temp->gtType == tree->gtType);
11771 if (op1->OperGet() == GT_ADD)
11773 DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR
11774 DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT
11776 DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR
11777 DEBUG_DESTROY_NODE(tree); // GT_IND
11782 // Only do this optimization when we are in the global optimizer. Doing this after value numbering
11783 // could result in an invalid value number for the newly generated GT_IND node.
11784 if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph)
11786 // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)).
11787 // TBD: this transformation is currently necessary for correctness -- it might
11788 // be good to analyze the failures that result if we don't do this, and fix them
11789 // in other ways. Ideally, this should be optional.
11790 GenTreePtr commaNode = op1;
11791 unsigned treeFlags = tree->gtFlags;
11792 commaNode->gtType = typ;
11793 commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is dangerous, clear the GTF_REVERSE_OPS at least.
11795 commaNode->gtFlags |= GTF_MORPHED;
11797 while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
11799 commaNode = commaNode->gtOp.gtOp2;
11800 commaNode->gtType = typ;
11801 commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is dangerous, clear the GTF_REVERSE_OPS at least.
11803 commaNode->gtFlags |= GTF_MORPHED;
11806 bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0;
11807 ArrayInfo arrInfo;
11808 if (wasArrIndex)
11810 bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
11812 GetArrayInfoMap()->Remove(tree);
11815 op1 = gtNewOperNode(GT_IND, typ, commaNode->gtOp.gtOp2);
11816 op1->gtFlags = treeFlags;
11818 if (wasArrIndex)
11819 GetArrayInfoMap()->Set(op1, arrInfo);
11822 op1->gtFlags |= GTF_MORPHED;
11824 commaNode->gtOp.gtOp2 = op1;
11832 // Can not remove op1 if it is currently a CSE candidate.
11833 if (gtIsActiveCSE_Candidate(op1))
11836 if (op1->OperGet() == GT_IND)
11838 if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
11840 // Can not remove a GT_ADDR if it is currently a CSE candidate.
11841 if (gtIsActiveCSE_Candidate(tree))
11844 // Perform the transform ADDR(IND(...)) == (...).
11845 GenTreePtr addr = op1->gtOp.gtOp1;
11847 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
11849 DEBUG_DESTROY_NODE(op1);
11850 DEBUG_DESTROY_NODE(tree);
11855 else if (op1->gtOper == GT_CAST)
11857 GenTreePtr casting = op1->gtCast.CastOp();
11858 if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR)
11860 DEBUG_DESTROY_NODE(op1);
11861 tree->gtOp.gtOp1 = op1 = casting;
11864 else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase)
11866 // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)).
11867 // (Be sure to mark "z" as an l-value...)
11868 GenTreePtr commaNode = op1;
11869 while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
11871 commaNode = commaNode->gtOp.gtOp2;
11873 // The top-level addr might be annotated with a zeroOffset field.
11874 FieldSeqNode* zeroFieldSeq = nullptr;
11875 bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq);
11877 commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE;
11879 // If the node we're about to put under a GT_ADDR is a GT_IND, the indirection
11880 // doesn't need to be materialized, since we only want the addressing mode. Because
11881 // of this, this GT_IND is not a faulting indirection and we don't have to extract it
11882 // as a side effect.
11883 GenTree* commaOp2 = commaNode->gtOp.gtOp2;
11884 if (commaOp2->gtOper == GT_IND)
11886 commaOp2->gtFlags |= GTF_IND_NONFAULTING;
11889 op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2);
11893 // Transfer the annotation to the new GT_ADDR node.
11894 GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq);
11896 commaNode->gtOp.gtOp2 = op1;
11897 // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform
11898 // might give op1 a type different from byref (like, say, native int). So now go back and give
11899 // all the comma nodes the type of op1.
11901 while (commaNode->gtOper == GT_COMMA)
11903 commaNode->gtType = op1->gtType; commaNode->gtFlags |= op1->gtFlags;
11905 commaNode->gtFlags |= GTF_MORPHED;
11907 commaNode = commaNode->gtOp.gtOp2;
11913 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
11914 op1->gtFlags |= GTF_DONT_CSE;
11920 /* Mark the nodes that are conditionally executed */
11921 fgWalkTreePre(&tree, gtMarkColonCond);
11923 /* Since we're doing this postorder we clear this if it got set by a child */
11924 fgRemoveRestOfBlock = false;
11929 /* Special case: trees that don't produce a value */
11930 if ((op2->OperKind() & GTK_ASGOP) ||
11931 (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) ||
11934 typ = tree->gtType = TYP_VOID;
11937 // If we are in the Valuenum CSE phase then don't morph away anything as these
11938 // nodes may have CSE defs/uses in them.
11940 if (!optValnumCSE_phase)
11942 // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this is all we need.
11945 GenTreePtr op1SideEffects = NULL;
11946 // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
11947 // hoisted expressions in loops.
11948 gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE));
11949 if (op1SideEffects)
11951 // Replace the left-hand side with the side effect list.
11952 tree->gtOp.gtOp1 = op1SideEffects;
11953 tree->gtFlags |= (op1SideEffects->gtFlags & GTF_ALL_EFFECT);
11957 /* The left operand is worthless, throw it away */
11958 if (lvaLocalVarRefCounted)
11960 lvaRecursiveDecRefCounts(op1);
11962 op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
11963 DEBUG_DESTROY_NODE(tree);
11964 DEBUG_DESTROY_NODE(op1);
11968 /* If the right operand is just a void nop node, throw it away */
11969 if (op2->IsNothingNode() && op1->gtType == TYP_VOID)
11971 op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
11972 DEBUG_DESTROY_NODE(tree);
11973 DEBUG_DESTROY_NODE(op2);
11982 /* Special case if fgRemoveRestOfBlock is set to true */
11983 if (fgRemoveRestOfBlock)
11985 if (fgIsCommaThrow(op1, true))
11987 GenTreePtr throwNode = op1->gtOp.gtOp1;
11988 noway_assert(throwNode->gtType == TYP_VOID);
11993 noway_assert(op1->OperKind() & GTK_RELOP);
11994 noway_assert(op1->gtFlags & GTF_EXCEPT);
11996 // We need to keep op1 for the side-effects. Hang it off a GT_COMMA node.
11999 tree->ChangeOper(GT_COMMA);
12000 tree->gtOp.gtOp2 = op2 = gtNewNothingNode();
12002 // Additionally, since we're eliminating the JTRUE,
12003 // codegen won't like it if op1 is a RELOP of longs, floats or doubles.
12004 // So we change it into a GT_COMMA as well.
12005 op1->ChangeOper(GT_COMMA);
12006 op1->gtType = op1->gtOp.gtOp1->gtType;
12015 noway_assert(oper == tree->gtOper);
12017 // If we are in the Valuenum CSE phase then don't morph away anything as these
12018 // nodes may have CSE defs/uses in them.
12020 if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->IsList())
12022 /* Check for op1 as a GT_COMMA with an unconditional throw node */
12023 if (op1 && fgIsCommaThrow(op1, true))
12025 if ((op1->gtFlags & GTF_COLON_COND) == 0)
12027 /* We can safely throw out the rest of the statements */
12028 fgRemoveRestOfBlock = true;
12031 GenTreePtr throwNode = op1->gtOp.gtOp1;
12032 noway_assert(throwNode->gtType == TYP_VOID);
12034 if (oper == GT_COMMA)
12036 /* Both tree and op1 are GT_COMMA nodes */
12037 /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */
12038 tree->gtOp.gtOp1 = throwNode;
12041 else if (oper != GT_NOP)
12043 if (genActualType(typ) == genActualType(op1->gtType))
12045 /* The types match so, return the comma throw node as the new tree */
12050 if (typ == TYP_VOID)
12052 // Return the throw node
12057 GenTreePtr commaOp2 = op1->gtOp.gtOp2;
12059 // The type of the comma node needs to be the same as the tree's type
12060 if (typ == TYP_LONG)
12062 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
12063 commaOp2->gtIntConCommon.SetLngValue(0);
12064 /* Change the types of oper and commaOp2 to TYP_LONG */
12065 op1->gtType = commaOp2->gtType = TYP_LONG;
12067 else if (varTypeIsFloating(typ))
12069 commaOp2->ChangeOperConst(GT_CNS_DBL);
12070 commaOp2->gtDblCon.gtDconVal = 0.0;
12071 /* Change the types of oper and commaOp2 to TYP_DOUBLE */
12072 op1->gtType = commaOp2->gtType = TYP_DOUBLE;
12076 commaOp2->ChangeOperConst(GT_CNS_INT);
12077 commaOp2->gtIntConCommon.SetIconValue(0);
12078 /* Change the types of oper and commaOp2 to TYP_INT */
12079 op1->gtType = commaOp2->gtType = TYP_INT;
12082 /* Return the GT_COMMA node as the new tree */
12089 /* Check for op2 as a GT_COMMA with an unconditional throw */
12091 if (op2 && fgIsCommaThrow(op2, true))
12093 if ((op2->gtFlags & GTF_COLON_COND) == 0)
12095 /* We can safely throw out the rest of the statements */
12096 fgRemoveRestOfBlock = true;
12099 // If op1 has no side-effects
12100 if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
12102 // If tree is an asg node
12103 if (tree->OperIsAssignment())
12105 /* Return the throw node as the new tree */
12106 return op2->gtOp.gtOp1;
12109 if (tree->OperGet() == GT_ARR_BOUNDS_CHECK)
12111 /* Return the throw node as the new tree */
12112 return op2->gtOp.gtOp1;
12115 // If tree is a comma node
12116 if (tree->OperGet() == GT_COMMA)
12118 /* Return the throw node as the new tree */
12119 return op2->gtOp.gtOp1;
12122 /* for the shift nodes the type of op2 can differ from the tree type */
12123 if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
12125 noway_assert(GenTree::OperIsShiftOrRotate(oper));
12127 GenTreePtr commaOp2 = op2->gtOp.gtOp2;
12129 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
12130 commaOp2->gtIntConCommon.SetLngValue(0);
12132 /* Change the types of oper and commaOp2 to TYP_LONG */
12133 op2->gtType = commaOp2->gtType = TYP_LONG;
12136 if ((genActualType(typ) == TYP_INT) && (genActualType(op2->gtType) == TYP_LONG ||
12137 varTypeIsFloating(op2->TypeGet())))
12139 // An example case is comparison (say GT_GT) of two longs or floating point values.
12141 GenTreePtr commaOp2 = op2->gtOp.gtOp2;
12143 commaOp2->ChangeOperConst(GT_CNS_INT);
12144 commaOp2->gtIntCon.gtIconVal = 0;
12145 /* Change the types of oper and commaOp2 to TYP_INT */
12146 op2->gtType = commaOp2->gtType = TYP_INT;
12149 if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL))
12151 noway_assert(tree->OperGet() == GT_ADD);
12153 GenTreePtr commaOp2 = op2->gtOp.gtOp2;
12155 commaOp2->ChangeOperConst(GT_CNS_INT);
12156 commaOp2->gtIntCon.gtIconVal = 0;
12157 /* Change the types of oper and commaOp2 to TYP_BYREF */
12158 op2->gtType = commaOp2->gtType = TYP_BYREF;
12161 /* types should now match */
12162 noway_assert( (genActualType(typ) == genActualType(op2->gtType)));
12164 /* Return the GT_COMMA node as the new tree */
12170 /*-------------------------------------------------------------------------
12171 * Optional morphing is done if tree transformations are permitted
12174 if ((opts.compFlags & CLFLG_TREETRANS) == 0)
12177 tree = fgMorphSmpOpOptional(tree->AsOp());
12179 } // extra scope for gcc workaround
12183 #pragma warning(pop)
12187 GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
12189 genTreeOps oper = tree->gtOper;
12190 GenTree* op1 = tree->gtOp1;
12191 GenTree* op2 = tree->gtOp2;
12192 var_types typ = tree->TypeGet();
12194 if (GenTree::OperIsCommutative(oper))
12196 /* Swap the operands so that the more expensive one is 'op1' */
12198 if (tree->gtFlags & GTF_REVERSE_OPS)
12206 tree->gtFlags &= ~GTF_REVERSE_OPS;
12209 if (oper == op2->gtOper)
12211 /* Reorder nested operators at the same precedence level to be
12212 left-recursive. For example, change "(a+(b+c))" to the
12213 equivalent expression "((a+b)+c)".
12216 /* Things are handled differently for floating-point operators */
12218 if (!varTypeIsFloating(tree->TypeGet()))
12220 fgMoveOpsLeft(tree);
12230 /* Change "((x+icon)+y)" to "((x+y)+icon)"
12231 Don't reorder floating-point operations */
12233 if ((oper == GT_ADD) && !tree->gtOverflow() &&
12234 (op1->gtOper == GT_ADD) && ! op1->gtOverflow() && varTypeIsIntegralOrI(typ))
12236 GenTreePtr ad2 = op1->gtOp.gtOp2;
12238 if (op2->OperIsConst() == 0 &&
12239 ad2->OperIsConst() != 0)
12251 // And it swaps ad2 and op2. If (op2) is varTypeIsGC, then this implies that (tree) is
12252 // varTypeIsGC. If (op1) is not, then when we swap (ad2) and (op2), then we have a TYP_INT node
12253 // (op1) with a child that is varTypeIsGC. If we encounter that situation, make (op1) the same type as (tree).
12256 // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is necessary.
12259 if (varTypeIsGC(op2->TypeGet()))
12261 noway_assert(varTypeIsGC(typ));
12266 op1 ->gtOp.gtOp2 = op2;
12267 op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
12275 /*-------------------------------------------------------------------------
12276 * Perform optional oper-specific postorder morphing
12282 bool dstIsSafeLclVar;
12286 /* We'll convert "a = a <op> x" into "a <op>= x" */
12287 /* and also "a = x <op> a" into "a <op>= x" for commutative ops */
12290 if (typ == TYP_LONG)
12294 /* Make sure we're allowed to do this */
12296 if (optValnumCSE_phase)
12298 // It is not safe to reorder/delete CSE's
12302 /* Are we assigning to a GT_LCL_VAR ? */
12304 dstIsSafeLclVar = (op1->gtOper == GT_LCL_VAR);
12306 /* If we have a GT_LCL_VAR, then is the address taken? */
12307 if (dstIsSafeLclVar)
12309 unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
12310 LclVarDsc * varDsc = lvaTable + lclNum;
12312 noway_assert(lclNum < lvaCount);
12314 /* Is the address taken? */
12315 if (varDsc->lvAddrExposed)
12317 dstIsSafeLclVar = false;
12319 else if (op2->gtFlags & GTF_ASG)
12325 if (!dstIsSafeLclVar)
12327 if (op2->gtFlags & GTF_ASG)
12330 if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT))
12334 /* Special case: a cast that can be thrown away */
12336 if (op1->gtOper == GT_IND &&
12337 op2->gtOper == GT_CAST &&
12338 !op2->gtOverflow() )
12344 srct = op2->gtCast.CastOp()->TypeGet();
12345 cast = (var_types) op2->CastToType();
12346 dstt = op1->TypeGet();
12348 /* Make sure these are all ints and precision is not lost */
12350 if (cast >= dstt && dstt <= TYP_INT && srct <= TYP_INT)
12351 op2 = tree->gtOp2 = op2->gtCast.CastOp();
12354 /* Make sure we have the operator range right */
12356 noway_assert(GT_SUB == GT_ADD + 1);
12357 noway_assert(GT_MUL == GT_ADD + 2);
12358 noway_assert(GT_DIV == GT_ADD + 3);
12359 noway_assert(GT_MOD == GT_ADD + 4);
12360 noway_assert(GT_UDIV== GT_ADD + 5);
12361 noway_assert(GT_UMOD== GT_ADD + 6);
12363 noway_assert(GT_OR == GT_ADD + 7);
12364 noway_assert(GT_XOR == GT_ADD + 8);
12365 noway_assert(GT_AND == GT_ADD + 9);
12367 noway_assert(GT_LSH == GT_ADD + 10);
12368 noway_assert(GT_RSH == GT_ADD + 11);
12369 noway_assert(GT_RSZ == GT_ADD + 12);
12371 /* Check for a suitable operator on the RHS */
12373 cmop = op2->OperGet();
12378 // GT_CHS only supported for integer types
12379 if ( varTypeIsFloating(tree->TypeGet()))
12385 // GT_ASG_MUL only supported for floating point types
12386 if (!varTypeIsFloating(tree->TypeGet()))
12393 if (op2->gtOverflow())
12395 /* Disable folding into "<op>=" if the result can be
12396 visible to anyone, as <op> may throw an exception and
12397 the assignment should not proceed.
12398 We are safe with an assignment to a local variable.
12400 if (ehBlockHasExnFlowDsc(compCurBB))
12402 if (!dstIsSafeLclVar)
12405 #ifndef _TARGET_AMD64_
12406 // This is hard for byte-operations as we need to make
12407 // sure both operands are in RBM_BYTE_REGS.
12408 if (varTypeIsByte(op2->TypeGet()))
12410 #endif // _TARGET_AMD64_
12415 // GT_ASG_DIV only supported for floating point types
12416 if (!varTypeIsFloating(tree->TypeGet()))
12425 if (typ == TYP_LONG)
12435 /* TODO: allow non-const long assignment operators */
12437 if (typ == TYP_LONG && op2->gtOp.gtOp2->gtOper != GT_CNS_LNG)
12443 bool bReverse = false;
12444 bool bAsgOpFoldable = fgShouldCreateAssignOp(tree, &bReverse);
12445 if (bAsgOpFoldable)
12449 // We will transform this from "a = x <op> a" to "a <op>= x"
12450 // so we can now destroy the duplicate "a"
12451 DEBUG_DESTROY_NODE(op2->gtOp.gtOp2);
12452 op2->gtOp.gtOp2 = op2->gtOp.gtOp1;
12455 /* Special case: "x |= -1" and "x &= 0" */
12456 if (cmop == GT_AND || cmop == GT_OR)
12458 if (op2->gtOp.gtOp2->IsCnsIntOrI())
12460 ssize_t icon = op2->gtOp.gtOp2->gtIntCon.gtIconVal;
12462 noway_assert(typ <= TYP_UINT);
12464 if ((cmop == GT_AND && icon == 0) ||
12465 (cmop == GT_OR && icon == -1))
12467 /* Simply change to an assignment */
12468 tree->gtOp2 = op2->gtOp.gtOp2;
12474 if (cmop == GT_NEG)
12476 /* This is "x = -x;", use the flipsign operator */
12478 tree->ChangeOper (GT_CHS);
12480 if (op1->gtOper == GT_LCL_VAR)
12481 op1->gtFlags |= GTF_VAR_USEASG;
12483 tree->gtOp2 = gtNewIconNode(0, op1->TypeGet());
12488 if (cmop == GT_RSH && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
12490 // Changing from x = x op y to x op= y when x is a small integer type
12491 // makes the op size smaller (originally the op size was 32 bits, after
12492 // sign or zero extension of x, and there is an implicit truncation in the assignment).
12494 // This is ok in most cases because the upper bits were
12495 // lost when assigning the op result to a small type var,
12496 // but it may not be ok for the right shift operation where the higher bits
12497 // could be shifted into the lower bits and preserved.
12498 // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) == (sbyte)x >>signed y))
12499 // as do unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) == (ubyte)x >>unsigned y), but
12500 // signed right shift of an unsigned small type may give the wrong result:
12501 // e.g. (ubyte)((int)(ubyte)0xf0 >>signed 4) == 0x0f,
12502 // but (ubyte)0xf0 >>signed 4 == 0xff which is incorrect.
12503 // The result becomes correct if we use >>unsigned instead of >>signed.
12504 noway_assert(op1->TypeGet() == op2->gtOp.gtOp1->TypeGet());
12508 /* Replace with an assignment operator */
12509 noway_assert(GT_ADD - GT_ADD == GT_ASG_ADD - GT_ASG_ADD);
12510 noway_assert(GT_SUB - GT_ADD == GT_ASG_SUB - GT_ASG_ADD);
12511 noway_assert(GT_OR - GT_ADD == GT_ASG_OR - GT_ASG_ADD);
12512 noway_assert(GT_XOR - GT_ADD == GT_ASG_XOR - GT_ASG_ADD);
12513 noway_assert(GT_AND - GT_ADD == GT_ASG_AND - GT_ASG_ADD);
12514 noway_assert(GT_LSH - GT_ADD == GT_ASG_LSH - GT_ASG_ADD);
12515 noway_assert(GT_RSH - GT_ADD == GT_ASG_RSH - GT_ASG_ADD);
12516 noway_assert(GT_RSZ - GT_ADD == GT_ASG_RSZ - GT_ASG_ADD);
12518 tree->SetOper((genTreeOps)(cmop - GT_ADD + GT_ASG_ADD));
12519 tree->gtOp2 = op2->gtOp.gtOp2;
12521 /* Propagate GTF_OVERFLOW */
12523 if (op2->gtOverflowEx())
12525 tree->gtType = op2->gtType;
12526 tree->gtFlags |= (op2->gtFlags &
12527 (GTF_OVERFLOW|GTF_EXCEPT|GTF_UNSIGNED));
12530 #if FEATURE_SET_FLAGS
12532 /* Propagate GTF_SET_FLAGS */
12533 if (op2->gtSetFlags())
12535 tree->gtRequestSetFlags();
12538 #endif // FEATURE_SET_FLAGS
12540 DEBUG_DESTROY_NODE(op2);
12543 /* The target is used as well as being defined */
12544 if (op1->OperIsLocal())
12545 op1->gtFlags |= GTF_VAR_USEASG;
12548 #if CPU_HAS_FP_SUPPORT
12549 /* Check for the special case "x += y * x;" */
12551 // GT_ASG_MUL only supported for floating point types
12552 if (cmop != GT_ADD && cmop != GT_SUB)
12555 if (op2->gtOper == GT_MUL && varTypeIsFloating(tree->TypeGet()))
12557 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
12559 /* Change "x += x * y" into "x *= (y + 1)" */
12561 op2 = op2->gtOp.gtOp2;
12563 else if (GenTree::Compare(op1, op2->gtOp.gtOp2))
12565 /* Change "x += y * x" into "x *= (y + 1)" */
12567 op2 = op2->gtOp.gtOp1;
12572 op1 = gtNewDconNode(1.0);
12574 /* Now make the "*=" node */
12576 if (cmop == GT_ADD)
12578 /* Change "x += x * y" into "x *= (y + 1)" */
12580 tree->gtOp2 = op2 = gtNewOperNode(GT_ADD,
12587 /* Change "x -= x * y" into "x *= (1 - y)" */
12589 noway_assert(cmop == GT_SUB);
12590 tree->gtOp2 = op2 = gtNewOperNode(GT_SUB,
12595 tree->ChangeOper(GT_ASG_MUL);
12597 #endif // CPU_HAS_FP_SUPPORT
12605 /* Is the destination identical to the first RHS sub-operand? */
12607 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
12609 /* This is "x = ~x" which is the same as "x ^= -1"
12610 * Transform the node into a GT_ASG_XOR */
12612 noway_assert(genActualType(typ) == TYP_INT ||
12613 genActualType(typ) == TYP_LONG);
12615 op2->gtOp.gtOp2 = (genActualType(typ) == TYP_INT)
12616 ? gtNewIconNode(-1)
12617 : gtNewLconNode(-1);
12632 /* Check for the case "(val + icon) * icon" */
12634 if (op2->gtOper == GT_CNS_INT &&
12635 op1->gtOper == GT_ADD)
12637 GenTreePtr add = op1->gtOp.gtOp2;
12639 if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
12641 if (tree->gtOverflow() || op1->gtOverflow())
12646 ssize_t imul = op2->gtIntCon.gtIconVal;
12647 ssize_t iadd = add->gtIntCon.gtIconVal;
12649 /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */
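/* For instance, '(x + 2) * 4' becomes '(x * 4) + 8' */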
12652 tree->ChangeOper(oper);
12654 op2->gtIntCon.gtIconVal = iadd * imul;
12656 op1->ChangeOper(GT_MUL);
12658 add->gtIntCon.gtIconVal = imul;
12659 #ifdef _TARGET_64BIT_
12660 if (add->gtType == TYP_INT)
12662 // we need to properly re-sign-extend or truncate after multiplying two int constants above
12663 add->AsIntCon()->TruncateOrSignExtend32();
12665 #endif //_TARGET_64BIT_
12673 /* For "val / 1", just return "val" */
12675 if ((op2->gtOper == GT_CNS_INT) && (op2->gtIntConCommon.IconValue() == 1))
12677 DEBUG_DESTROY_NODE(tree);
12680 // Do this for "long" constants as well as ints.
12681 else if ((op2->gtOper == GT_CNS_LNG) && (op2->gtIntConCommon.LngValue() == 1))
12683 DEBUG_DESTROY_NODE(tree);
12691 /* Check for the case "(val + icon) << icon" */
12693 if (op2->IsCnsIntOrI() &&
12694 op1->gtOper == GT_ADD && !op1->gtOverflow())
12696 GenTreePtr cns = op1->gtOp.gtOp2;
12698 if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
12700 ssize_t ishf = op2->gtIntConCommon.IconValue();
12701 ssize_t iadd = cns->gtIntConCommon.IconValue();
12703 // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
12705 /* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
12707 tree->ChangeOper(GT_ADD);
12708 ssize_t result = iadd << ishf;
12709 op2->gtIntConCommon.SetIconValue(result);
12710 #ifdef _TARGET_64BIT_
12711 if (op1->gtType == TYP_INT)
12713 op2->AsIntCon()->TruncateOrSignExtend32();
12715 #endif // _TARGET_64BIT_
12717 // we are reusing the shift amount node here, but the type we want is that of the shift result
12718 op2->gtType = op1->gtType;
12720 if (cns->gtOper == GT_CNS_INT &&
12721 cns->gtIntCon.gtFieldSeq != nullptr &&
12722 cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
12724 assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr);
12725 op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq;
12728 op1->ChangeOper(GT_LSH);
12730 cns->gtIntConCommon.SetIconValue(ishf);
12738 if (!optValnumCSE_phase)
12740 /* "x ^ -1" is "~x" */
12742 if ((op2->gtOper == GT_CNS_INT) && (op2->gtIntConCommon.IconValue() == -1))
12744 tree->ChangeOper(GT_NOT);
12745 tree->gtOp2 = NULL;
12746 DEBUG_DESTROY_NODE(op2);
12748 else if ((op2->gtOper == GT_CNS_LNG) && (op2->gtIntConCommon.LngValue() == -1))
12750 tree->ChangeOper(GT_NOT);
12751 tree->gtOp2 = NULL;
12752 DEBUG_DESTROY_NODE(op2);
12754 else if ((op2->gtOper == GT_CNS_INT) && (op2->gtIntConCommon.IconValue() == 1) &&
12755 op1->OperIsCompare())
12757 /* "binaryVal ^ 1" is "!binaryVal" */
12758 gtReverseCond(op1);
12759 DEBUG_DESTROY_NODE(op2);
12760 DEBUG_DESTROY_NODE(tree);
12768 return fgMorphInitBlock(tree);
12773 return fgMorphCopyBlock(tree);
12783 // Code to generate a magic number and shift amount for the magic number division
12784 // optimization. This code previously came from UTC, where it notes it was taken from
12785 // _The_PowerPC_Compiler_Writer's_Guide_, pages 57-58.
12786 // The paper it is based on is "Division by invariant integers using multiplication"
12787 // by Torbjorn Granlund and Peter L. Montgomery, in PLDI '94.
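//
// As a worked instance (values match the tables in Hacker's Delight): for a
// signed 32-bit divide by 7 this routine yields magic = 0x92492493 and
// shift = 2. That magic value is negative while the divisor is positive, so
// the caller (fgMorphDivByConst below) adds the numerator back in before
// applying the final shifts.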
12789 template <typename T>
12790 T GetSignedMagicNumberForDivide(T denom, int *shift /*out*/)
12792 // static SMAG smag;
12793 const int bits = sizeof(T) * 8;
12794 const int bits_minus_1 = bits - 1;
12796 typedef typename jitstd::make_unsigned<T>::type UT;
12798 const UT two_nminus1 = UT(1) << bits_minus_1;
12813 absDenom = abs(denom);
12814 t = two_nminus1 + (UT(denom) >> bits_minus_1); // t = 2^(N-1) + (1 if denom < 0, else 0)
12815 absNc = t - 1 - (t % absDenom); // absolute value of nc
12816 p = bits_minus_1; // initialize p
12817 q1 = two_nminus1 / absNc; // initialize q1 = 2^p / abs(nc)
12818 r1 = two_nminus1 - (q1 * absNc); // initialize r1 = rem(2^p, abs(nc))
12819 q2 = two_nminus1 / absDenom; // initialize q2 = 2^p / abs(denom)
12820 r2 = two_nminus1 - (q2 * absDenom); // initialize r2 = rem(2^p, abs(denom))
12825 q1 *= 2; // update q1 = 2^p / abs(nc)
12826 r1 *= 2; // update r1 = rem(2^p, abs(nc))
12828 if (r1 >= absNc) { // must be unsigned comparison
12833 q2 *= 2; // update q2 = 2^p / abs(denom)
12834 r2 *= 2; // update r2 = rem(2^p, abs(denom))
12836 if (r2 >= absDenom) { // must be unsigned comparison
12841 delta = absDenom - r2;
12842 } while (q1 < delta || (q1 == delta && r1 == 0));
12844 result_magic = q2 + 1; // resulting magic number
12846 result_magic = -result_magic;
12848 *shift = p - bits; // resulting shift
12850 return result_magic;
12854 bool Compiler::fgShouldUseMagicNumberDivide(GenTreeOp* tree)
12856 #ifdef _TARGET_ARM64_
12857 // TODO-ARM64-NYI: We don't have a 'mulHi' implementation yet for ARM64
12861 // During the optOptimizeValnumCSEs phase we can call fgMorph and when we do,
12862 // if this method returns true we will introduce a new LclVar and
12863 // a couple of new GenTree nodes, including an assignment to the new LclVar.
12864 // None of these new GenTree nodes will have valid ValueNumbers.
12865 // That is an invalid state for a GenTree node during the optOptimizeValnumCSEs phase.
12867 // Also during optAssertionProp when extracting side effects we can assert
12868 // during gtBuildCommaList if we have one tree that has Value Numbers
12869 // and another one that does not.
12871 if (!fgGlobalMorph)
12873 // We only perform the Magic Number Divide optimization during
12874 // the initial global morph phase
12878 if (tree->gtFlags & GTF_OVERFLOW)
12881 if (tree->gtOp2->gtOper != GT_CNS_INT && tree->gtOp2->gtOper != GT_CNS_LNG)
12884 ssize_t cons = tree->gtOp2->gtIntConCommon.IconValue();
12886 if (cons == 0 || cons == -1 || cons == 1)
12889 // codegen will expand these
12893 // someone else will fold this away, so don't make it complicated for them
12894 if (tree->gtOp1->IsCnsIntOrI())
12897 // There is no technical barrier to handling unsigned, however it is quite rare
12898 // and more work to support and test
12899 if (tree->gtFlags & GTF_UNSIGNED)
12907 // Transform x % c into x - ((x / c) * c)
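// For example, x % 7 becomes x - ((x / 7) * 7), with 'x' made safely reusable
// via fgMakeMultiUse; the GT_DIV produced here is itself a magic-number-divide
// candidate, so it is expected to be expanded when the resulting tree is morphed.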
12909 GenTree* Compiler::fgMorphModByConst(GenTreeOp* tree)
12911 assert(fgShouldUseMagicNumberDivide(tree));
12913 var_types type = tree->gtType;
12915 GenTree* cns = tree->gtOp2;
12917 GenTree* numerator = fgMakeMultiUse(&tree->gtOp1);
12919 tree->SetOper(GT_DIV);
12921 GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(cns));
12923 GenTree* sub = gtNewOperNode(GT_SUB, type, numerator, mul);
12926 sub->gtFlags |= GTF_MORPHED;
12932 // For ARM64 we don't have a remainder instruction.
12933 // The architecture manual suggests the following transformation to
12934 // generate code for such an operator:
12936 // a % b = a - (a / b) * b;
12938 // This method will produce the above expression if 'a' and 'b' are
12939 // leaf nodes; otherwise, if either of them is not a leaf, it will spill
12940 // its value into a temporary variable. An example:
12941 // (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
12943 GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
12945 #ifndef _TARGET_ARM64_
12946 assert(!"This should only be called for ARM64");
12949 if (tree->OperGet() == GT_MOD)
12951 tree->SetOper(GT_DIV);
12953 else if (tree->OperGet() == GT_UMOD)
12955 tree->SetOper(GT_UDIV);
12959 noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
12962 var_types type = tree->gtType;
12963 GenTree* denominator = tree->gtOp2;
12964 GenTree* numerator = tree->gtOp1;
12966 if (!numerator->OperIsLeaf())
12968 numerator = fgMakeMultiUse(&tree->gtOp1);
12971 if (!denominator->OperIsLeaf())
12973 denominator = fgMakeMultiUse(&tree->gtOp2);
12976 GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator));
12977 GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul);
12980 sub->gtFlags |= GTF_MORPHED;
12986 // Turn a division by a constant into a multiplication by constant + some adjustments
12987 // see comments on GetSignedMagicNumberForDivide for source of this algorithm.
12988 // returns: the transformed tree
12990 GenTree* Compiler::fgMorphDivByConst(GenTreeOp* tree)
12992 assert(fgShouldUseMagicNumberDivide(tree));
12994 JITDUMP("doing magic number divide optimization\n");
12996 int64_t denominator = tree->gtOp2->gtIntConCommon.IconValue();
12999 var_types type = tree->gtType;
13001 if (tree->gtType == TYP_INT)
13003 magic = GetSignedMagicNumberForDivide<int32_t>((int32_t) denominator, &shift);
13007 magic = GetSignedMagicNumberForDivide<int64_t>((int64_t) denominator, &shift);
13010 GenTree* numerator = nullptr;
13012 // If signs of the denominator and magic number don't match,
13013 // we will need to use the numerator again.
13014 if (signum(denominator) != signum(magic))
13016 numerator = fgMakeMultiUse(&tree->gtOp1);
13017 tree->gtFlags |= GTF_ASG;
13020 if (type == TYP_LONG)
13021 tree->gtOp2->gtIntConCommon.SetLngValue(magic);
13023 tree->gtOp2->gtIntConCommon.SetIconValue((ssize_t)magic);
13025 tree->SetOper(GT_MULHI);
13028 GenTree* mulresult = tree;
13030 JITDUMP("Multiply Result:\n");
13031 DISPTREE(mulresult);
13033 GenTree *adjusted = mulresult;
13035 if (denominator > 0 && magic < 0)
13037 // add the numerator back in
13038 adjusted = gtNewOperNode(GT_ADD, type, mulresult, numerator);
13040 else if (denominator < 0 && magic > 0)
13042 // subtract the numerator off
13043 adjusted = gtNewOperNode(GT_SUB, type, mulresult, numerator);
13047 adjusted = mulresult;
13050 GenTree* result1 = adjusted;
13053 result1 = gtNewOperNode(GT_RSH, type, adjusted, gtNewIconNode(shift, TYP_INT));
13056 GenTree* secondClone = fgMakeMultiUse(&result1);
13058 GenTree* result2 = gtNewOperNode(GT_RSZ, type, secondClone, gtNewIconNode(genTypeSize(type) * 8 - 1, type));
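// result2 is (result1 >>> (bits - 1)), i.e. the sign bit of the shifted
// product; adding it to result1 below rounds a negative quotient toward zero.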
13061 GenTree* result = gtNewOperNode(GT_ADD, type, result1, result2);
13062 JITDUMP("Final Magic Number divide:\n");
13066 result->gtFlags |= GTF_MORPHED;
13072 //------------------------------------------------------------------------------
13073 // fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
13077 // oper - Operation to check
13080 // True if the operation can be a root of a bitwise rotation tree; false otherwise.
13082 bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper)
13084 return (oper == GT_OR) || (oper == GT_XOR);
13087 //------------------------------------------------------------------------------
13088 // fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return
13089 // an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
13092 // tree - tree to check for a rotation pattern
13095 // An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
13098 // The input is a GT_OR or a GT_XOR tree.
13100 GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
13102 #ifndef LEGACY_BACKEND
13104 // Check for a rotation pattern, e.g.,
13117 // The patterns recognized:
13118 // (x << (y & M)) op (x >>> ((-y + N) & M))
13119 // (x >>> ((-y + N) & M)) op (x << (y & M))
13121 // (x << y) op (x >>> (-y + N))
13122 // (x >>> (-y + N)) op (x << y)
13124 // (x >>> (y & M)) op (x << ((-y + N) & M))
13125 // (x << ((-y + N) & M)) op (x >>> (y & M))
13127 // (x >>> y) op (x << (-y + N))
13128 // (x << (-y + N)) op (x >>> y)
13130 // (x << c1) op (x >>> c2)
13131 // (x >>> c1) op (x << c2)
13134 // c1 and c2 are const
13135 // c1 + c2 == bitsize(x)
13138 // M & (N - 1) == N - 1
13139 // op is either | or ^
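//
// For example (assuming a 32-bit x, so N == 32 and M == 31):
//     (x << (y & 31)) | (x >>> ((32 - y) & 31))  ==>  ROL(x, y)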
13141 if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) ||
13142 ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
13144 // We can't do anything if the tree has assignments, calls, or volatile
13145 // reads. Note that we allow GTF_EXCEPT side effect since any exceptions
13146 // thrown by the original tree will be thrown by the transformed tree as well.
13150 genTreeOps oper = tree->OperGet();
13151 assert(fgOperIsBitwiseRotationRoot(oper));
13153 // Check if we have an LSH on one side of the OR and an RSZ on the other side.
13154 GenTreePtr op1 = tree->gtGetOp1();
13155 GenTreePtr op2 = tree->gtGetOp2();
13156 GenTreePtr leftShiftTree = nullptr;
13157 GenTreePtr rightShiftTree = nullptr;
13158 if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
13160 leftShiftTree = op1;
13161 rightShiftTree = op2;
13163 else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
13165 leftShiftTree = op2;
13166 rightShiftTree = op1;
13173 // Check if the trees representing the value to shift are identical.
13174 // We already checked that there are no side effects above.
13175 if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
13177 GenTreePtr rotatedValue = leftShiftTree->gtGetOp1();
13178 var_types rotatedValueActualType = genActualType(rotatedValue->gtType);
13179 ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8;
13180 noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
13181 GenTreePtr leftShiftIndex = leftShiftTree->gtGetOp2();
13182 GenTreePtr rightShiftIndex = rightShiftTree->gtGetOp2();
13184 // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
13185 // shouldn't be masked for the transformation to be valid. If additional
13186 // higher bits are not masked, the transformation is still valid since the result
13187 // of MSIL shift instructions is unspecified if the shift amount is greater
13188 // than or equal to the width of the value being shifted.
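// For example, for a 32-bit rotated value minimalMask is 31 (0x1F); a shift
// index masked with 31 or 63 is acceptable here, while one masked with 15 is not.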
13189 ssize_t minimalMask = rotatedValueBitSize - 1;
13190 ssize_t leftShiftMask = -1;
13191 ssize_t rightShiftMask = -1;
13193 if ((leftShiftIndex->OperGet() == GT_AND))
13195 if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
13197 leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
13198 leftShiftIndex = leftShiftIndex->gtGetOp1();
13206 if ((rightShiftIndex->OperGet() == GT_AND))
13208 if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
13210 rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
13211 rightShiftIndex = rightShiftIndex->gtGetOp1();
13219 if (((minimalMask & leftShiftMask) != minimalMask) ||
13220 ((minimalMask & rightShiftMask) != minimalMask))
13222 // The shift index is overmasked, e.g., we have
13223 // something like (x << (y & 15)) or
13224 // (x >>> ((32 - y) & 15)) with a 32-bit x.
13225 // The transformation is not valid.
13229 GenTreePtr shiftIndexWithAdd = nullptr;
13230 GenTreePtr shiftIndexWithoutAdd = nullptr;
13231 genTreeOps rotateOp = GT_NONE;
13232 GenTreePtr rotateIndex = nullptr;
13234 if (leftShiftIndex->OperGet() == GT_ADD)
13236 shiftIndexWithAdd = leftShiftIndex;
13237 shiftIndexWithoutAdd = rightShiftIndex;
13240 else if (rightShiftIndex->OperGet() == GT_ADD)
13242 shiftIndexWithAdd = rightShiftIndex;
13243 shiftIndexWithoutAdd = leftShiftIndex;
13247 if (shiftIndexWithAdd != nullptr)
13249 if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
13251 if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
13253 if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
13255 if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
13257 // We found one of these patterns:
13258 // (x << (y & M)) | (x >>> ((-y + N) & M))
13259 // (x << y) | (x >>> (-y + N))
13260 // (x >>> (y & M)) | (x << ((-y + N) & M))
13261 // (x >>> y) | (x << (-y + N))
13262 // where N == bitsize(x), M is const, and
13263 // M & (N - 1) == N - 1
13265 #ifndef _TARGET_64BIT_
13266 if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
13268 // TODO: we need to handle variable-sized long shifts specially on x86.
13269 // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
13270 // to add helpers for GT_ROL and GT_ROR.
13271 NYI("Rotation of a long value by variable amount");
13275 rotateIndex = shiftIndexWithoutAdd;
13281 else if ((leftShiftIndex->IsCnsIntOrI() &&
13282 rightShiftIndex->IsCnsIntOrI()))
13284 if (leftShiftIndex->gtIntCon.gtIconVal +
13285 rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
13287 // We found this pattern:
13288 // (x << c1) | (x >>> c2)
13289 // where c1 and c2 are const and c1 + c2 == bitsize(x)
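// For example, (x << 3) | (x >>> 29) on a 32-bit x becomes ROL(x, 3).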
13291 rotateIndex = leftShiftIndex;
13295 if (rotateIndex != nullptr)
13297 noway_assert(GenTree::OperIsRotate(rotateOp));
13299 unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT;
13301 // We can use the same tree only during global morph; reusing the tree in a later morph
13302 // may invalidate value numbers.
13305 tree->gtOp.gtOp1 = rotatedValue;
13306 tree->gtOp.gtOp2 = rotateIndex;
13307 tree->ChangeOper(rotateOp);
13308 noway_assert(inputTreeEffects == ((rotatedValue->gtFlags | rotateIndex->gtFlags) & GTF_ALL_EFFECT));
13312 tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex);
13313 noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT));
13319 #endif //LEGACY_BACKEND
13323 #if !CPU_HAS_FP_SUPPORT
13324 GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
13327 genTreeOps oper = tree->OperGet();
13328 var_types typ = tree->TypeGet();
13329 GenTreePtr op1 = tree->gtOp.gtOp1;
13330 GenTreePtr op2 = tree->gtGetOp2();
13333 We have to use helper calls for all FP operations:
13335 FP operators that operate on FP values
13336 casts to and from FP
13337 comparisons of FP values
13340 if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet())))
13344 size_t argc = genTypeStSz(typ);
13346 /* Not all FP operations need helper calls */
13360 /* If the result isn't FP, it better be a compare or cast */
13362 if (!(varTypeIsFloating(typ) ||
13363 tree->OperIsCompare() || oper == GT_CAST))
13366 noway_assert(varTypeIsFloating(typ) ||
13367 tree->OperIsCompare() || oper == GT_CAST);
13370 /* Keep track of how many arguments we're passing */
13372 fgPtrArgCntCur += argc;
13374 /* Is this a binary operator? */
13378 /* Add the second operand to the argument count */
13380 fgPtrArgCntCur += argc; argc *= 2;
13382 /* What kind of an operator do we have? */
13386 case GT_ADD: helper = CPX_R4_ADD; break;
13387 case GT_SUB: helper = CPX_R4_SUB; break;
13388 case GT_MUL: helper = CPX_R4_MUL; break;
13389 case GT_DIV: helper = CPX_R4_DIV; break;
13390 // case GT_MOD: helper = CPX_R4_REM; break;
13392 case GT_EQ : helper = CPX_R4_EQ ; break;
13393 case GT_NE : helper = CPX_R4_NE ; break;
13394 case GT_LT : helper = CPX_R4_LT ; break;
13395 case GT_LE : helper = CPX_R4_LE ; break;
13396 case GT_GE : helper = CPX_R4_GE ; break;
13397 case GT_GT : helper = CPX_R4_GT ; break;
13403 noway_assert(!"unexpected FP binary op");
13407 args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
13417 noway_assert(!"FP cast");
13419 case GT_NEG: helper = CPX_R4_NEG; break;
13425 noway_assert(!"unexpected FP unary op");
13429 args = gtNewArgList(tree->gtOp.gtOp1);
13432 /* If we have double result/operands, modify the helper */
13434 if (typ == TYP_DOUBLE)
13436 noway_assert(CPX_R4_NEG+1 == CPX_R8_NEG);
13437 noway_assert(CPX_R4_ADD+1 == CPX_R8_ADD);
13438 noway_assert(CPX_R4_SUB+1 == CPX_R8_SUB);
13439 noway_assert(CPX_R4_MUL+1 == CPX_R8_MUL);
13440 noway_assert(CPX_R4_DIV+1 == CPX_R8_DIV);
13446 noway_assert(tree->OperIsCompare());
13448 noway_assert(CPX_R4_EQ+1 == CPX_R8_EQ);
13449 noway_assert(CPX_R4_NE+1 == CPX_R8_NE);
13450 noway_assert(CPX_R4_LT+1 == CPX_R8_LT);
13451 noway_assert(CPX_R4_LE+1 == CPX_R8_LE);
13452 noway_assert(CPX_R4_GE+1 == CPX_R8_GE);
13453 noway_assert(CPX_R4_GT+1 == CPX_R8_GT);
13456 tree = fgMorphIntoHelperCall(tree, helper, args);
13458 if (fgPtrArgCntMax < fgPtrArgCntCur)
13459 fgPtrArgCntMax = fgPtrArgCntCur;
13461 fgPtrArgCntCur -= argc;
13469 if (compCurBB == genReturnBB)
13471 /* This is the 'exitCrit' call at the exit label */
13473 noway_assert(op1->gtType == TYP_VOID);
13474 noway_assert(op2 == 0);
13476 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
13481 /* This is a (real) return value -- check its type */
13484 if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
13486 bool allowMismatch = false;
13488 // Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa
13489 if ((info.compRetType == TYP_BYREF &&
13490 genActualType(op1->TypeGet()) == TYP_I_IMPL) ||
13491 (op1->TypeGet() == TYP_BYREF &&
13492 genActualType(info.compRetType) == TYP_I_IMPL))
13493 allowMismatch = true;
13495 if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet()))
13496 allowMismatch = true;
13498 if (!allowMismatch)
13499 NO_WAY("Return type mismatch");
13510 /*****************************************************************************
13512 * Transform the given tree for code generation and return an equivalent tree.
13516 GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac)
13518 noway_assert(tree);
13519 noway_assert(tree->gtOper != GT_STMT);
13524 if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID)
13526 noway_assert(!"JitBreakMorphTree hit");
13532 int thisMorphNum = 0;
13533 if (verbose && treesBeforeAfterMorph)
13535 thisMorphNum = morphNum++;
13536 printf("\nfgMorphTree (before %d):\n", thisMorphNum);
13541 /*-------------------------------------------------------------------------
13542 * fgMorphTree() can potentially replace a tree with another, and the
13543 * caller has to store the return value correctly.
13544 * Turn this on to always make a copy of "tree" here to shake out
13545 * hidden/unupdated references.
13550 if (compStressCompile(STRESS_GENERIC_CHECK, 0))
13554 #ifdef SMALL_TREE_NODES
13555 if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL)
13557 copy = gtNewLargeOperNode(GT_ADD, TYP_INT);
13562 copy = new (this, GT_CALL) GenTreeCall(TYP_INT);
13565 copy->CopyFrom(tree, this);
13567 #if defined (LATE_DISASM)
13568 // GT_CNS_INT is considered small, so CopyFrom() won't copy all fields
13569 if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
13571 copy->gtIntCon.gtIconHdl.gtIconHdl1 = tree->gtIntCon.gtIconHdl.gtIconHdl1;
13572 copy->gtIntCon.gtIconHdl.gtIconHdl2 = tree->gtIntCon.gtIconHdl.gtIconHdl2;
13576 DEBUG_DESTROY_NODE(tree);
13583 /* Ensure that we haven't morphed this node already */
13584 assert(((tree->gtFlags & GTF_MORPHED) == 0) && "ERROR: Already morphed this node!");
13586 #if LOCAL_ASSERTION_PROP
13587 /* Before morphing the tree, we try to propagate any active assertions */
13588 if (optLocalAssertionProp)
13590 /* Do we have any active assertions? */
13592 if (optAssertionCount > 0)
13594 GenTreePtr newTree = tree;
13595 while (newTree != NULL)
13598 /* newTree is non-Null if we propagated an assertion */
13599 newTree = optAssertionProp(apFull, tree, NULL);
13601 noway_assert(tree != NULL);
13604 PREFAST_ASSUME(tree != NULL);
13608 /* Save the original un-morphed tree for fgMorphTreeDone */
13610 GenTreePtr oldTree = tree;
13612 /* Figure out what kind of a node we have */
13614 unsigned kind = tree->OperKind();
13616 /* Is this a constant node? */
13618 if (kind & GTK_CONST)
13620 tree = fgMorphConst(tree);
13624 /* Is this a leaf node? */
13626 if (kind & GTK_LEAF)
13628 tree = fgMorphLeaf(tree);
13632 /* Is it a 'simple' unary/binary operator? */
13634 if (kind & GTK_SMPOP)
13636 tree = fgMorphSmpOp(tree, mac);
13640 /* See what kind of a special operator we have here */
13642 switch (tree->OperGet())
13645 tree = fgMorphField(tree, mac);
13649 tree = fgMorphCall(tree->AsCall());
13652 case GT_ARR_BOUNDS_CHECK:
13653 #ifdef FEATURE_SIMD
13655 #endif // FEATURE_SIMD
13657 fgSetRngChkTarget(tree);
13659 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
13660 bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen);
13661 bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex);
13662 // If the index is a comma(throw, x), just return that.
13663 if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex))
13665 tree = bndsChk->gtIndex;
13668 // Propagate effects flags upwards
13669 bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
13670 bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT);
13672 // Otherwise, we don't change the tree.
13677 tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj);
13678 tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT;
13681 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
13683 tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]);
13684 tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
13687 fgSetRngChkTarget(tree, false);
13690 case GT_ARR_OFFSET:
13691 tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset);
13692 tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT;
13693 tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex);
13694 tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT;
13695 tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj);
13696 tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT;
13698 fgSetRngChkTarget(tree, false);
13702 tree->gtCmpXchg.gtOpLocation = fgMorphTree(tree->gtCmpXchg.gtOpLocation);
13703 tree->gtCmpXchg.gtOpValue = fgMorphTree(tree->gtCmpXchg.gtOpValue);
13704 tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand);
13711 noway_assert(!"unexpected operator");
13715 fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum));
13721 #if LOCAL_ASSERTION_PROP
13722 /*****************************************************************************
13724 * Kill all dependent assertions with regard to lclNum.
13728 void Compiler::fgKillDependentAssertions(unsigned lclNum
13729 DEBUGARG(GenTreePtr tree))
13731 LclVarDsc * varDsc = &lvaTable[lclNum];
13733 if (varDsc->lvPromoted)
13735 noway_assert(varTypeIsStruct(varDsc));
13737 // Kill the field locals.
13738 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
13740 fgKillDependentAssertions(i DEBUGARG(tree));
13743 // Fall through to kill the struct local itself.
13746 /* All dependent assertions are killed here */
13748 ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));
13752 AssertionIndex index = optAssertionCount;
13753 while (killed && (index > 0))
13755 if (BitVecOps::IsMember(apTraits, killed, index - 1))
13758 AssertionDsc* curAssertion = optGetAssertion(index);
13759 noway_assert((curAssertion->op1.lcl.lclNum == lclNum) ||
13760 ((curAssertion->op2.kind == O2K_LCLVAR_COPY) &&
13761 (curAssertion->op2.lcl.lclNum == lclNum)));
13764 printf("\nThe assignment ");
13766 printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
13767 optPrintAssertion(curAssertion);
13770 // Remove this bit from the killed mask
13771 BitVecOps::RemoveElemD(apTraits, killed, index - 1);
13773 optAssertionRemove(index);
13779 // killed mask should now be zero
13780 noway_assert(BitVecOps::IsEmpty(apTraits, killed));
13783 #endif // LOCAL_ASSERTION_PROP
13786 /*****************************************************************************
13788 * This function is called to complete the morphing of a tree node
13789 * It should only be called once for each node.
13790 * If DEBUG is defined the flag GTF_MORPHED is checked and updated,
13791 * to enforce the invariant that each node is only morphed once.
13792 * If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced
13793 * by an equivalent tree.
13797 void Compiler::fgMorphTreeDone(GenTreePtr tree,
13798 GenTreePtr oldTree /* == NULL */
13799 DEBUGARG(int morphNum))
13802 if (verbose && treesBeforeAfterMorph)
13804 printf("\nfgMorphTree (after %d):\n", morphNum);
13806 printf(""); // in our logic this causes a flush
13810 if (!fgGlobalMorph)
13813 if ((oldTree != NULL) && (oldTree != tree))
13815 /* Ensure that we have morphed this node */
13816 assert((tree->gtFlags & GTF_MORPHED) && "ERROR: Did not morph this node!");
13819 TransferTestDataToNode(oldTree, tree);
13824 // Ensure that we haven't morphed this node already
13825 assert(((tree->gtFlags & GTF_MORPHED) == 0) && "ERROR: Already morphed this node!");
13828 if (tree->OperKind() & GTK_CONST)
13831 #if LOCAL_ASSERTION_PROP
13833 if (!optLocalAssertionProp)
13836 /* Do we have any active assertions? */
13838 if (optAssertionCount > 0)
13840 /* Is this an assignment to a local variable */
13842 if ((tree->OperKind() & GTK_ASGOP) &&
13843 (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR || tree->gtOp.gtOp1->gtOper == GT_LCL_FLD))
13845 unsigned op1LclNum = tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum; noway_assert(op1LclNum < lvaCount);
13846 fgKillDependentAssertions(op1LclNum DEBUGARG(tree));
13850 /* If this tree makes a new assertion - make it available */
13851 optAssertionGen(tree);
13853 #endif // LOCAL_ASSERTION_PROP
13858 /* Mark this node as being morphed */
13859 tree->gtFlags |= GTF_MORPHED;
13864 /*****************************************************************************
13866 * Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants
13867 * Returns true if we modified the flow graph
13870 bool Compiler::fgFoldConditional(BasicBlock * block)
13872 bool result = false;
13874 // We don't want to make any code unreachable
13875 if (opts.compDbgCode || opts.MinOpts())
13878 if (block->bbJumpKind == BBJ_COND)
13880 noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
13882 GenTreePtr stmt = block->bbTreeList->gtPrev;
13884 noway_assert(stmt->gtNext == NULL);
13886 if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
13888 noway_assert(fgRemoveRestOfBlock);
13890 /* Unconditional throw - transform the basic block into a BBJ_THROW */
13891 fgConvertBBToThrowBB(block);
13893 /* Remove 'block' from the predecessor list of 'block->bbNext' */
13894 fgRemoveRefPred(block->bbNext, block);
13896 /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */
13897 fgRemoveRefPred(block->bbJumpDest, block);
13902 printf("\nConditional folded at BB%02u\n", block->bbNum);
13903 printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
13909 noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
13911 /* Did we fold the conditional */
13913 noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
13914 GenTreePtr cond; cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
13916 if (cond->OperKind() & GTK_CONST)
13918 /* Yupee - we folded the conditional!
13919 * Remove the conditional statement */
13921 noway_assert(cond->gtOper == GT_CNS_INT);
13922 noway_assert((block->bbNext->countOfInEdges() > 0) &&
13923 (block->bbJumpDest->countOfInEdges() > 0));
13925 /* remove the statement from bbTreelist - No need to update
13926 * the reference counts since there are no lcl vars */
13927 fgRemoveStmt(block, stmt);
13929 // block is a BBJ_COND that we are folding the conditional for
13930 // bTaken is the path that will always be taken from block
13931 // bNotTaken is the path that will never be taken from block
13933 BasicBlock * bTaken;
13934 BasicBlock * bNotTaken;
13936 if (cond->gtIntCon.gtIconVal != 0)
13938 /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */
13939 block->bbJumpKind = BBJ_ALWAYS;
13940 bTaken = block->bbJumpDest;
13941 bNotTaken = block->bbNext;
13945 /* Unmark the loop if we are removing a backwards branch */
13946 /* dest block must also be marked as a loop head and */
13947 /* We must be able to reach the backedge block */
13948 if ((block->bbJumpDest->isLoopHead()) &&
13949 (block->bbJumpDest->bbNum <= block->bbNum) &&
13950 fgReachable(block->bbJumpDest, block))
13952 optUnmarkLoopBlocks(block->bbJumpDest, block);
13955 /* JTRUE 0 - transform the basic block into a BBJ_NONE */
13956 block->bbJumpKind = BBJ_NONE;
13957 noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
13958 bTaken = block->bbNext;
13959 bNotTaken = block->bbJumpDest;
13962 if (fgHaveValidEdgeWeights)
13964 // We are removing an edge from block to bNotTaken
13965 // and we have already computed the edge weights, so
13966 // we will try to adjust some of the weights
13968 flowList * edgeTaken = fgGetPredForBlock(bTaken, block);
13969 BasicBlock * bUpdated = NULL; // non-NULL if we updated the weight of an internal block
13971 // We examine the taken edge (block -> bTaken)
13972 // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight
13973 // else if bTaken has valid profile weight and block does not we try to adjust block's weight
13974 // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
13976 if (block->bbFlags & BBF_PROF_WEIGHT)
13978 // The edge weights for (block -> bTaken) are 100% of block's weight
13979 edgeTaken->flEdgeWeightMin = block->bbWeight;
13980 edgeTaken->flEdgeWeightMax = block->bbWeight;
13982 if ((bTaken->bbFlags & BBF_PROF_WEIGHT) == 0)
13984 if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
13986 // Update the weight of bTaken
13987 bTaken->inheritWeight(block);
13992 else if (bTaken->bbFlags & BBF_PROF_WEIGHT)
13994 if (bTaken->countOfInEdges() == 1)
13996 // There is only one in edge to bTaken
13997 edgeTaken->flEdgeWeightMin = bTaken->bbWeight;
13998 edgeTaken->flEdgeWeightMax = bTaken->bbWeight;
14000 // Update the weight of block
14001 block->inheritWeight(bTaken);
14006 if (bUpdated != NULL)
14009 // Now fix the weights of the edges out of 'bUpdated'
14010 switch (bUpdated->bbJumpKind) {
14012 edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
14013 edge->flEdgeWeightMax = bUpdated->bbWeight;
14016 edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
14017 edge->flEdgeWeightMax = bUpdated->bbWeight;
14020 edge = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated);
14021 edge->flEdgeWeightMax = bUpdated->bbWeight;
14024 // We don't handle BBJ_SWITCH
14031 /* modify the flow graph */
14033 /* Remove 'block' from the predecessor list of 'bNotTaken' */
14034 fgRemoveRefPred(bNotTaken, block);
14039 printf("\nConditional folded at BB%02u\n", block->bbNum);
14040 printf("BB%02u becomes a %s", block->bbNum,
14041 block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
14042 if (block->bbJumpKind == BBJ_ALWAYS)
14043 printf(" to BB%02u", block->bbJumpDest->bbNum);
14048 /* if the block was a loop condition we may have to modify
14049 * the loop table */
14051 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
14053 /* Some loops may have been already removed by
14054 * loop unrolling or conditional folding */
14056 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
14059 /* We are only interested in the loop bottom */
14061 if (optLoopTable[loopNum].lpBottom == block)
14063 if (cond->gtIntCon.gtIconVal == 0)
14065 /* This was a bogus loop (condition always false)
14066 * Remove the loop from the table */
14068 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
14072 printf("Removing loop L%02u (from BB%02u to BB%02u)\n\n",
14074 optLoopTable[loopNum].lpFirst ->bbNum,
14075 optLoopTable[loopNum].lpBottom->bbNum);
14085 else if (block->bbJumpKind == BBJ_SWITCH)
14087 noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
14089 GenTreePtr stmt = block->bbTreeList->gtPrev;
14091 noway_assert(stmt->gtNext == NULL);
14093 if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
14095 noway_assert(fgRemoveRestOfBlock);
14097 /* Unconditional throw - transform the basic block into a BBJ_THROW */
14098 fgConvertBBToThrowBB(block);
14100 /* update the flow graph */
14102 unsigned jumpCnt = block->bbJumpSwt->bbsCount;
14103 BasicBlock * * jumpTab = block->bbJumpSwt->bbsDstTab;
14105 for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
14107 BasicBlock * curJump = *jumpTab;
14109 /* Remove 'block' from the predecessor list of 'curJump' */
14110 fgRemoveRefPred(curJump, block);
14116 printf("\nConditional folded at BB%02u\n", block->bbNum);
14117 printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
14124 noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);
14126 /* Did we fold the conditional */
14128 noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
14129 GenTreePtr cond; cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
14131 if (cond->OperKind() & GTK_CONST)
14133 /* Yupee - we folded the conditional!
14134 * Remove the conditional statement */
14136 noway_assert(cond->gtOper == GT_CNS_INT);
14138 /* remove the statement from bbTreelist - No need to update
14139 * the reference counts since there are no lcl vars */
14140 fgRemoveStmt(block, stmt);
14142 /* modify the flow graph */
14144 /* Find the actual jump target */
14145 unsigned switchVal; switchVal = (unsigned)cond->gtIntCon.gtIconVal;
14146 unsigned jumpCnt; jumpCnt = block->bbJumpSwt->bbsCount;
14147 BasicBlock * * jumpTab; jumpTab = block->bbJumpSwt->bbsDstTab;
14148 bool foundVal; foundVal = false;
14150 for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
14152 BasicBlock * curJump = *jumpTab;
14154 assert (curJump->countOfInEdges() > 0);
14156 // If val matches switchVal, or we are at the last entry and
14157 // we never found the switch value, then set the new jump dest.
14159 if ( (val == switchVal) || (!foundVal && (val == jumpCnt-1)))
14161 if (curJump != block->bbNext)
14163 /* transform the basic block into a BBJ_ALWAYS */
14164 block->bbJumpKind = BBJ_ALWAYS;
14165 block->bbJumpDest = curJump;
14167 // If we are jumping backwards, make sure we have a GC poll.
14168 if (curJump->bbNum > block->bbNum)
14169 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
14173 /* transform the basic block into a BBJ_NONE */
14174 block->bbJumpKind = BBJ_NONE;
14175 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
14181 /* Remove 'block' from the predecessor list of 'curJump' */
14182 fgRemoveRefPred(curJump, block);
14188 printf("\nConditional folded at BB%02u\n", block->bbNum);
14189 printf("BB%02u becomes a %s", block->bbNum,
14190 block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
14191 if (block->bbJumpKind == BBJ_ALWAYS)
14192 printf(" to BB%02u", block->bbJumpDest->bbNum);
14204 //*****************************************************************************
14206 // Morphs a single statement in a block.
14207 // Can be called anytime, unlike fgMorphStmts() which should only be called once.
14209 // Returns true if 'stmt' was removed from the block.
14210 // Returns false if 'stmt' is still in the block (even if other statements were removed).
14213 bool Compiler::fgMorphBlockStmt(BasicBlock * block,
14215 DEBUGARG(const char * msg) )
14217 noway_assert(stmt->gtOper == GT_STMT);
14220 compCurStmt = stmt;
14222 GenTreePtr morph = fgMorphTree(stmt->gtStmt.gtStmtExpr);
14224 // Bug 1106830 - During the CSE phase we can't just remove
14225 // morph->gtOp.gtOp2 as it could contain CSE expressions.
14226 // This leads to a noway_assert in OptCSE.cpp when
14227 // searching for the removed CSE ref. (using gtFindLink)
14229 if (!optValnumCSE_phase)
14231 /* Check for morph as a GT_COMMA with an unconditional throw */
14232 if (fgIsCommaThrow(morph, true))
14237 printf("Folding a top-level fgIsCommaThrow stmt\n");
14238 printf("Removing op2 as unreachable:\n");
14239 gtDispTree(morph->gtOp.gtOp2);
14243 /* Use the call as the new stmt */
14244 morph = morph->gtOp.gtOp1;
14245 noway_assert(morph->gtOper == GT_CALL);
14248 /* We can get a throw as a statement root */
14249 if (fgIsThrow(morph))
14254 printf("We have a top-level fgIsThrow stmt\n");
14255 printf("Removing the rest of block as unreachable:\n");
14258 noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
14259 fgRemoveRestOfBlock = true;
14263 stmt->gtStmt.gtStmtExpr = morph;
14265 /* Can the entire tree be removed ? */
14267 bool removedStmt = fgCheckRemoveStmt(block, stmt);
14269 /* Or this is the last statement of a conditional branch that was just folded */
14271 if ((!removedStmt) && (stmt->gtNext == NULL) && !fgRemoveRestOfBlock)
14273 if (fgFoldConditional(block))
14275 if (block->bbJumpKind != BBJ_THROW)
14276 removedStmt = true;
14282 /* Have to re-do the evaluation order since, for example,
14283 * some later code does not expect constants as op1 */
14284 gtSetStmtInfo(stmt);
14286 /* Have to re-link the nodes for this statement */
14287 fgSetStmtSeq(stmt);
14293 printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed"));
14299 if (fgRemoveRestOfBlock)
14301 /* Remove the rest of the stmts in the block */
14303 while (stmt->gtNext)
14305 stmt = stmt->gtNext;
14306 noway_assert(stmt->gtOper == GT_STMT);
14308 fgRemoveStmt(block, stmt);
14311 // The rest of block has been removed
14312 // and we will always throw an exception
14314 // Update successors of block
14315 fgRemoveBlockAsPred(block);
14317 // For compDbgCode, we prepend an empty BB as the firstBB; it is BBJ_NONE.
14318 // We should not convert it to a ThrowBB.
14319 if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0) ) {
14320 // Convert block to a throw bb
14321 fgConvertBBToThrowBB(block);
14327 printf("\n%s Block BB%02u becomes a throw block.\n", msg, block->bbNum);
14330 fgRemoveRestOfBlock = false;
14333 return removedStmt;
14336 /*****************************************************************************
14338 * Morph the statements of the given block.
14339 * This function should be called just once for a block. Use fgMorphBlockStmt()
14340 * for reentrant calls.
14343 void Compiler::fgMorphStmts(BasicBlock * block,
14344 bool * mult, bool * lnot, bool * loadw)
14346 fgRemoveRestOfBlock = false;
14348 noway_assert(fgExpandInline == false);
14350 /* Make the current basic block address available globally */
14354 *mult = *lnot = *loadw = false;
14356 fgCurrentlyInUseArgTemps = hashBv::Create(this);
14358 GenTreePtr stmt, prev;
14359 for (stmt = block->bbTreeList, prev = NULL;
14361 prev = stmt->gtStmt.gtStmtExpr,
14362 stmt = stmt->gtNext)
14364 noway_assert(stmt->gtOper == GT_STMT);
14366 if (fgRemoveRestOfBlock)
14368 fgRemoveStmt(block, stmt);
14371 #ifdef FEATURE_SIMD
14372 if (!opts.MinOpts() &&
14373 stmt->gtStmt.gtStmtExpr->TypeGet() == TYP_FLOAT &&
14374 stmt->gtStmt.gtStmtExpr->OperGet() == GT_ASG)
14376 fgMorphCombineSIMDFieldAssignments(block, stmt);
14380 fgMorphStmt = stmt;
14381 compCurStmt = stmt;
14382 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
14386 if (stmt == block->bbTreeList)
14387 block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
14389 unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0);
14393 printf("\nfgMorphTree BB%02u, stmt %d (before)\n", block->bbNum, compCurStmtNum);
14398 /* Morph this statement tree */
14400 GenTreePtr morph = fgMorphTree(tree);
14402 // mark any outgoing arg temps as free so we can reuse them in the next statement.
14404 fgCurrentlyInUseArgTemps->ZeroAll();
14406 // Has fgMorphStmt been sneakily changed?
14408 if (stmt->gtStmt.gtStmtExpr != tree)
14410 /* This must be a tail call. Ignore 'morph' and carry on with
14411 the tail-call node */
14413 morph = stmt->gtStmt.gtStmtExpr;
14414 noway_assert(compTailCallUsed);
14415 noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
14416 noway_assert(stmt->gtNext == NULL);
14418 GenTreeCall* call = morph->AsCall();
14420 // - a tail call dispatched via helper, in which case the block will end with BBJ_THROW, or
14421 // - a fast call made as jmp, in which case the block will end with BBJ_RETURN and be marked as containing a jmp.
14422 noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
14423 (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && (compCurBB->bbFlags & BBF_HAS_JMP)));
14425 else if (block != compCurBB)
14427 /* This must be a tail call that caused a GCPoll to get
14428 injected. We haven't actually morphed the call yet
14429 but the flag still got set, clear it here... */
14432 tree->gtFlags &= ~GTF_MORPHED;
14434 noway_assert(compTailCallUsed);
14435 noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
14436 noway_assert(stmt->gtNext == NULL);
14438 GenTreeCall* call = morph->AsCall();
14441 // - a tail call dispatched via helper, in which case the block will end with BBJ_THROW, or
14442 // - a fast call made as jmp, in which case the block will end with BBJ_RETURN and be marked as containing a jmp.
14443 noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
14444 (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && (compCurBB->bbFlags & BBF_HAS_JMP)));
14448 if (compStressCompile(STRESS_CLONE_EXPR, 30))
14450 // Clone all the trees to stress gtCloneExpr()
14454 printf("\nfgMorphTree (stressClone from):\n");
14458 morph = gtCloneExpr(morph);
14459 noway_assert(morph);
14463 printf("\nfgMorphTree (stressClone to):\n");
14468 /* If the hash value changes, we modified the tree during morphing */
14471 unsigned newHash = gtHashValue(morph);
14472 if (newHash != oldHash)
14474 printf("\nfgMorphTree BB%02u, stmt %d (after)\n", block->bbNum, compCurStmtNum);
14480 /* Check for morph as a GT_COMMA with an unconditional throw */
14481 if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true))
14483 /* Use the call as the new stmt */
14484 morph = morph->gtOp.gtOp1;
14485 noway_assert(morph->gtOper == GT_CALL);
14486 noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
14488 fgRemoveRestOfBlock = true;
14491 stmt->gtStmt.gtStmtExpr = tree = morph;
14493 noway_assert(fgPtrArgCntCur == 0);
14495 if (fgRemoveRestOfBlock)
14498 /* Has the statement been optimized away */
14500 if (fgCheckRemoveStmt(block, stmt))
14503 /* Check if this block ends with a conditional branch that can be folded */
14505 if (fgFoldConditional(block))
14508 if (ehBlockHasExnFlowDsc(block))
14511 #if OPT_MULT_ADDSUB
14513 /* Note whether we have two or more +=/-= operators in a row */
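// For example, a source sequence such as "i += 3; i += 5;" shows up here
// as two consecutive GT_ASG_ADD statements; fgMorphBlocks() later folds
// such a pair into the single statement "i += 8;" (see its
// OPT_MULT_ADDSUB loop).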
14515 if (tree->gtOper == GT_ASG_ADD ||
14516 tree->gtOper == GT_ASG_SUB)
14518 if (prev && prev->gtOper == tree->gtOper)
14524 /* Note "x = a[i] & icon" followed by "x |= a[i] << 8" */
14526 if (tree->gtOper == GT_ASG_OR &&
14528 prev->gtOper == GT_ASG)
14534 if (fgRemoveRestOfBlock)
14536 if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH))
14538 GenTreePtr first = block->bbTreeList; noway_assert(first);
14539 GenTreePtr last = first->gtPrev; noway_assert(last && last->gtNext == NULL);
14540 GenTreePtr lastStmt = last->gtStmt.gtStmtExpr;
14542 if (((block->bbJumpKind == BBJ_COND ) && (lastStmt->gtOper == GT_JTRUE )) ||
14543 ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH)) )
14545 GenTreePtr op1 = lastStmt->gtOp.gtOp1;
14547 if (op1->OperKind() & GTK_RELOP)
14549 /* Unmark the comparison node with GTF_RELOP_JMP_USED */
14550 op1->gtFlags &= ~GTF_RELOP_JMP_USED;
14553 last->gtStmt.gtStmtExpr = fgMorphTree(op1);
14557 /* Mark block as a BBJ_THROW block */
14558 fgConvertBBToThrowBB(block);
14561 noway_assert(fgExpandInline == false);
14563 #if FEATURE_FASTTAILCALL
14564 GenTreePtr recursiveTailCall = nullptr;
14565 if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
14567 fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
14572 compCurBB = (BasicBlock*)INVALID_POINTER_VALUE;
14575 // Reset this back so that it doesn't leak out impacting other blocks
14576 fgRemoveRestOfBlock = false;
14579 /*****************************************************************************
14581 * Morph the blocks of the method.
14583 * This function should be called just once.
14586 void Compiler::fgMorphBlocks()
14590 printf("\n*************** In fgMorphBlocks()\n");
14593 /* Since fgMorphTree can be called after various optimizations to re-arrange
14594 * the nodes, we need a global flag to signal whether we are in the one-pass
14595 * global morphing phase */
14597 fgGlobalMorph = true;
14599 #if LOCAL_ASSERTION_PROP
14601 // Local assertion prop is enabled when we are optimizing (not debug code, not MinOpts)
14603 optLocalAssertionProp = (!opts.compDbgCode && !opts.MinOpts());
14605 if (optLocalAssertionProp)
14608 // Initialize for local assertion prop
14610 optAssertionInit(true);
14612 #elif ASSERTION_PROP
14614 // If LOCAL_ASSERTION_PROP is not set
14615 // and we have global assertion prop
14616 // then local assertion prop is always off
14618 optLocalAssertionProp = false;
14622 /*-------------------------------------------------------------------------
14623 * Process all basic blocks in the function
14626 BasicBlock * block = fgFirstBB; noway_assert(block);
14629 compCurStmtNum = 0;
14634 #if OPT_MULT_ADDSUB
14642 bool loadw = false;
14646 printf("\nMorphing BB%02u of '%s'\n", block->bbNum, info.compFullName);
14649 #if LOCAL_ASSERTION_PROP
14650 if (optLocalAssertionProp)
14653 // Clear out any currently recorded assertion candidates
14654 // before processing each basic block,
14655 // also we must handle QMARK-COLON specially
14657 optAssertionReset(0);
14661 /* Process all statement trees in the basic block */
14665 fgMorphStmts(block, &mult, &lnot, &loadw);
14667 #if OPT_MULT_ADDSUB
14669 if (mult && (opts.compFlags & CLFLG_TREETRANS) &&
14670 !opts.compDbgCode && !opts.MinOpts())
14672 for (tree = block->bbTreeList; tree; tree = tree->gtNext)
14674 noway_assert(tree->gtOper == GT_STMT);
14675 GenTreePtr last = tree->gtStmt.gtStmtExpr;
14677 if (last->gtOper == GT_ASG_ADD ||
14678 last->gtOper == GT_ASG_SUB)
14683 GenTreePtr dst1 = last->gtOp.gtOp1;
14684 GenTreePtr src1 = last->gtOp.gtOp2;
14686 if (!last->IsCnsIntOrI())
14689 if (dst1->gtOper != GT_LCL_VAR)
14691 if (!src1->IsCnsIntOrI())
14699 /* Look at the next statement */
14701 temp = tree->gtNext;
14705 noway_assert(temp->gtOper == GT_STMT);
14706 next = temp->gtStmt.gtStmtExpr;
14708 if (next->gtOper != last->gtOper)
14710 if (next->gtType != last->gtType)
14713 dst2 = next->gtOp.gtOp1;
14714 src2 = next->gtOp.gtOp2;
14716 if (dst2->gtOper != GT_LCL_VAR)
14718 if (dst2->gtLclVarCommon.gtLclNum != dst1->gtLclVarCommon.gtLclNum)
14721 if (!src2->IsCnsIntOrI())
14724 if (last->gtOverflow() != next->gtOverflow())
14727 const ssize_t i1 = src1->gtIntCon.gtIconVal;
14728 const ssize_t i2 = src2->gtIntCon.gtIconVal;
14729 const ssize_t itemp = i1 + i2;
14731 /* if the operators are checking for overflow, check for overflow of the operands */
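// Example (hypothetical constants): folding "b += 0x7FFFFFFF; b += 1;"
// under a checked add would give itemp = 0x80000000, which
// ClrSafeInt<INT32> reports as an overflow, so the two statements are
// left unfolded.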
14733 if (next->gtOverflow())
14735 if (next->TypeGet() == TYP_LONG)
14737 if (next->gtFlags & GTF_UNSIGNED)
14739 ClrSafeInt<UINT64> si1(i1);
14740 if ((si1 + ClrSafeInt<UINT64>(i2)).IsOverflow())
14745 ClrSafeInt<INT64> si1(i1);
14746 if ((si1 + ClrSafeInt<INT64>(i2)).IsOverflow())
14750 else if (next->gtFlags & GTF_UNSIGNED)
14752 ClrSafeInt<UINT32> si1(i1);
14753 if ((si1 + ClrSafeInt<UINT32>(i2)).IsOverflow())
14758 ClrSafeInt<INT32> si1(i1);
14759 if ((si1 + ClrSafeInt<INT32>(i2)).IsOverflow())
14764 /* Fold the two increments/decrements into one */
14766 src1->gtIntCon.gtIconVal = itemp;
14767 #ifdef _TARGET_64BIT_
14768 if (src1->gtType == TYP_INT)
14770 src1->AsIntCon()->TruncateOrSignExtend32();
14772 #endif //_TARGET_64BIT_
14774 /* Remove the second statement completely */
14776 noway_assert(tree->gtNext == temp);
14777 noway_assert(temp->gtPrev == tree);
14781 noway_assert(temp->gtNext->gtPrev == temp);
14783 temp->gtNext->gtPrev = tree;
14784 tree->gtNext = temp->gtNext;
14790 noway_assert(block->bbTreeList->gtPrev == temp);
14792 block->bbTreeList->gtPrev = tree;
14805 /* Are we using a single return block? */
14807 if (block->bbJumpKind == BBJ_RETURN)
14809 if ((genReturnBB != nullptr) &&
14810 (genReturnBB != block) &&
14811 ((block->bbFlags & BBF_HAS_JMP) == 0))
14813 /* We'll jump to the genReturnBB */
14815 #if !defined(_TARGET_X86_)
14816 if (info.compFlags & CORINFO_FLG_SYNCH)
14818 fgConvertSyncReturnToLeave(block);
14821 #endif // !_TARGET_X86_
14823 block->bbJumpKind = BBJ_ALWAYS;
14824 block->bbJumpDest = genReturnBB;
14828 // Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN.
14829 // For example a method returning void could have an empty block with jump kind BBJ_RETURN.
14830 // Such blocks do materialize as part of inlining.
14832 // Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN.
14833 // It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC.
14834 // For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal
14837 // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN.
14839 GenTreePtr last = (block->bbTreeList != nullptr) ? block->bbTreeList->gtPrev : nullptr;
14840 GenTreePtr ret = (last != nullptr) ? last->gtStmt.gtStmtExpr : nullptr;
14842 // Replace the GT_RETURN node with a GT_ASG that stores the return value into genReturnLocal.
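// For example (sketch): a block ending in "return x + y;" becomes
//     ASG(LCL_VAR genReturnLocal, ADD(x, y))
// and the block, now BBJ_ALWAYS, jumps to genReturnBB, which performs
// the single shared GT_RETURN.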
14843 if (genReturnLocal != BAD_VAR_NUM)
14845 // Method must be returning a value other than TYP_VOID.
14846 noway_assert(compMethodHasRetVal());
14848 // This block must be ending with a GT_RETURN
14849 noway_assert(last != nullptr);
14850 noway_assert(last->gtOper == GT_STMT);
14851 noway_assert(last->gtNext == nullptr);
14852 noway_assert(ret != nullptr);
14854 // GT_RETURN must have non-null operand as the method is returning the value assigned to genReturnLocal
14855 noway_assert(ret->OperGet() == GT_RETURN);
14856 noway_assert(ret->gtGetOp1() != nullptr);
14857 noway_assert(ret->gtGetOp2() == nullptr);
14859 last->gtStmt.gtStmtExpr = gtNewTempAssign(genReturnLocal, ret->gtGetOp1());
14861 // Make sure that copy-prop ignores this assignment.
14862 last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE;
14864 else if (ret != nullptr && ret->OperGet() == GT_RETURN)
14866 // This block ends with a GT_RETURN
14867 noway_assert(last != nullptr);
14868 noway_assert(last->gtOper == GT_STMT);
14869 noway_assert(last->gtNext == nullptr);
14871 // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn block
14872 noway_assert(ret->TypeGet() == TYP_VOID);
14873 noway_assert(ret->gtGetOp1() == nullptr);
14874 noway_assert(ret->gtGetOp2() == nullptr);
14876 fgRemoveStmt(block, last);
14882 printf("morph BB%02u to point at onereturn. New block is\n",
14884 fgTableDispBasicBlock(block);
14890 block = block->bbNext;
14894 /* We are done with the global morphing phase */
14896 fgGlobalMorph = false;
14901 fgDispBasicBlocks(true);
14907 /*****************************************************************************
14909 * Make some decisions about the kind of code to generate.
14912 void Compiler::fgSetOptions()
14915 /* Should we force fully interruptible code ? */
14918 if (JitConfig.JitFullyInt() ||
14919 compStressCompile(STRESS_GENERIC_VARN, 30))
14921 noway_assert(!codeGen->isGCTypeFixed());
14922 genInterruptible = true;
14926 #ifdef DEBUGGING_SUPPORT
14927 if (opts.compDbgCode)
14929 assert(!codeGen->isGCTypeFixed());
14930 genInterruptible = true; // debugging is easier this way ...
14934 /* Assume we won't need an explicit stack frame if this is allowed */
14937 // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of
14938 // the callee-saved registers.
14939 noway_assert(!compTailCallUsed || !compLocallocUsed);
14941 if (compLocallocUsed)
14942 codeGen->setFramePointerRequired(true);
14944 #ifdef _TARGET_X86_
14946 if (compTailCallUsed)
14947 codeGen->setFramePointerRequired(true);
14949 #endif // _TARGET_X86_
14951 if (!opts.genFPopt)
14952 codeGen->setFramePointerRequired(true);
14954 // Assert that the EH table has been initialized by now. Note that
14955 // compHndBBtabAllocCount never decreases; it is a high-water mark
14956 // of table allocation. In contrast, compHndBBtabCount does shrink
14957 // if we delete a dead EH region, and if it shrinks to zero, the
14958 // table pointer compHndBBtab is unreliable.
14959 assert(compHndBBtabAllocCount >= info.compXcptnsCount);
14961 #ifdef _TARGET_X86_
14963 // Note: this case, and the !X86 case below, should both use the
14964 // !X86 path. This would require a few more changes for X86 to use
14965 // compHndBBtabCount (the current number of EH clauses) instead of
14966 // info.compXcptnsCount (the number of EH clauses in IL), such as
14967 // in ehNeedsShadowSPslots(). This is because sometimes the IL has
14968 // an EH clause that we delete as statically dead code before we
14969 // get here, leaving no EH clauses left, and thus no requirement
14970 // to use a frame pointer because of EH. But until all the code uses
14971 // the same test, leave info.compXcptnsCount here.
14972 if (info.compXcptnsCount > 0)
14973 codeGen->setFramePointerRequiredEH(true);
14975 #else // !_TARGET_X86_
14977 if (compHndBBtabCount > 0)
14978 codeGen->setFramePointerRequiredEH(true);
14980 #endif // _TARGET_X86_
14982 // fgPtrArgCntMax records the maximum number of pushed arguments.
14983 // Depending on this value, we may need to use an EBP frame or be
14984 // only partially interruptible.
14987 if (!compCanEncodePtrArgCntMax())
14991 printf("Too many pushed arguments for fully interruptible encoding, marking method as partially interruptible\n");
14993 genInterruptible = false;
14995 if (fgPtrArgCntMax >= sizeof(unsigned))
14999 printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n");
15001 codeGen->setFramePointerRequiredGCInfo(true);
15004 if (info.compCallUnmanaged)
15006 codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame
15009 if (info.compPublishStubParam)
15011 codeGen->setFramePointerRequiredGCInfo(true);
15014 if (opts.compNeedSecurityCheck)
15016 codeGen->setFramePointerRequiredGCInfo(true);
15018 #ifndef JIT32_GCENCODER
15020 // The decoder only reports objects in frames with exceptions if the frame
15021 // is fully interruptible.
15022 // Even if there is no catch or other way to resume execution in this frame,
15023 // the VM requires the security object to remain alive until later, so
15024 // frames with security objects must be fully interruptible.
15025 genInterruptible = true;
15027 #endif // JIT32_GCENCODER
15030 if (compIsProfilerHookNeeded())
15032 codeGen->setFramePointerRequired(true);
15035 if (info.compIsVarArgs)
15037 // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
15038 codeGen->setFramePointerRequiredGCInfo(true);
15041 if (lvaReportParamTypeArg())
15043 codeGen->setFramePointerRequiredGCInfo(true);
15046 // printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not");
15050 /*****************************************************************************/
15052 GenTreePtr Compiler::fgInitThisClass()
15054 noway_assert(!compIsForInlining());
15056 CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);
15058 if (!kind.needsRuntimeLookup)
15060 return fgGetSharedCCtor(info.compClassHnd);
15064 // Collectible types require that, for shared generic code, if we use the generic context parameter,
15065 // we report it. (This is a conservative approach; we could detect some cases, particularly when the
15066 // context parameter is 'this', where we don't need the eager reporting logic.)
15067 lvaGenericsContextUsed = true;
15069 switch (kind.runtimeLookupKind)
15071 case CORINFO_LOOKUP_THISOBJ :
15072 // This code takes a 'this' pointer, but we need to pass the static method desc to get the right point in the hierarchy
15074 GenTreePtr vtTree = gtNewLclvNode(info.compThisArg, TYP_REF);
15075 // Vtable pointer of this object
15076 vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree);
15077 vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
15078 GenTreePtr methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd);
15080 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS,
15082 gtNewArgList(vtTree, methodHnd));
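// Sketch of the tree built for the THISOBJ case above:
//     CALL CORINFO_HELP_INITINSTCLASS(IND(this), methodHnd)
// where IND(this) loads the method table pointer of the 'this' object.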
15086 case CORINFO_LOOKUP_CLASSPARAM :
15088 GenTreePtr vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
15089 return gtNewHelperCallNode(CORINFO_HELP_INITCLASS,
15091 gtNewArgList(vtTree));
15094 case CORINFO_LOOKUP_METHODPARAM :
15096 GenTreePtr methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
15097 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS,
15099 gtNewArgList(gtNewIconNode(0),methHndTree));
15105 noway_assert(!"Unknown LOOKUP_KIND");
15111 /*****************************************************************************
15113 * Tree walk callback to make sure no GT_QMARK nodes are present in the tree,
15114 * except for the allowed "cond ? 1 : 0" pattern.
15116 Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTreePtr* tree, fgWalkData* data)
15118 if ((*tree)->OperGet() == GT_QMARK)
15120 fgCheckQmarkAllowedForm(*tree);
15122 return WALK_CONTINUE;
15125 void Compiler::fgCheckQmarkAllowedForm(GenTree* tree)
15127 assert(tree->OperGet() == GT_QMARK);
15128 #ifndef LEGACY_BACKEND
15129 assert(!"Qmarks beyond morph disallowed.");
15130 #else // LEGACY_BACKEND
15131 GenTreePtr colon = tree->gtOp.gtOp2;
15133 assert(colon->gtOp.gtOp1->gtOper == GT_CNS_INT);
15134 assert(colon->gtOp.gtOp1->AsIntCon()->IconValue() == 0);
15136 assert(colon->gtOp.gtOp2->gtOper == GT_CNS_INT);
15137 assert(colon->gtOp.gtOp2->AsIntCon()->IconValue() == 1);
15138 #endif // LEGACY_BACKEND
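// For example, the only qmark shape tolerated here by the legacy backend
// is the relop-materializing pattern "cond ? 1 : 0": a GT_QMARK whose
// colon carries the constant 0 (op1) and 1 (op2) asserted above.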
15141 /*****************************************************************************
15143 * Verify that the importer has created GT_QMARK nodes in a way we can
15144 * process them. The following is allowed:
15146 * 1. A top level qmark. Top level qmark is of the form:
15147 * a) (bool) ? (void) : (void) OR
15148 * b) V0N = (bool) ? (type) : (type)
15150 * 2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child
15151 * of either op1 of colon or op2 of colon but not a child of any other
15154 void Compiler::fgPreExpandQmarkChecks(GenTreePtr expr)
15156 GenTreePtr topQmark = fgGetTopLevelQmark(expr);
15158 // If the top level Qmark is null, then scan the tree to make sure
15159 // there are no qmarks within it.
15160 if (topQmark == NULL)
15162 fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, NULL);
15166 // We could probably expand the cond node also, but we don't think the extra effort is necessary,
15167 // so let's just assert that the cond node of a top-level qmark doesn't have further top-level qmarks.
15168 fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, NULL);
15170 fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1);
15171 fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2);
15176 /*****************************************************************************
15178 * Get the top level GT_QMARK node in a given "expr", return NULL if such a
15179 * node is not present. If the top level GT_QMARK node is assigned to a
15180 * GT_LCL_VAR, then return the lcl node in ppDst.
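 *
 * For example, both of these statement roots yield a non-null result:
 *    QMARK(cond, colon)                     - no destination
 *    ASG(LCL_VAR V03, QMARK(cond, colon))   - *ppDst is set to the V03 node
 * (V03 is a hypothetical local used for illustration.)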
15183 GenTreePtr Compiler::fgGetTopLevelQmark(GenTreePtr expr, GenTreePtr* ppDst /* = NULL */)
15190 GenTreePtr topQmark = NULL;
15191 if (expr->gtOper == GT_QMARK)
15195 else if (expr->gtOper == GT_ASG &&
15196 expr->gtOp.gtOp2->gtOper == GT_QMARK &&
15197 expr->gtOp.gtOp1->gtOper == GT_LCL_VAR)
15199 topQmark = expr->gtOp.gtOp2;
15202 *ppDst = expr->gtOp.gtOp1;
15209 /*********************************************************************************
15211 * For a castclass helper call,
15212 * the importer creates the following tree:
15213 * tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
15215 * This method splits the qmark expression created by the importer into the
15216 * following blocks: (block, asg, cond1, cond2, helper, remainder)
15217 * Notice that op1 is the result for both the conditions. So we coalesce these
15218 * assignments into a single block instead of two blocks, resulting in a nested diamond.
15220 * +---------->-----------+
15224 * block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
15226 * We expect to achieve the following codegen:
15227 * mov rsi, rdx tmp = op1 // asgBlock
15228 * test rsi, rsi goto skip if tmp == null ? // cond1Block
15230 * mov rcx, 0x76543210 cns = op2 // cond2Block
15231 * cmp qword ptr [rsi], rcx goto skip if *tmp == op2
15233 * call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp) // helperBlock
15235 * SKIP: // remainderBlock
15236 * tmp has the result.
15239 void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTreePtr stmt)
15244 printf("\nExpanding CastInstOf qmark in BB%02u (before)\n", block->bbNum);
15245 fgDispBasicBlocks(block, block, true);
15249 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
15251 GenTreePtr dst = nullptr;
15252 GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
15253 noway_assert(dst != nullptr);
15255 assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);
15257 // Get cond, true, false exprs for the qmark.
15258 GenTreePtr condExpr = qmark->gtGetOp1();
15259 GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
15260 GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
15262 // Get cond, true, false exprs for the nested qmark.
15263 GenTreePtr nestedQmark = falseExpr;
15264 GenTreePtr cond2Expr;
15265 GenTreePtr true2Expr;
15266 GenTreePtr false2Expr;
15268 if (nestedQmark->gtOper == GT_QMARK)
15270 cond2Expr = nestedQmark->gtGetOp1();
15271 true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
15272 false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode();
15274 assert(cond2Expr->gtFlags & GTF_RELOP_QMARK);
15275 cond2Expr->gtFlags &= ~GTF_RELOP_QMARK;
15279 // This is a rare case that arises when we are doing minopts and encounter isinst of null.
15280 // gtFoldExpr was still able to optimize away part of the tree (but not all of it).
15281 // That means it does not match our pattern.
15283 // Rather than write code to handle this case, just fake up some nodes to make it match the common
15284 // case. Synthesize a comparison that is always true, and for the result-on-true, use the
15285 // entire subtree we expected to be the nested question op.
15287 cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
15288 true2Expr = nestedQmark;
15289 false2Expr = gtNewIconNode(0, TYP_I_IMPL);
15291 assert(false2Expr->OperGet() == trueExpr->OperGet());
15293 // Clear flags as they are now going to be part of JTRUE.
15294 assert(condExpr->gtFlags & GTF_RELOP_QMARK);
15295 condExpr->gtFlags &= ~GTF_RELOP_QMARK;
15297 // Create the chain of blocks. See method header comment.
15298 // The order of blocks after this is the following:
15299 // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
15301 // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
15302 // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
15303 // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
15304 // remainderBlock will still be GC safe.
15305 unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
15306 BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
15307 fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
15309 BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true);
15310 BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true);
15311 BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true);
15312 BasicBlock* asgBlock = fgNewBBafter(BBJ_NONE, block, true);
15314 remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
15316 // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
15317 // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
15318 if ((block->bbFlags & BBF_INTERNAL) == 0)
15320 helperBlock->bbFlags &= ~BBF_INTERNAL;
15321 cond2Block->bbFlags &= ~BBF_INTERNAL;
15322 cond1Block->bbFlags &= ~BBF_INTERNAL;
15323 asgBlock->bbFlags &= ~BBF_INTERNAL;
15324 helperBlock->bbFlags |= BBF_IMPORTED;
15325 cond2Block->bbFlags |= BBF_IMPORTED;
15326 cond1Block->bbFlags |= BBF_IMPORTED;
15327 asgBlock->bbFlags |= BBF_IMPORTED;
15330 // Chain the flow correctly.
15331 fgAddRefPred(asgBlock, block);
15332 fgAddRefPred(cond1Block, asgBlock);
15333 fgAddRefPred(cond2Block, cond1Block);
15334 fgAddRefPred(helperBlock, cond2Block);
15335 fgAddRefPred(remainderBlock, helperBlock);
15336 fgAddRefPred(remainderBlock, cond1Block);
15337 fgAddRefPred(remainderBlock, cond2Block);
15339 cond1Block->bbJumpDest = remainderBlock;
15340 cond2Block->bbJumpDest = remainderBlock;
15342 // Set the weights; some are guesses.
15343 asgBlock->inheritWeight(block);
15344 cond1Block->inheritWeight(block);
15345 cond2Block->inheritWeightPercentage(cond1Block, 50);
15346 helperBlock->inheritWeightPercentage(cond2Block, 50);
15348 // Append cond1 as JTRUE to cond1Block
15349 GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
15350 GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
15351 fgInsertStmtAtEnd(cond1Block, jmpStmt);
15353 // Append cond2 as JTRUE to cond2Block
15354 jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
15355 jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
15356 fgInsertStmtAtEnd(cond2Block, jmpStmt);
15358 // AsgBlock should get tmp = op1 assignment.
15359 trueExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr);
15360 GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
15361 fgInsertStmtAtEnd(asgBlock, trueStmt);
15363 // Since we are adding helper in the JTRUE false path, reverse the cond2 and add the helper.
15364 gtReverseCond(cond2Expr);
15365 GenTreePtr helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr);
15366 GenTreePtr helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx);
15367 fgInsertStmtAtEnd(helperBlock, helperStmt);
15369 // Finally remove the nested qmark stmt.
15370 fgRemoveStmt(block, stmt);
15375 printf("\nExpanding CastInstOf qmark in BB%02u (after)\n", block->bbNum);
15376 fgDispBasicBlocks(block, remainderBlock, true);
15381 /*****************************************************************************
15383 * Expand a statement with a top level qmark node. There are three cases, based
15384 * on whether the qmark has both "true" and "false" arms, or just one of them.
15395 * S0 -->-- ~C -->-- T F -->-- S1
15400 * -----------------------------------------
15409 * S0 -->-- ~C -->-- T -->-- S1
15411 * +-->-------------+
15414 * -----------------------------------------
15423 * S0 -->-- C -->-- F -->-- S1
15425 * +-->------------+
15428 * If the qmark assigns to a variable, then create tmps for "then"
15429 * and "else" results and assign the temp to the variable as a writeback step.
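 *
 * For example (sketch): "V05 = cond ? t : f" (V05 hypothetical) expands into
 *    condBlock:   JTRUE(reversed cond) --> elseBlock
 *    thenBlock:   V05 = t, then jump to the remainder block
 *    elseBlock:   V05 = f
 * with the code following the qmark statement left in the remainder block.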
15431 void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTreePtr stmt)
15433 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
15435 // Retrieve the Qmark node to be expanded.
15436 GenTreePtr dst = nullptr;
15437 GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
15438 if (qmark == nullptr)
15443 if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
15445 fgExpandQmarkForCastInstOf(block, stmt);
15452 printf("\nExpanding top-level qmark in BB%02u (before)\n", block->bbNum);
15453 fgDispBasicBlocks(block, block, true);
15457 // Retrieve the operands.
15458 GenTreePtr condExpr = qmark->gtGetOp1();
15459 GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
15460 GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
15462 assert(condExpr->gtFlags & GTF_RELOP_QMARK);
15463 condExpr->gtFlags &= ~GTF_RELOP_QMARK;
15465 assert(!varTypeIsFloating(condExpr->TypeGet()));
15467 bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP);
15468 bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
15469 assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!
15471 // Create remainder, cond and "else" blocks. After this, the blocks are in this order:
15472 // block ... condBlock ... elseBlock ... remainderBlock
15474 // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
15475 // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
15476 // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
15477 // remainderBlock will still be GC safe.
15478 unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
15479 BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
15480 fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
15482 BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true);
15483 BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true);
15485 // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
15486 // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
15487 if ((block->bbFlags & BBF_INTERNAL) == 0)
15489 condBlock->bbFlags &= ~BBF_INTERNAL;
15490 elseBlock->bbFlags &= ~BBF_INTERNAL;
15491 condBlock->bbFlags |= BBF_IMPORTED;
15492 elseBlock->bbFlags |= BBF_IMPORTED;
15495 remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
15497 condBlock->inheritWeight(block);
15499 fgAddRefPred(condBlock, block);
15500 fgAddRefPred(elseBlock, condBlock);
15501 fgAddRefPred(remainderBlock, elseBlock);
15503 BasicBlock* thenBlock = nullptr;
15504 if (hasTrueExpr && hasFalseExpr)
15509 // S0 -->-- ~C -->-- T F -->-- S1
15514 gtReverseCond(condExpr);
15515 condBlock->bbJumpDest = elseBlock;
15517 thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
15518 thenBlock->bbJumpDest = remainderBlock;
15519 if ((block->bbFlags & BBF_INTERNAL) == 0)
15521 thenBlock->bbFlags &= ~BBF_INTERNAL;
15522 thenBlock->bbFlags |= BBF_IMPORTED;
15525 elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
15527 fgAddRefPred(thenBlock, condBlock);
15528 fgAddRefPred(remainderBlock, thenBlock);
15530 thenBlock->inheritWeightPercentage(condBlock, 50);
15531 elseBlock->inheritWeightPercentage(condBlock, 50);
15533 else if (hasTrueExpr)
15536 // S0 -->-- ~C -->-- T -->-- S1
15538 // +-->-------------+
15541 gtReverseCond(condExpr);
15542 condBlock->bbJumpDest = remainderBlock;
15543 fgAddRefPred(remainderBlock, condBlock);
15544 // Since we have no false expr, use the one we'd already created.
15545 thenBlock = elseBlock;
15546 elseBlock = nullptr;
15548 thenBlock->inheritWeightPercentage(condBlock, 50);
15550 else if (hasFalseExpr)
15553 // S0 -->-- C -->-- F -->-- S1
15555 // +-->------------+
15558 condBlock->bbJumpDest = remainderBlock;
15559 fgAddRefPred(remainderBlock, condBlock);
15561 elseBlock->inheritWeightPercentage(condBlock, 50);
15564 GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
15565 GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
15566 fgInsertStmtAtEnd(condBlock, jmpStmt);
15568 // Remove the original qmark statement.
15569 fgRemoveStmt(block, stmt);
15571 // Since we have a top-level qmark, either it has a dst, in which case we
15572 // assign the "then" and "else" results to that local, or it is void and we don't bother
15574 unsigned lclNum = BAD_VAR_NUM;
15575 if (dst != nullptr)
15577 assert(dst->gtOper == GT_LCL_VAR);
15578 lclNum = dst->gtLclVar.gtLclNum;
15582 assert(qmark->TypeGet() == TYP_VOID);
15587 if (dst != nullptr)
15589 trueExpr = gtNewTempAssign(lclNum, trueExpr);
15591 GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
15592 fgInsertStmtAtEnd(thenBlock, trueStmt);
15595 // Assign the falseExpr into the dst or tmp, insert in elseBlock
15598 if (dst != nullptr)
15600 falseExpr = gtNewTempAssign(lclNum, falseExpr);
15602 GenTreePtr falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx);
15603 fgInsertStmtAtEnd(elseBlock, falseStmt);
15609 printf("\nExpanding top-level qmark in BB%02u (after)\n", block->bbNum);
15610 fgDispBasicBlocks(block, remainderBlock, true);
15615 /*****************************************************************************
15617 * Expand GT_QMARK nodes from the flow graph into basic blocks.
15621 void Compiler::fgExpandQmarkNodes()
15625 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
15627 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
15629 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
15631 fgPreExpandQmarkChecks(expr);
15633 fgExpandQmarkStmt(block, stmt);
15637 fgPostExpandQmarkChecks();
15640 compQmarkRationalized = true;
15644 /*****************************************************************************
15646 * Make sure we don't have any more GT_QMARK nodes.
15649 void Compiler::fgPostExpandQmarkChecks()
15651 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
15653 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
15655 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
15656 fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, NULL);
15662 /*****************************************************************************
15664 * Transform all basic blocks for codegen.
15667 void Compiler::fgMorph()
15669 noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here.
15671 fgOutgoingArgTemps = nullptr;
15675 printf("*************** In fgMorph()\n");
15677 fgDispBasicBlocks(true);
15680 // Insert call to class constructor as the first basic block if
15681 // we were asked to do so.
15682 if (info.compCompHnd->initClass(NULL /* field */, info.compMethodHnd /* method */,
15683 impTokenLookupContextHandle /* context */) & CORINFO_INITCLASS_USE_HELPER)
15685 fgEnsureFirstBBisScratch();
15686 fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass());
15690 if (opts.compGcChecks)
15692 for (unsigned i = 0; i < info.compArgsCount; i++)
15694 if (lvaTable[i].TypeGet() == TYP_REF)
15696 // confirm that the argument is a GC pointer (for debugging (GC stress))
15697 GenTreePtr op = gtNewLclvNode(i, TYP_REF);
15698 GenTreeArgList* args = gtNewArgList(op);
15699 op = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, 0, args);
15701 fgEnsureFirstBBisScratch();
15702 fgInsertStmtAtEnd(fgFirstBB, op);
15707 if (opts.compStackCheckOnRet)
15709 lvaReturnEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnEspCheck"));
15710 lvaTable[lvaReturnEspCheck].lvType = TYP_INT;
15713 if (opts.compStackCheckOnCall)
15715 lvaCallEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("CallEspCheck"));
15716 lvaTable[lvaCallEspCheck].lvType = TYP_INT;
15720 /* Filter out unimported BBs */
15722 fgRemoveEmptyBlocks();
15724 /* Add any internal blocks/trees we may need */
15729 fgMultipleNots = false;
15733 /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
15734 fgDebugCheckBBlist(false, false);
15740 JITDUMP("trees after inlining\n");
15741 DBEXEC(VERBOSE, fgDispBasicBlocks(true));
15744 RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.
15747 /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
15748 fgDebugCheckBBlist(false, false);
15751 /* For x64 and ARM64 we need to mark irregular parameters early so that they don't get promoted */
15752 fgMarkImplicitByRefArgs();
15754 /* Promote struct locals if necessary */
15755 fgPromoteStructs();
15757 /* Now it is time to figure out which locals are address-taken. */
15758 fgMarkAddressExposedLocals();
15761 /* Now that address-taken locals have been marked, we can safely apply stress. */
15763 fgStress64RsltMul();
15766 /* Morph the trees in all the blocks of the method */
15771 JITDUMP("trees after fgMorphBlocks\n");
15772 DBEXEC(VERBOSE, fgDispBasicBlocks(true));
15775 /* Decide the kind of code we want to generate */
15779 fgExpandQmarkNodes();
15787 /*****************************************************************************
15789 * Promoting struct locals
15791 void Compiler::fgPromoteStructs()
15795 printf("*************** In fgPromoteStructs()\n");
15798 if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
15801 if (fgNoStructPromotion)
15805 // The code in this #if has been useful in debugging struct promotion issues, by
15806 // selectively enabling the struct promotion optimization according to method hash.
15809 unsigned methHash = info.compMethodHash();
15810 char* lostr = getenv("structpromohashlo");
15811 unsigned methHashLo = 0;
15814 sscanf_s(lostr, "%x", &methHashLo);
15816 char* histr = getenv("structpromohashhi");
15817 unsigned methHashHi = UINT32_MAX;
15820 sscanf_s(histr, "%x", &methHashHi);
15822 if (methHash < methHashLo || methHash > methHashHi)
15828 printf("Promoting structs for method %s, hash = 0x%x.\n",
15829 info.compFullName, info.compMethodHash());
15830 printf(""); // in our logic this causes a flush
15835 if (info.compIsVarArgs)
15838 if (getNeedsGSSecurityCookie())
15841 // The lvaTable might grow as we grab temps. Make a local copy here.
15843 unsigned startLvaCount = lvaCount;
15846 // Loop through the original lvaTable. Looking for struct locals to be promoted.
15849 lvaStructPromotionInfo structPromotionInfo;
15850 bool tooManyLocals = false;
15852 for (unsigned lclNum = 0;
15853 lclNum < startLvaCount;
15856 // Whether this var got promoted
15857 bool promotedVar = false;
15858 LclVarDsc* varDsc = &lvaTable[lclNum];
15860 #ifdef FEATURE_SIMD
15861 if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic)
15863 // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
15864 // its fields. Instead, we will attempt to enregister the entire struct.
15865 varDsc->lvRegStruct = true;
15868 #endif //FEATURE_SIMD
15869 // Don't promote if we have reached the tracking limit.
15870 if (lvaHaveManyLocals())
15872 // Print the message first time when we detected this condition
15873 if (!tooManyLocals)
15875 JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
15877 tooManyLocals = true;
15879 #if !FEATURE_MULTIREG_STRUCT_PROMOTE
15880 else if (varDsc->lvIsMultiRegArgOrRet)
15882 JITDUMP("Skipping V%02u: marked lvIsMultiRegArgOrRet.\n", lclNum);
15884 #endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
15885 else if (varTypeIsStruct(varDsc))
15887 lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
15888 bool canPromote = structPromotionInfo.canPromote;
15890 // We start off with shouldPromote same as canPromote.
15891 // Based on further profitability checks done below, shouldPromote
15892 // could be set to false.
15893 bool shouldPromote = canPromote;
15898 // We *can* promote; *should* we promote?
15899 // We should only do so if promotion has potential savings. One source of savings
15900 // is if a field of the struct is accessed, since this access will be turned into
15901 // an access of the corresponding promoted field variable. Even if there are no
15902 // field accesses, but only block-level operations on the whole struct, if the struct
15903 // has only one or two fields, then doing those block operations field-wise is probably faster
15904 // than doing a whole-variable block operation (e.g., a hardware "copy loop" on x86).
15905 // So if no fields are accessed independently, and there are three or more fields,
15906 // then do not promote.
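// For example (hypothetical layouts): a struct { int a, b, c; } local that
// is only ever block-copied would not be promoted under this heuristic,
// while a struct { int a, b; } local would be, since two field-wise copies
// are usually cheaper than a whole-variable block operation.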
15907 if (structPromotionInfo.fieldCnt > 2 && !varDsc->lvFieldAccessed)
15909 JITDUMP("Not promoting promotable struct local V%02u: #fields = %d, fieldAccessed = %d.\n",
15910 lclNum, structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed);
15911 shouldPromote = false;
15913 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
15914 // TODO-PERF - Only do this when the LclVar is used in an argument context
15915 // TODO-ARM64 - HFA support should also eliminate the need for this.
15916 // TODO-LSRA - Currently doesn't support the passing of floating point LCL_VARS in the integer registers
15918 // For now we don't promote structs with a single float field.
15919 // Promoting one can cause us to shuffle it back and forth between the int and
15920 // the float regs when it is used as an argument, which is very expensive for XARCH.
15922 else if ((structPromotionInfo.fieldCnt == 1) &&
15923 varTypeIsFloating(structPromotionInfo.fields[0].fldType))
15925 JITDUMP("Not promoting promotable struct local V%02u: #fields = %d because it is a struct with single float field.\n",
15926 lclNum, structPromotionInfo.fieldCnt);
15927 shouldPromote = false;
15929 #endif // _TARGET_AMD64_ || _TARGET_ARM64_
15931 #if !FEATURE_MULTIREG_STRUCT_PROMOTE
15932 #if defined(_TARGET_ARM64_)
15934 // For now we don't promote structs that could be passed in registers
15936 else if (varDsc->lvIsMultiregStruct())
15938 JITDUMP("Not promoting promotable struct local V%02u (size==%d): ",
15939 lclNum, lvaLclExactSize(lclNum));
15940 shouldPromote = false;
15942 #endif // _TARGET_ARM64_
15943 #endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
15944 else if (varDsc->lvIsParam)
15946 #if FEATURE_MULTIREG_STRUCT_PROMOTE
15947 if (varDsc->lvIsMultiregStruct() && // Is this a variable holding a value that is passed in multiple registers?
15948 (structPromotionInfo.fieldCnt != 2)) // Does it have exactly two fields
15950 JITDUMP("Not promoting multireg struct local V%02u, because lvIsParam is true and #fields != 2\n",
15952 shouldPromote = false;
15955 #endif // FEATURE_MULTIREG_STRUCT_PROMOTE
15957 // TODO-PERF - Implement struct promotion for incoming multireg structs
15958 // Currently it hits assert(lvFieldCnt==1) in lclvar.cpp line 4417
15960 if (structPromotionInfo.fieldCnt != 1)
15962 JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam is true and #fields = %d.\n",
15963 lclNum, structPromotionInfo.fieldCnt);
15964 shouldPromote = false;
15969 // If the lvRefCnt is zero and we have a struct promoted parameter we can end up with an extra store of the
15970 // incoming register into the stack frame slot.
15971 // In that case, we would like to avoid promotion.
15972 // However, we haven't yet computed the lvRefCnt values, so we can't do that.
15976 // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
15977 // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
15978 static int structPromoVarNum = 0;
15979 structPromoVarNum++;
15980 if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
15985 assert(canPromote);
15987 // Promote this struct local var.
15988 lvaPromoteStructVar(lclNum, &structPromotionInfo);
15989 promotedVar = true;
15991 #ifdef _TARGET_ARM_
15992 if (structPromotionInfo.requiresScratchVar)
15994 // Ensure that the scratch variable is allocated, in case we
15995 // pass a promoted struct as an argument.
15996 if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
15998 lvaPromotedStructAssemblyScratchVar =
15999 lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
16000 lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
16003 #endif // _TARGET_ARM_
16008 #ifdef FEATURE_SIMD
16009 if (!promotedVar && varDsc->lvSIMDType && !varDsc->lvFieldAccessed)
16011 // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
16012 // we will treat it as a reg struct.
16013 varDsc->lvRegStruct = true;
16015 #endif // FEATURE_SIMD
16021 Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre)
16023 noway_assert(tree->OperGet() == GT_FIELD);
16024 noway_assert(tree->gtFlags & GTF_GLOB_REF);
16026 GenTreePtr objRef = tree->gtField.gtFldObj;
16028 /* Is this an instance data member? */
16032 if (objRef->gtOper == GT_ADDR)
16034 GenTreePtr obj = objRef->gtOp.gtOp1;
16036 if (obj->gtOper == GT_LCL_VAR)
16038 unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
16039 LclVarDsc* varDsc = &lvaTable[lclNum];
16041 if (varTypeIsStruct(obj))
16043 if (varDsc->lvPromoted)
16046 unsigned fldOffset = tree->gtField.gtFldOffset;
16047 unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
16048 noway_assert(fieldLclIndex != BAD_VAR_NUM);
16050 tree->SetOper(GT_LCL_VAR);
16051 tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
16052 tree->gtType = lvaTable[fieldLclIndex].TypeGet();
16053 tree->gtFlags &= GTF_NODE_MASK;
16054 tree->gtFlags &= ~GTF_GLOB_REF;
16056 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
16057 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
16059 tree->gtFlags |= GTF_VAR_DEF;
16060 tree->gtFlags |= GTF_DONT_CSE;
16065 printf("Replacing the field in promoted struct with a local var:\n");
16066 fgWalkPre->printModified = true;
16069 return WALK_SKIP_SUBTREES;
16075 // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
16076 // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8 bytes).
16077 // Normally, the type of the local var and the type of GT_FIELD are equivalent. However, there
16078 // is one extremely rare case where that won't be true. An enum type is a special value type
16079 // that contains exactly one element of a primitive integer type (which, for CLS programs, is named "value__").
16080 // The VM tells us that a local var of that enum type is the primitive type of the enum's single field.
16081 // It turns out that it is legal for IL to access this field using ldflda or ldfld. For example:
16083 // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
16085 // .field public specialname rtspecialname int16 value__
16086 // .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
16088 // .method public hidebysig static void Main() cil managed
16090 // .locals init (valuetype mynamespace.e_t V_0)
16093 // ldflda int16 mynamespace.e_t::value__
16097 // Normally, compilers will not generate the ldflda, since it is superfluous.
16099 // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
16100 // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
16101 // mismatch like this, don't do this morphing. The local var may end up getting marked as
16102 // address taken, and the appropriate SHORT load will be done from memory in that case.
16104 if (tree->TypeGet() == obj->TypeGet())
16106 tree->ChangeOper(GT_LCL_VAR);
16107 tree->gtLclVarCommon.SetLclNum(lclNum);
16108 tree->gtFlags &= GTF_NODE_MASK;
16110 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
16111 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
16113 tree->gtFlags |= GTF_VAR_DEF;
16114 tree->gtFlags |= GTF_DONT_CSE;
16119 printf("Replacing the field in normed struct with the local var:\n");
16120 fgWalkPre->printModified = true;
16123 return WALK_SKIP_SUBTREES;
16130 return WALK_CONTINUE;
16133 Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTreePtr tree, fgWalkData* fgWalkPre)
16135 noway_assert(tree->OperGet() == GT_LCL_FLD);
16137 unsigned lclNum = tree->gtLclFld.gtLclNum;
16138 LclVarDsc* varDsc = &lvaTable[lclNum];
16140 if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted))
16143 unsigned fldOffset = tree->gtLclFld.gtLclOffs;
16144 unsigned fieldLclIndex = 0;
16145 LclVarDsc* fldVarDsc = NULL;
16147 if (fldOffset != BAD_VAR_NUM)
16149 fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
16150 noway_assert(fieldLclIndex != BAD_VAR_NUM);
16151 fldVarDsc = &lvaTable[fieldLclIndex];
16154 if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType)
16155 #ifdef _TARGET_X86_
16156 && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType)
16160 // There is an existing sub-field we can use
16161 tree->gtLclFld.SetLclNum(fieldLclIndex);
16163 // We need to keep the types 'compatible'. If we can switch back to a GT_LCL_VAR
16164 assert(varTypeIsIntegralOrI(tree->TypeGet()));
16165 if (varTypeCanReg(fldVarDsc->TypeGet()))
16167 // If the type is integer-ish, then we can use it as-is
16168 tree->ChangeOper(GT_LCL_VAR);
16169 assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex);
16170 tree->gtType = fldVarDsc->TypeGet();
16174 printf("Replacing the GT_LCL_FLD in promoted struct with a local var:\n");
16175 fgWalkPre->printModified = true;
16180 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
16181 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
16183 tree->gtFlags |= GTF_VAR_DEF;
16184 tree->gtFlags |= GTF_DONT_CSE;
16189 // There is no existing field that has all the parts that we need
16190 // So we must ensure that the struct lives in memory.
16191 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
16194 // We can't convert this guy to a float because he really does have his address taken.
16196 varDsc->lvKeepType = 1;
16200 return WALK_SKIP_SUBTREES;
16203 return WALK_CONTINUE;
16206 /*****************************************************************************
16208 * Mark irregular parameters. For x64 these are 3-, 5-, 6-, 7-, and >8-byte structs that are passed by reference.
16209 * For ARM64, these are structs larger than 16 bytes that are not HFAs and are passed by reference.
16211 void Compiler::fgMarkImplicitByRefArgs()
16213 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
16216 printf("\n*************** In fgMarkImplicitByRefArgs()\n");
16219 for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++)
16221 LclVarDsc* varDsc = &lvaTable[lclNum];
16223 assert(!varDsc->lvPromoted); // Called in the wrong order?
16225 if (varDsc->lvIsParam && varTypeIsStruct(varDsc))
16229 if (varDsc->lvSize() > REGSIZE_BYTES)
16231 size = varDsc->lvSize();
16235 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
16236 size = info.compCompHnd->getClassSize(typeHnd);
16240 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
16241 #if defined(_TARGET_AMD64_)
16242 if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
16243 #elif defined(_TARGET_ARM64_)
16244 if ((size > TARGET_POINTER_SIZE) && !varDsc->lvIsMultiregStruct())
16248 // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local,
16249 // so I am now using it to indicate that this is one of the weird implicit byref parameters.
16251 // The address taken cleanup will look for references to locals marked like
16252 // this, and transform them appropriately.
16253 varDsc->lvIsTemp = 1;
16255 // Also marking them as BYREF will hide them from struct promotion.
16256 varDsc->lvType = TYP_BYREF;
16257 varDsc->lvRefCnt = 0;
16259 // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF
16260 // make sure that the following flag is not set as these will force SSA to
16261 // exclude tracking/enregistering these LclVars. (see fgExcludeFromSsa)
16263 varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it.
16266 // This should not be converted to a double in stress mode,
16267 // because it is really a pointer
16268 varDsc->lvKeepType = 1;
16272 printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum);
16276 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
16280 #endif // _TARGET_AMD64_ || _TARGET_ARM64_
16283 /*****************************************************************************
16285 * Morph irregular parameters:
16286 * for x64 and ARM64 this means turning them into byrefs and adding extra indirs.
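 *
 * For example (sketch), for an implicit byref parameter V02 (hypothetical):
 *    GT_ADDR(GT_LCL_VAR V02)  becomes  GT_LCL_VAR V02 (typed TYP_BYREF)
 *    GT_LCL_VAR V02           becomes  GT_IND(GT_LCL_VAR V02)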
16288 bool Compiler::fgMorphImplicitByRefArgs(GenTreePtr tree, fgWalkData* fgWalkPre)
16290 #if !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
16294 #else // _TARGET_AMD64_ || _TARGET_ARM64_
16296 assert((tree->gtOper == GT_LCL_VAR) ||
16297 ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)));
16299 bool isAddr = (tree->gtOper == GT_ADDR);
16300 GenTreePtr lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree;
16301 unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
16302 LclVarDsc* lclVarDsc = &lvaTable[lclNum];
16304 if (!lvaIsImplicitByRefLocal(lclNum))
16306 // We only need to transform the 'marked' implicit by ref parameters
16310 // The SIMD transformation to coalesce contiguous references to SIMD vector fields will
16311 // re-invoke the traversal to mark address-taken locals.
16312 // So, we may encounter a tree that has already been transformed to TYP_BYREF.
16313 // If we do, leave it as-is.
16314 if (!varTypeIsStruct(lclVarTree))
16316 assert(lclVarTree->TypeGet() == TYP_BYREF);
16320 // We are overloading the lvRefCnt field here because real ref counts have not been set.
16321 lclVarDsc->lvRefCnt++;
16325 // change &X into just plain X
16326 tree->CopyFrom(lclVarTree, this);
16327 tree->gtType = TYP_BYREF;
16332 printf("Replacing address of implicit by ref struct parameter with byref:\n");
16333 fgWalkPre->printModified = true;
16339 // Change X into *X
16340 // First, save the original type, then change the tree type to TYP_BYREF (otherwise we
16341 // will get an assert when we try to clone the lclVar node because the lclVar is now TYP_BYREF
16342 // and the types have to match). The reason we clone the lclVar is that we don't pass a
16343 // possibly-modified tree back to the caller, so we modify the original lclVar node in place.
16345 var_types structType = tree->gtType;
16346 lclVarTree = gtClone(tree);
16347 // Now, set the types appropriately.
16348 lclVarTree->gtType = TYP_BYREF;
16349 tree->gtType = structType;
16350 // Now, "insert" the GT_IND by changing the oper of the original node and setting its op1.
16351 tree->SetOper(GT_IND);
16352 tree->gtOp.gtOp1 = lclVarTree;
16353 // TODO-CQ: If the VM ever stops violating the ABI and passing heap references
16354 // we could remove TGTANYWHERE
16355 tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE);
16360 printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n");
16361 gtDispTree(tree, nullptr, nullptr, true);
16362 fgWalkPre->printModified = true;
16369 #endif // _TARGET_AMD64_ || _TARGET_ARM64_
16374 // An "AddrExposedContext" expresses the calling context in which an address expression occurs.
16375 enum AddrExposedContext
16377 AXC_None, // None of the below seen yet.
16378 AXC_Ind, // The address being computed is to be dereferenced.
16379 AXC_Addr, // We're computing a raw address (not dereferenced, at least not immediately).
16380 AXC_IndWide, // A block operation dereferenced an address referencing more bytes than the addressed
16381 // location holds -- if the address refers to a field of a struct local, we need to consider
16382 // the entire local address-taken (not just the field).
16383 AXC_AddrWide, // The address being computed will be dereferenced by a block operation that operates
16384 // on more bytes than the width of the storage location addressed. If this is a
16385 // field of a promoted struct local, declare the entire struct local address-taken.
16386 AXC_InitBlk, // A GT_INITBLK is the immediate parent. The first argument is in an IND context.
16387 AXC_CopyBlk, // A GT_COPYBLK is the immediate parent. The first argument is in a GT_LIST, whose
16388 // args should be evaluated in an IND context.
16389 AXC_IndAdd, // A GT_ADD is the immediate parent, and it was evaluated in an IND context.
16390 // If one arg is a constant int, evaluate the other in an IND context. Otherwise, none.
16393 typedef ArrayStack<AddrExposedContext> AXCStack;
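// For example (a sketch): when walking ASG(IND(ADDR(LCL_VAR V02)), ...), the IND pushes
// AXC_Ind, so the ADDR below it pushes AXC_None and V02 is not marked address-exposed;
// a bare ADDR(LCL_VAR V02) reached under AXC_None pushes AXC_Addr, and the local is
// then marked address-exposed.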
16395 // We use the pre- and post-order callbacks to simulate passing an argument in a recursion, via a stack.
16396 Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPostCB(GenTreePtr* pTree,
16397 fgWalkData* fgWalkPre)
16399 AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
16400 (void)axcStack->Pop();
16401 return WALK_CONTINUE;
16404 Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTree,
16405 fgWalkData* fgWalkPre)
16407 GenTreePtr tree = *pTree;
16408 Compiler* comp = fgWalkPre->compiler;
16409 AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
16410 AddrExposedContext axc = axcStack->Top();
16412 // In some situations, we have to figure out what the effective context is in which to
16413 // evaluate the current tree, depending on which argument position it is in its parent.
16415 // If the parent was an initblk, and this is its first argument, we're in
16416 // an "ind" context.
16422 // In both cases, the second argument is an integer struct size. That should have a "none" context.
16423 // The first argument is a GT_LIST. For GT_COPYBLK, both args of the list are addresses
16424 // that are dereferenced; for GT_INITBLK, the first is. We pass "axc" to the GT_LIST;
16425 // which will pass it to its arguments; these will decide whether they're in an Ind context
16426 // depending on "axc" and which argument they are.
16427 // A GT_INITBLK's first argument is a GT_LIST, whose first argument is an address
16428 // that should be considered to be dereferenced, and whose second argument is the integer
16429 // (byte) value to fill the block with. The second argument of the GT_INITBLK is also
16430 // an integer, the block size.
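// A sketch of the operand shapes assumed here (the list order below is an assumption):
//     GT_INITBLK(GT_LIST(dstAddr, initVal), size)
//     GT_COPYBLK(GT_LIST(dstAddr, srcAddr), size)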
16431 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
16432 if (parent->gtOp.gtOp2 == tree &&
16433 parent->OperIsBlkOp())
16437 else if (parent->OperGet() == GT_LIST)
16439 genTreeOps axcOper = fgWalkPre->parentStack->Index(2)->OperGet();
16440 assert((axc == AXC_InitBlk && axcOper == GT_INITBLK) ||
16441 (axc == AXC_CopyBlk && GenTree::OperIsCopyBlkOp(axcOper)));
16443 // The block operation will dereference its argument(s) -- usually. If the size of the initblk
16444 // or copyblk exceeds the size of a storage location whose address is used as one of the
16445 // arguments, then we have to consider that storage location (indeed, its underlying containing
16446 // location) to be address taken. So get the width of the initblk or copyblk.
16447 GenTreePtr widthNode = fgWalkPre->parentStack->Index(2)->gtOp.gtOp2;
16448 unsigned width = UINT_MAX; // If it's not a constant, assume it's maximally big.
16449 if (widthNode->IsCnsIntOrI())
16451 if (widthNode->IsIconHandle())
16453 // If it's a handle, it must be a class handle. We only create such block operations
16454 // for initialization of struct types, so the type of the argument(s) will match this
16455 // type, by construction. Set the width to zero to make sure nothing fits in it.
16456 assert(widthNode->IsIconHandle(GTF_ICON_CLASS_HDL));
16461 ssize_t swidth = widthNode->gtIntConCommon.IconValue();
16462 assert(swidth > 0); // Well-formedness of the block operation node...
16463 width = unsigned(swidth);
16467 if (parent->gtOp.gtOp1 == tree)
16469 // First argument is (potentially) dereferenced by both kinds of block operations.
16470 if (tree->OperGet() == GT_ADDR && !comp->fgFitsInOrNotLoc(tree->gtOp.gtOp1, width))
16479 else if (axc == AXC_CopyBlk)
16481 assert(parent->gtOp.gtOp2 == tree);
16482 if (tree->OperGet() == GT_ADDR && !comp->fgFitsInOrNotLoc(tree->gtOp.gtOp1, width))
16501 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
16502 assert(parent->OperGet() == GT_ADD);
16503 // Is one of the args a constant representing a field offset,
16504 // and is this the other? If so, Ind context.
16505 if (parent->gtOp.gtOp1->IsCnsIntOrI() && parent->gtOp.gtOp2 == tree)
16509 else if (parent->gtOp.gtOp2->IsCnsIntOrI() && parent->gtOp.gtOp1 == tree)
16524 // Now recurse properly for the tree.
16525 switch (tree->gtOper)
16529 if (axc != AXC_Addr)
16531 axcStack->Push(AXC_Ind);
16535 axcStack->Push(AXC_None);
16537 return WALK_CONTINUE;
16540 axcStack->Push(AXC_InitBlk);
16541 return WALK_CONTINUE;
16545 axcStack->Push(AXC_CopyBlk);
16546 return WALK_CONTINUE;
16549 if (axc == AXC_InitBlk || axc == AXC_CopyBlk)
16551 axcStack->Push(axc);
16555 axcStack->Push(AXC_None);
16557 return WALK_CONTINUE;
16560 // Taking the address of an array element never takes the address of a local.
16561 axcStack->Push(AXC_None);
16562 return WALK_CONTINUE;
16565 // If we have ADDR(lcl), and "lcl" is an implicit byref parameter, fgMorphImplicitByRefArgs will
16566 // convert it to just "lcl". This is never an address-context use, since the local is already a
16567 // byref after this transformation.
16568 if (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR && comp->fgMorphImplicitByRefArgs(tree, fgWalkPre))
16570 // Push something to keep the PostCB, which will pop it, happy.
16571 axcStack->Push(AXC_None);
16572 // In the first case, tree may no longer be a leaf, but we're done with it; in the second case it is a leaf.
16573 return WALK_SKIP_SUBTREES;
16576 #ifdef FEATURE_SIMD
16577 if (tree->gtOp.gtOp1->OperGet() == GT_SIMD)
16579 axcStack->Push(AXC_None);
16582 #endif // FEATURE_SIMD
16583 if (axc == AXC_Ind)
16585 axcStack->Push(AXC_None);
16587 else if (axc == AXC_IndWide)
16589 axcStack->Push(AXC_AddrWide);
16593 assert(axc == AXC_None);
16594 axcStack->Push(AXC_Addr);
16596 return WALK_CONTINUE;
16599 // First, handle a couple of special cases: field of promoted struct local, field
16600 // of "normed" struct.
16601 if (comp->fgMorphStructField(tree, fgWalkPre) == WALK_SKIP_SUBTREES)
16603 // It (may have) replaced the field with a local var or local field. If we're in an addr context,
16604 // label it addr-taken.
16605 if (tree->OperIsLocal() && (axc == AXC_Addr || axc == AXC_AddrWide))
16607 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
16608 comp->lvaSetVarAddrExposed(lclNum);
16609 if (axc == AXC_AddrWide)
16611 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
16612 if (varDsc->lvIsStructField)
16614 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
16618 // Push something to keep the PostCB, which will pop it, happy.
16619 axcStack->Push(AXC_None);
16620 return WALK_SKIP_SUBTREES;
16624 // GT_FIELD is an implicit deref.
16625 if (axc == AXC_Addr)
16627 axcStack->Push(AXC_None);
16629 else if (axc == AXC_AddrWide)
16631 axcStack->Push(AXC_IndWide);
16635 axcStack->Push(AXC_Ind);
16637 return WALK_CONTINUE;
16642 assert(axc != AXC_Addr);
16643 // This recognizes certain forms and does all the work; in those cases it returns WALK_SKIP_SUBTREES,
16644 // else WALK_CONTINUE. We do the same here.
16645 fgWalkResult res = comp->fgMorphLocalField(tree, fgWalkPre);
16646 if (res == WALK_SKIP_SUBTREES && tree->OperGet() == GT_LCL_VAR && (axc == AXC_Addr || axc == AXC_AddrWide))
16648 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
16649 comp->lvaSetVarAddrExposed(lclNum);
16650 if (axc == AXC_AddrWide)
16652 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
16653 if (varDsc->lvIsStructField)
16655 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
16659 // Must push something; if res is WALK_SKIP_SUBTREES it doesn't matter
16660 // what, as long as the post callback has something to pop. If we're going
16661 // to analyze children, the LCL_FLD creates an Ind context, so use that.
16662 axcStack->Push(AXC_Ind);
16667 // On some architectures, some arguments are passed implicitly by reference.
16668 // Modify the trees to reflect that, if this local is one of those.
16669 if (comp->fgMorphImplicitByRefArgs(tree, fgWalkPre))
16671 // We can't be in an address context; the ADDR(lcl), where lcl is an implicit byref param, was
16672 // handled earlier. (And we can't have added anything to this address, since it was implicit.)
16673 assert(axc != AXC_Addr);
16677 if (axc == AXC_Addr || axc == AXC_AddrWide)
16679 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
16680 comp->lvaSetVarAddrExposed(lclNum);
16681 if (axc == AXC_AddrWide)
16683 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
16684 if (varDsc->lvIsStructField)
16686 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
16690 // We may need to quirk the storage size for this LCL_VAR:
16691 // some PInvoke signatures incorrectly specify a byref to an INT32
16692 // when they actually write a SIZE_T or INT64.
16693 if (axc == AXC_Addr)
16695 comp->gtCheckQuirkAddrExposedLclVar(tree, fgWalkPre->parentStack);
16699 // Push something to keep the PostCB, which will pop it, happy.
16700 axcStack->Push(AXC_None);
16701 // In the first case, tree may no longer be a leaf, but we're done with it; in the second case it is a leaf.
16702 return WALK_SKIP_SUBTREES;
16705 assert(axc != AXC_Addr);
16706 // See below about treating pointer operations as wider indirection.
16707 if (tree->gtOp.gtOp1->gtType == TYP_BYREF || tree->gtOp.gtOp2->gtType == TYP_BYREF)
16709 axcStack->Push(AXC_IndWide);
16711 else if (axc == AXC_Ind)
16713 // Let the children know that the parent was a GT_ADD, to be evaluated in an IND context.
16714 // If it's an add of a constant and an address, and the constant represents a field,
16715 // then we'll evaluate the address argument in an Ind context; otherwise, the None context.
16716 axcStack->Push(AXC_IndAdd);
16720 axcStack->Push(axc);
16722 return WALK_CONTINUE;
16724 // !!! Treat Pointer Operations as Wider Indirection
16726 // If we are performing pointer operations, make sure we treat that as equivalent to a wider
16727 // indirection. This is because the pointers could be pointing to the address of struct fields
16728 // and could be used to perform operations on the whole struct or passed to another method.
16730 // When visiting a node in this pre-order walk, we do not know whether we will later
16731 // encounter a GT_ADDR of a GT_FIELD below.
16733 // Note: GT_ADDR of a GT_FIELD is always a TYP_BYREF.
16734 // So let us be conservative and treat TYP_BYREF operations as AXC_IndWide and propagate a
16735 // wider indirection context down the expr tree.
16737 // For example, in unsafe code:
16739 // IL_000e 12 00 ldloca.s 0x0
16740 // IL_0010 7c 02 00 00 04 ldflda 0x4000002
16741 // IL_0015 12 00 ldloca.s 0x0
16742 // IL_0017 7c 01 00 00 04 ldflda 0x4000001
16745 // When visiting the GT_SUB node, if the type of either of the GT_SUB's operands is BYREF, then
16746 // consider the GT_SUB to be equivalent to an AXC_IndWide.
16748 // Similarly, for pointer comparisons and for pointers escaping as integers through conversions,
16749 // treat them as AXC_IndWide.
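// (In the IL above, the two ldflda results are byrefs to fields of the same local;
// arithmetic on them could reach any part of that local, hence the wider context.)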
16773 if ((tree->gtOp.gtOp1->gtType == TYP_BYREF) || (tree->OperIsBinary() && (tree->gtOp.gtOp2->gtType == TYP_BYREF)))
16775 axcStack->Push(AXC_IndWide);
16776 return WALK_CONTINUE;
16781 // To be safe/conservative: pass Addr through, but not Ind -- otherwise, revert to "None". We must
16782 // handle the "Ind" propagation explicitly above.
16783 if (axc == AXC_Addr || axc == AXC_AddrWide)
16785 axcStack->Push(axc);
16789 axcStack->Push(AXC_None);
16791 return WALK_CONTINUE;
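// fgFitsInOrNotLoc: return true if a block operation of 'width' bytes on the location
// denoted by 'tree' stays within that location's storage. E.g. (a sketch) a 16-byte
// copyblk whose address argument is the address of an 8-byte local does not fit, so
// that local must be treated as wide-address-taken.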
16795 bool Compiler::fgFitsInOrNotLoc(GenTreePtr tree, unsigned width)
16797 if (tree->TypeGet() != TYP_STRUCT)
16799 return width <= genTypeSize(tree->TypeGet());
16801 else if (tree->OperGet() == GT_LCL_VAR)
16803 assert(tree->TypeGet() == TYP_STRUCT);
16804 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
16805 return width <= lvaTable[lclNum].lvExactSize;
16807 else if (tree->OperGet() == GT_FIELD)
16809 CORINFO_CLASS_HANDLE fldClass = info.compCompHnd->getFieldClass(tree->gtField.gtFldHnd);
16810 return width <= info.compCompHnd->getClassSize(fldClass);
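// fgAddFieldSeqForZeroOffset: annotate the address 'op1' with 'fieldSeq', the field
// sequence for a field at offset zero. E.g. (a sketch) for struct S { int f0; int f1; },
// &s and &s.f0 are the same address, so f0's field sequence is appended to the address
// node's existing sequence, or recorded in the zero-offset map when the node has no
// field-sequence slot.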
16819 void Compiler::fgAddFieldSeqForZeroOffset(GenTreePtr op1, FieldSeqNode* fieldSeq)
16821 assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_REF);
16823 switch (op1->OperGet())
16826 if (op1->gtOp.gtOp1->OperGet() == GT_LCL_FLD)
16828 GenTreeLclFld* lclFld = op1->gtOp.gtOp1->AsLclFld();
16829 lclFld->gtFieldSeq = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq);
16834 if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT)
16836 FieldSeqNode* op1Fs = op1->gtOp.gtOp1->gtIntCon.gtFieldSeq;
16839 op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
16840 op1->gtOp.gtOp1->gtIntCon.gtFieldSeq = op1Fs;
16843 else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT)
16845 FieldSeqNode* op2Fs = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
16848 op2Fs = GetFieldSeqStore()->Append(op2Fs, fieldSeq);
16849 op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op2Fs;
16856 FieldSeqNode* op1Fs = op1->gtIntCon.gtFieldSeq;
16859 op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
16860 op1->gtIntCon.gtFieldSeq = op1Fs;
16866 // Record in the general zero-offset map.
16867 GetZeroOffsetFieldMap()->Set(op1, fieldSeq);
16872 /*****************************************************************************
16874 * Mark address-taken locals.
16877 void Compiler::fgMarkAddressExposedLocals()
16881 printf("\n*************** In fgMarkAddressExposedLocals()\n");
16884 BasicBlock* block = fgFirstBB;
16885 noway_assert(block);
16889 /* Make the current basic block address available globally */
16895 for (stmt = block->bbTreeList;
16897 stmt = stmt->gtNext)
16899 // Walk each tree, calling fgMarkAddrTakenLocalsPreCB/PostCB on each node
16900 AXCStack stk(this);
16901 stk.Push(AXC_None); // We start in neither an addr nor an ind context.
16902 fgWalkTree(&stmt->gtStmt.gtStmtExpr,
16903 fgMarkAddrTakenLocalsPreCB,
16904 fgMarkAddrTakenLocalsPostCB,
16908 block = block->bbNext;
16914 // fgNodesMayInterfere:
16915 // return true if moving nodes relative to each other can change the result of a computation
16918 // read: a node which reads
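//
// For example (a sketch): a store to an address-exposed local may interfere with any
// indirect read, since the indirection could dereference that local's address; two
// distinct locals that are not address-exposed never interfere.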
16921 bool Compiler::fgNodesMayInterfere(GenTree* write, GenTree* read)
16923 LclVarDsc* srcVar = nullptr;
16924 bool srcAliased = false;
16925 bool dstAliased = false;
16927 bool readIsIndir = read->OperIsIndir() || read->OperIsImplicitIndir();
16928 bool writeIsIndir = write->OperIsIndir() || write->OperIsImplicitIndir();
16930 if (read->OperIsLocal())
16931 srcVar = &lvaTable[read->gtLclVarCommon.gtLclNum];
16935 if (srcVar && srcVar->lvAddrExposed)
16937 else if (readIsIndir)
16941 else if (write->OperIsLocal())
16943 LclVarDsc* dstVar = &lvaTable[write->gtLclVarCommon.gtLclNum];
16946 return dstVar->lvAddrExposed;
16948 else if (read->OperIsLocal())
16950 if (read->gtLclVarCommon.gtLclNum == write->gtLclVarCommon.gtLclNum)
16965 /** This predicate decides whether we will fold a tree with the structure:
16966 * x = x <op> y, where x could be any arbitrary expression, into x <op>= y.
16969 * This modification is only performed when the target architecture supports
16970 * complex addressing modes. In the case of ARM for example, this transformation
16971 * yields no benefit.
16973 * In case this function decides we can proceed to fold into an assignment operator,
16974 * we need to inspect whether the operator is commutative, to tell fgMorph whether it needs to
16975 * reverse the tree: if we saw x = y <op> x, we want to fold that into
16976 * x <op>= y, which is only valid because of the operator's commutativity.
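*
* E.g. (a sketch): x = x + y folds to x += y directly; x = y + x also folds to
* x += y, with *bReverse set, which is only legal because '+' is commutative.
*/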
16978 bool Compiler::fgShouldCreateAssignOp(GenTreePtr tree, bool* bReverse)
16980 #if CPU_LOAD_STORE_ARCH
16981 /* In the case of a load/store architecture, there's no gain by doing any of this, we bail. */
16983 #elif !defined(LEGACY_BACKEND)
16985 #else // defined(LEGACY_BACKEND)
16988 GenTreePtr op1 = tree->gtOp.gtOp1;
16989 GenTreePtr op2 = tree->gtGetOp2();
16990 genTreeOps cmop = op2->OperGet();
16992 /* Is the destination identical to the first RHS sub-operand? */
16993 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
16996 Do not transform the following tree
16998 [0024CFA4] ----------- const int 1
16999 [0024CFDC] ----G------ | int
17000 [0024CF5C] ----------- lclVar ubyte V01 tmp0
17001 [0024D05C] -A--G------ = ubyte
17002 [0024D014] D------N--- lclVar ubyte V01 tmp0
17006 [0024CFA4] ----------- const int 1
17007 [0024D05C] -A--G------ |= ubyte
17008 [0024D014] U------N--- lclVar ubyte V01 tmp0
17010 , when V01 is a struct field local.
17013 if (op1->gtOper == GT_LCL_VAR &&
17014 varTypeIsSmall(op1->TypeGet()) &&
17015 op1->TypeGet() != op2->gtOp.gtOp2->TypeGet())
17017 unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
17018 LclVarDsc* varDsc = lvaTable + lclNum;
17020 if (varDsc->lvIsStructField)
17029 else if (GenTree::OperIsCommutative(cmop))
17031 /* For commutative ops only, check for "a = x <op> a" */
17033 /* Should we be doing this at all? */
17034 if ((opts.compFlags & CLFLG_TREETRANS) == 0)
17039 /* Can we swap the operands to cmop ... */
17040 if ((op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) &&
17041 (op2->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT))
17043 // Both sides have side effects, which prevents swapping the operands
17047 /* Is the destination identical to the second RHS sub-operand? */
17048 if (GenTree::Compare(op1, op2->gtOp.gtOp2))
17055 #endif // defined(LEGACY_BACKEND)
17058 // Static variables.
17059 Compiler::MorphAddrContext Compiler::s_CopyBlockMAC(Compiler::MACK_CopyBlock);
17061 #ifdef FEATURE_SIMD
17063 //-----------------------------------------------------------------------------------
17064 // fgMorphCombineSIMDFieldAssignments:
17065 // If the RHS of the input stmt is a read of the SIMD vector's X field, then this function
17066 // will keep examining the next few stmts, based on the vector size (2, 3, or 4).
17067 // If the LHS of those stmts are located contiguously, and the RHS are also located
17068 // contiguously, then we replace those statements with a single copyblk.
17071 // block - BasicBlock*. block which stmt belongs to
17072 // stmt - GenTreeStmt*. the stmt node we want to check
17075 // true if this function successfully optimized the stmts; otherwise false.
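//
// Notes:
//    E.g. (a sketch) for a Vector4 v and a float array a, the four statements
//        a[0] = v.X; a[1] = v.Y; a[2] = v.Z; a[3] = v.W;
//    are replaced by one 16-byte copyblk from the address of v to the address of a[0].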
17078 bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr stmt)
17081 noway_assert(stmt->gtOper == GT_STMT);
17082 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
17083 assert(tree->OperGet() == GT_ASG);
17085 GenTreePtr originalLHS = tree->gtOp.gtOp1;
17086 GenTreePtr prevLHS = tree->gtOp.gtOp1;
17087 GenTreePtr prevRHS = tree->gtOp.gtOp2;
17088 unsigned index = 0;
17089 var_types baseType = TYP_UNKNOWN;
17090 unsigned simdSize = 0;
17091 GenTreePtr simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true);
17093 if (simdStructNode == nullptr ||
17095 baseType != TYP_FLOAT)
17097 // If the RHS is not a read of a SIMD vector's X field, then there is no need to check further.
17101 int assignmentsCount = simdSize / genTypeSize(baseType) - 1;
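// E.g. (a sketch) for a 16-byte vector of floats: 16 / 4 - 1 = 3 further
// assignments are expected after the current one.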
17102 int remainingAssignments = assignmentsCount;
17103 GenTreePtr curStmt = stmt->gtNext;
17104 GenTreePtr lastStmt = stmt;
17106 while (curStmt != nullptr && remainingAssignments > 0)
17108 GenTreePtr exp = curStmt->gtStmt.gtStmtExpr;
17109 if (exp->OperGet() != GT_ASG)
17113 GenTreePtr curLHS = exp->gtGetOp1();
17114 GenTreePtr curRHS = exp->gtGetOp2();
17116 if (!areArgumentsContiguous(prevLHS, curLHS) ||
17117 !areArgumentsContiguous(prevRHS, curRHS))
17122 remainingAssignments--;
17126 lastStmt = curStmt;
17127 curStmt = curStmt->gtNext;
17130 if (remainingAssignments > 0)
17132 // If the number of remaining assignments is greater than zero, then the
17133 // assignments are not assigning to contiguous memory
17134 // locations from the same vector.
17140 printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
17141 printf("From BB%02u, stmt", block->bbNum);
17143 printf(" to stmt");
17144 printTreeID(lastStmt);
17150 for (int i = 0; i < assignmentsCount; i++)
17152 fgRemoveStmt(block, stmt->gtNext);
17155 GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
17156 if (simdStructNode->OperIsLocal())
17158 setLclRelatedToSIMDIntrinsic(simdStructNode);
17160 GenTree* copyBlkAddr = copyBlkDst;
17161 if (copyBlkAddr->gtOper == GT_LEA)
17163 copyBlkAddr = copyBlkAddr->AsAddrMode()->Base();
17165 GenTreeLclVarCommon* localDst = nullptr;
17166 if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr))
17168 setLclRelatedToSIMDIntrinsic(localDst);
17171 GenTree* simdStructAddr;
17172 if (simdStructNode->TypeGet() == TYP_BYREF)
17174 assert(simdStructNode->OperIsLocal());
17175 assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum));
17176 simdStructAddr = simdStructNode;
17180 assert(varTypeIsSIMD(simdStructNode));
17181 simdStructAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, simdStructNode);
17187 printf("\nBB%02u stmt", block->bbNum);
17189 printf("(before)\n");
17194 tree = gtNewBlkOpNode(GT_COPYBLK,
17197 gtNewIconNode(simdSize),
17200 stmt->gtStmt.gtStmtExpr = tree;
17202 // Since we generated a new address node which didn't exist before,
17203 // we should expose this address manually here.
17204 AXCStack stk(this);
17205 stk.Push(AXC_None);
17206 fgWalkTree(&stmt->gtStmt.gtStmtExpr,
17207 fgMarkAddrTakenLocalsPreCB,
17208 fgMarkAddrTakenLocalsPostCB,
17214 printf("\nReplaced BB%02u stmt", block->bbNum);
17216 printf("(after)\n");
17223 #endif //FEATURE_SIMD