1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
19 /*****************************************************************************/
21 /*****************************************************************************/
24 const size_t Compiler::s_optCSEhashSize = EXPSET_SZ * 2;
26 /*****************************************************************************
28 * We've found all the candidates, build the index for easy access.
31 void Compiler::optCSEstop()
33 if (optCSECandidateCount == 0)
42 optCSEtab = new (this, CMK_CSE) CSEdsc*[optCSECandidateCount]();
44 for (cnt = s_optCSEhashSize, ptr = optCSEhash; cnt; cnt--, ptr++)
46 for (dsc = *ptr; dsc; dsc = dsc->csdNextInBucket)
50 noway_assert((unsigned)dsc->csdIndex <= optCSECandidateCount);
51 if (optCSEtab[dsc->csdIndex - 1] == nullptr)
53 optCSEtab[dsc->csdIndex - 1] = dsc;
60 for (cnt = 0; cnt < optCSECandidateCount; cnt++)
62 noway_assert(optCSEtab[cnt] != nullptr);
67 /*****************************************************************************
69 * Return the descriptor for the CSE with the given index.
72 inline Compiler::CSEdsc* Compiler::optCSEfindDsc(unsigned index)
75 noway_assert(index <= optCSECandidateCount);
76 noway_assert(optCSEtab[index - 1]);
78 return optCSEtab[index - 1];
81 //------------------------------------------------------------------------
82 // Compiler::optUnmarkCSE
85 // tree - A sub tree that originally was part of a CSE use
86 // that we are currently in the process of removing.
89 // Returns true if we can safely remove the 'tree' node.
90 // Returns false if the node is a CSE def that the caller
91 // needs to extract and preserve.
94 // If 'tree' is a CSE use then we perform an unmark CSE operation
95 // so that the CSE used counts and weight are updated properly.
96 // The only caller for this method is optUnmarkCSEs which is a
97 // tree walker vistor function. When we return false this method
98 // returns WALK_SKIP_SUBTREES so that we don't visit the remaining
99 // nodes of the CSE def.
101 bool Compiler::optUnmarkCSE(GenTree* tree)
103 if (!IS_CSE_INDEX(tree->gtCSEnum))
105 // If this node isn't a CSE use or def we can safely remove this node.
110 // make sure it's been initialized
111 noway_assert(optCSEweight <= BB_MAX_WEIGHT);
113 // Is this a CSE use?
114 if (IS_CSE_USE(tree->gtCSEnum))
116 unsigned CSEnum = GET_CSE_INDEX(tree->gtCSEnum);
117 CSEdsc* desc = optCSEfindDsc(CSEnum);
122 printf("Unmark CSE use #%02d at ", CSEnum);
124 printf(": %3d -> %3d\n", desc->csdUseCount, desc->csdUseCount - 1);
128 // Perform an unmark CSE operation
130 // 1. Reduce the nested CSE's 'use' count
132 noway_assert(desc->csdUseCount > 0);
134 if (desc->csdUseCount > 0)
136 desc->csdUseCount -= 1;
138 if (desc->csdUseWtCnt < optCSEweight)
140 desc->csdUseWtCnt = 0;
144 desc->csdUseWtCnt -= optCSEweight;
148 // 2. Unmark the CSE infomation in the node
150 tree->gtCSEnum = NO_CSE;
155 // It is not safe to remove this node, so we will return false
156 // and the caller must add this node to the side effect list
162 Compiler::fgWalkResult Compiler::optHasNonCSEChild(GenTree** pTree, fgWalkData* data)
164 if (*pTree == data->pCallbackData)
166 return WALK_CONTINUE;
169 if ((*pTree)->gtFlags & GTF_DONT_CSE)
172 // Fix 392756 WP7 Crossgen
173 // Don't propagate the GTF_DONT_CSE flag up from a GT_CNS_INT
175 // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
176 // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
177 // Hence to prevent the constant from becoming a CSE we have marked it as NO_CSE, but this
178 // should not prevent tree's above the constant from becoming CSE's.
180 if ((*pTree)->gtOper == GT_CNS_INT)
182 return WALK_SKIP_SUBTREES;
188 return WALK_SKIP_SUBTREES;
191 Compiler::fgWalkResult Compiler::optPropagateNonCSE(GenTree** pTree, fgWalkData* data)
193 GenTree* tree = *pTree;
194 Compiler* comp = data->compiler;
196 /* Calls get DONT_CSE implicitly */
197 if (tree->OperGet() == GT_CALL)
199 if (!IsSharedStaticHelper(tree))
201 tree->gtFlags |= GTF_DONT_CSE;
205 if ((tree->gtFlags & GTF_DONT_CSE) == 0)
207 /* Propagate the DONT_CSE flag from child to parent */
208 if (comp->fgWalkTreePre(&tree, optHasNonCSEChild, tree) == WALK_ABORT)
210 tree->gtFlags |= GTF_DONT_CSE;
214 return WALK_CONTINUE;
217 //------------------------------------------------------------------------
218 // Compiler::optUnmarkCSEs
219 // Helper passed to Compiler::fgWalkAllTreesPre() to unmark nested CSE's.
221 // argument 'pTree' is a pointer to a GenTree*
222 // argument 'data' contains pCallbackData which needs to be cast from void*
223 // to our actual 'wbKeepList' a pointer to a GenTree*
225 // Any nodes visited that are in the 'keepList' are kept and we
226 // return WALK_SKIP_SUBTREES for them
227 // For any node not in the 'keepList' we call optUnmarkCSE and either
228 // remove the node and decrement the LclVar ref counts
229 // or add the subtree to 'wbKeepList' using gtBuildCommaList
230 // and return WALK_SKIP_SUBTREES.
234 Compiler::fgWalkResult Compiler::optUnmarkCSEs(GenTree** pTree, fgWalkData* data)
236 GenTree* tree = *pTree;
237 Compiler* comp = data->compiler;
238 GenTree** wbKeepList = (GenTree**)(data->pCallbackData);
239 GenTree* keepList = nullptr;
241 noway_assert(wbKeepList != nullptr);
243 keepList = *wbKeepList;
245 // We may already have a side effect list that is being kept
249 GenTree* keptTree = keepList;
251 while (keptTree->OperGet() == GT_COMMA)
253 assert(keptTree->OperKind() & GTK_SMPOP);
254 GenTree* op1 = keptTree->gtOp.gtOp1;
255 GenTree* op2 = keptTree->gtGetOp2();
257 // For the GT_COMMA case the op1 is part of the original CSE tree
258 // that is being kept because it contains some side-effect
262 // This tree and all of its sub trees are being kept
263 return WALK_SKIP_SUBTREES;
266 // For the GT_COMMA case, op2 is the remaining side-effects of the
267 // original CSE tree. This can again be another GT_COMMA or the
268 // final side-effect part.
272 if (tree == keptTree)
274 // This tree and all of its sub trees are being kept
275 return WALK_SKIP_SUBTREES;
278 // Now 'tree' is known to be a node that is not in the 'keepList',
279 // thus we may be allowed to remove it.
281 if (comp->optUnmarkCSE(tree))
283 // The call to optUnmarkCSE(tree) should have cleared any CSE info
285 assert(!IS_CSE_INDEX(tree->gtCSEnum));
287 // This node is to be removed from the graph of GenTree*
288 // next decrement any LclVar references.
290 if (tree->gtOper == GT_LCL_VAR)
295 // This variable ref is going away, decrease its ref counts
297 lclNum = tree->gtLclVarCommon.gtLclNum;
298 assert(lclNum < comp->lvaCount);
299 varDsc = comp->lvaTable + lclNum;
301 // make sure it's been initialized
302 assert(comp->optCSEweight <= BB_MAX_WEIGHT);
304 // Decrement its lvRefCnt and lvRefCntWtd
306 varDsc->decRefCnts(comp->optCSEweight, comp);
309 else // optUnmarkCSE(tree) returned false
311 // This node must be a CSE def and it can not be removed.
312 // Instead we will add it to the 'keepList'.
313 assert(IS_CSE_DEF(tree->gtCSEnum));
315 // The prior call to optHasCSEdefWithSideeffect ensures this
317 assert(!comp->gtTreeHasSideEffects(tree, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE));
322 printf("Preserving the CSE def #%02d at ", GET_CSE_INDEX(tree->gtCSEnum));
324 printf(" because it is nested inside a CSE use\n");
328 // This tree and all of its sub-trees will be kept
330 *wbKeepList = comp->gtBuildCommaList(*wbKeepList, tree);
332 return WALK_SKIP_SUBTREES;
335 return WALK_CONTINUE;
338 //------------------------------------------------------------------------
339 // Compiler::optHasCSEdefWithSideeffect
340 // Helper passed to Compiler::fgWalkAllTreesPre() to deternine if
341 // a sub-tree has a CSE def that contains persistent side-effects.
343 // We will return WALK_ABORT upon encountering the case above.
344 // A final return value of WALK_CONTINUE or WALK_SKIP_SUBTREES means that
345 // there wasn't a CSE def with persistent side-effects.
347 // argument 'pTree' is a pointer to a GenTree*
348 // argument 'data' contains pCallbackData which needs to be cast from void*
349 // to our actual 'wbKeepList' a pointer to a GenTree*
351 // Any nodes visited that are in the 'keepList' are skipped
352 // For visted node not in the 'keepList' we check if the node is a CSE def
353 // and if it is then we check if it has persistent side-effects
354 // and if so we return WALK_ABORT.
358 Compiler::fgWalkResult Compiler::optHasCSEdefWithSideeffect(GenTree** pTree, fgWalkData* data)
360 GenTree* tree = *pTree;
361 Compiler* comp = data->compiler;
362 GenTree** wbKeepList = (GenTree**)(data->pCallbackData);
363 GenTree* keepList = nullptr;
365 noway_assert(wbKeepList != nullptr);
367 keepList = *wbKeepList;
369 // We may already have a side effect list that is being kept
373 GenTree* keptTree = keepList;
375 while (keptTree->OperGet() == GT_COMMA)
377 assert(keptTree->OperKind() & GTK_SMPOP);
378 GenTree* op1 = keptTree->gtOp.gtOp1;
379 GenTree* op2 = keptTree->gtGetOp2();
381 // For the GT_COMMA case the op1 is part of the original CSE tree
382 // that is being kept because it contains some side-effect
386 // This tree and all of its sub trees are being kept
387 return WALK_SKIP_SUBTREES;
390 // For the GT_COMMA case, op2 is the remaining side-effects of the
391 // original CSE tree. This can again be another GT_COMMA or the
392 // final side-effect part.
396 if (tree == keptTree)
398 // This tree and all of its sub trees are being kept
399 return WALK_SKIP_SUBTREES;
402 // Now 'tree' is known to be a node that is not in the 'keepList',
404 // We will check if it is a CSE def
406 if (IS_CSE_DEF(tree->gtCSEnum))
408 // This node is a CSE def
410 // If this node contains any persistent side effects then we will return WALK_ABORT.
412 if (comp->gtTreeHasSideEffects(tree, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE))
414 // This nested CSE def contains a persistent side effect
415 // We just abort now as this case is problematic.
420 return WALK_CONTINUE;
423 Compiler::fgWalkResult Compiler::optCSE_MaskHelper(GenTree** pTree, fgWalkData* walkData)
425 GenTree* tree = *pTree;
426 Compiler* comp = walkData->compiler;
427 optCSE_MaskData* pUserData = (optCSE_MaskData*)(walkData->pCallbackData);
429 if (IS_CSE_INDEX(tree->gtCSEnum))
431 unsigned cseIndex = GET_CSE_INDEX(tree->gtCSEnum);
432 unsigned cseBit = genCSEnum2bit(cseIndex);
433 if (IS_CSE_DEF(tree->gtCSEnum))
435 BitVecOps::AddElemD(comp->cseTraits, pUserData->CSE_defMask, cseBit);
439 BitVecOps::AddElemD(comp->cseTraits, pUserData->CSE_useMask, cseBit);
443 return WALK_CONTINUE;
446 // This functions walks all the node for an given tree
447 // and return the mask of CSE defs and uses for the tree
449 void Compiler::optCSE_GetMaskData(GenTree* tree, optCSE_MaskData* pMaskData)
451 pMaskData->CSE_defMask = BitVecOps::MakeEmpty(cseTraits);
452 pMaskData->CSE_useMask = BitVecOps::MakeEmpty(cseTraits);
453 fgWalkTreePre(&tree, optCSE_MaskHelper, (void*)pMaskData);
456 //------------------------------------------------------------------------
457 // optCSE_canSwap: Determine if the execution order of two nodes can be swapped.
460 // op1 - The first node
461 // op2 - The second node
464 // Return true iff it safe to swap the execution order of 'op1' and 'op2',
465 // considering only the locations of the CSE defs and uses.
468 // 'op1' currently occurse before 'op2' in the execution order.
470 bool Compiler::optCSE_canSwap(GenTree* op1, GenTree* op2)
472 // op1 and op2 must be non-null.
473 assert(op1 != nullptr);
474 assert(op2 != nullptr);
476 bool canSwap = true; // the default result unless proven otherwise.
478 optCSE_MaskData op1MaskData;
479 optCSE_MaskData op2MaskData;
481 optCSE_GetMaskData(op1, &op1MaskData);
482 optCSE_GetMaskData(op2, &op2MaskData);
484 // We cannot swap if op1 contains a CSE def that is used by op2
485 if (!BitVecOps::IsEmptyIntersection(cseTraits, op1MaskData.CSE_defMask, op2MaskData.CSE_useMask))
491 // We also cannot swap if op2 contains a CSE def that is used by op1.
492 if (!BitVecOps::IsEmptyIntersection(cseTraits, op2MaskData.CSE_defMask, op1MaskData.CSE_useMask))
501 //------------------------------------------------------------------------
502 // optCSE_canSwap: Determine if the execution order of a node's operands can be swapped.
505 // tree - The node of interest
508 // Return true iff it safe to swap the execution order of the operands of 'tree',
509 // considering only the locations of the CSE defs and uses.
511 bool Compiler::optCSE_canSwap(GenTree* tree)
513 // We must have a binary treenode with non-null op1 and op2
514 assert((tree->OperKind() & GTK_SMPOP) != 0);
516 GenTree* op1 = tree->gtOp.gtOp1;
517 GenTree* op2 = tree->gtGetOp2();
519 return optCSE_canSwap(op1, op2);
522 /*****************************************************************************
524 * Compare function passed to qsort() by CSE_Heuristic::SortCandidates
525 * when (CodeOptKind() != Compiler::SMALL_CODE)
529 int __cdecl Compiler::optCSEcostCmpEx(const void* op1, const void* op2)
531 CSEdsc* dsc1 = *(CSEdsc**)op1;
532 CSEdsc* dsc2 = *(CSEdsc**)op2;
534 GenTree* exp1 = dsc1->csdTree;
535 GenTree* exp2 = dsc2->csdTree;
539 diff = (int)(exp2->gtCostEx - exp1->gtCostEx);
546 // Sort the higher Use Counts toward the top
547 diff = (int)(dsc2->csdUseWtCnt - dsc1->csdUseWtCnt);
554 // With the same use count, Sort the lower Def Counts toward the top
555 diff = (int)(dsc1->csdDefWtCnt - dsc2->csdDefWtCnt);
562 // In order to ensure that we have a stable sort, we break ties using the csdIndex
563 return (int)(dsc1->csdIndex - dsc2->csdIndex);
566 /*****************************************************************************
568 * Compare function passed to qsort() by CSE_Heuristic::SortCandidates
569 * when (CodeOptKind() == Compiler::SMALL_CODE)
573 int __cdecl Compiler::optCSEcostCmpSz(const void* op1, const void* op2)
575 CSEdsc* dsc1 = *(CSEdsc**)op1;
576 CSEdsc* dsc2 = *(CSEdsc**)op2;
578 GenTree* exp1 = dsc1->csdTree;
579 GenTree* exp2 = dsc2->csdTree;
583 diff = (int)(exp2->gtCostSz - exp1->gtCostSz);
590 // Sort the higher Use Counts toward the top
591 diff = (int)(dsc2->csdUseCount - dsc1->csdUseCount);
598 // With the same use count, Sort the lower Def Counts toward the top
599 diff = (int)(dsc1->csdDefCount - dsc2->csdDefCount);
606 // In order to ensure that we have a stable sort, we break ties using the csdIndex
607 return (int)(dsc1->csdIndex - dsc2->csdIndex);
610 /*****************************************************************************/
611 #if FEATURE_VALNUM_CSE
612 /*****************************************************************************/
614 /*****************************************************************************
616 * Initialize the Value Number CSE tracking logic.
619 void Compiler::optValnumCSE_Init()
625 // Init traits and full/empty bitvectors. This will be used to track the
626 // individual cse indexes.
627 cseTraits = new (getAllocator()) BitVecTraits(EXPSET_SZ, this);
628 cseFull = BitVecOps::MakeFull(cseTraits);
630 /* Allocate and clear the hash bucket table */
632 optCSEhash = new (this, CMK_CSE) CSEdsc*[s_optCSEhashSize]();
634 optCSECandidateCount = 0;
635 optDoCSE = false; // Stays false until we find duplicate CSE tree
637 // optCseCheckedBoundMap is unused in most functions, allocated only when used
638 optCseCheckedBoundMap = nullptr;
641 /*****************************************************************************
643 * Assign an index to the given expression (adding it to the lookup table,
644 * if necessary). Returns the index or 0 if the expression can not be a CSE.
647 unsigned Compiler::optValnumCSE_Index(GenTree* tree, GenTree* stmt)
654 ValueNum vnlib = tree->GetVN(VNK_Liberal);
656 /* Compute the hash value for the expression */
658 key = (unsigned)vnlib;
661 hash *= (unsigned)(s_optCSEhashSize + 1);
664 hval = hash % s_optCSEhashSize;
666 /* Look for a matching index in the hash table */
670 for (hashDsc = optCSEhash[hval]; hashDsc; hashDsc = hashDsc->csdNextInBucket)
672 if (hashDsc->csdHashValue == key)
674 treeStmtLst* newElem;
676 /* Have we started the list of matching nodes? */
678 if (hashDsc->csdTreeList == nullptr)
680 // Create the new element based upon the matching hashDsc element.
682 newElem = new (this, CMK_TreeStatementList) treeStmtLst;
684 newElem->tslTree = hashDsc->csdTree;
685 newElem->tslStmt = hashDsc->csdStmt;
686 newElem->tslBlock = hashDsc->csdBlock;
687 newElem->tslNext = nullptr;
689 /* Start the list with the first CSE candidate recorded */
691 hashDsc->csdTreeList = newElem;
692 hashDsc->csdTreeLast = newElem;
695 noway_assert(hashDsc->csdTreeList);
697 /* Append this expression to the end of the list */
699 newElem = new (this, CMK_TreeStatementList) treeStmtLst;
701 newElem->tslTree = tree;
702 newElem->tslStmt = stmt;
703 newElem->tslBlock = compCurBB;
704 newElem->tslNext = nullptr;
706 hashDsc->csdTreeLast->tslNext = newElem;
707 hashDsc->csdTreeLast = newElem;
709 optDoCSE = true; // Found a duplicate CSE tree
711 /* Have we assigned a CSE index? */
712 if (hashDsc->csdIndex == 0)
718 // Use this to see if this Value Number base CSE is also a lexical CSE
719 bool treeMatch = GenTree::Compare(hashDsc->csdTree, tree, true);
722 assert(FitsIn<signed char>(hashDsc->csdIndex));
723 tree->gtCSEnum = ((signed char)hashDsc->csdIndex);
724 return hashDsc->csdIndex;
730 /* Not found, create a new entry (unless we have too many already) */
732 if (optCSECandidateCount < MAX_CSE_CNT)
734 hashDsc = new (this, CMK_CSE) CSEdsc;
736 hashDsc->csdHashValue = key;
737 hashDsc->csdIndex = 0;
738 hashDsc->csdLiveAcrossCall = 0;
739 hashDsc->csdDefCount = 0;
740 hashDsc->csdUseCount = 0;
741 hashDsc->csdDefWtCnt = 0;
742 hashDsc->csdUseWtCnt = 0;
744 hashDsc->csdTree = tree;
745 hashDsc->csdStmt = stmt;
746 hashDsc->csdBlock = compCurBB;
747 hashDsc->csdTreeList = nullptr;
749 /* Append the entry to the hash bucket */
751 hashDsc->csdNextInBucket = optCSEhash[hval];
752 optCSEhash[hval] = hashDsc;
756 else // newCSE is true
758 /* We get here only after finding a matching CSE */
760 /* Create a new CSE (unless we have the maximum already) */
762 if (optCSECandidateCount == MAX_CSE_CNT)
767 C_ASSERT((signed char)MAX_CSE_CNT == MAX_CSE_CNT);
769 unsigned CSEindex = ++optCSECandidateCount;
770 // EXPSET_TP CSEmask = genCSEnum2bit(CSEindex);
772 /* Record the new CSE index in the hashDsc */
773 hashDsc->csdIndex = CSEindex;
775 /* Update the gtCSEnum field in the original tree */
776 noway_assert(hashDsc->csdTreeList->tslTree->gtCSEnum == 0);
777 assert(FitsIn<signed char>(CSEindex));
779 hashDsc->csdTreeList->tslTree->gtCSEnum = ((signed char)CSEindex);
780 noway_assert(((unsigned)hashDsc->csdTreeList->tslTree->gtCSEnum) == CSEindex);
782 tree->gtCSEnum = ((signed char)CSEindex);
787 EXPSET_TP tempMask = BitVecOps::MakeSingleton(cseTraits, genCSEnum2bit(CSEindex));
788 printf("\nCSE candidate #%02u, vn=", CSEindex);
790 printf(" cseMask=%s in BB%02u, [cost=%2u, size=%2u]: \n", genES2str(cseTraits, tempMask), compCurBB->bbNum,
791 tree->gtCostEx, tree->gtCostSz);
800 /*****************************************************************************
802 * Locate CSE candidates and assign indices to them
803 * return 0 if no CSE candidates were found
804 * Also initialize bbCseIn, bbCseout and bbCseGen sets for all blocks
807 unsigned Compiler::optValnumCSE_Locate()
809 // Locate CSE candidates and assign them indices
811 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
816 /* Make the block publicly available */
820 /* Ensure that the BBF_VISITED and BBF_MARKED flag are clear */
821 /* Everyone who uses these flags are required to clear afterwards */
822 noway_assert((block->bbFlags & (BBF_VISITED | BBF_MARKED)) == 0);
824 /* Walk the statement trees in this basic block */
825 for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
827 noway_assert(stmt->gtOper == GT_STMT);
829 /* We walk the tree in the forwards direction (bottom up) */
830 bool stmtHasArrLenCandidate = false;
831 for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
833 if (tree->OperIsCompare() && stmtHasArrLenCandidate)
835 // Check if this compare is a function of (one of) the checked
836 // bound candidate(s); we may want to update its value number.
837 // if the array length gets CSEd
838 optCseUpdateCheckedBoundMap(tree);
841 if (!optIsCSEcandidate(tree))
846 ValueNum vnlib = tree->GetVN(VNK_Liberal);
848 if (ValueNumStore::isReservedVN(vnlib))
853 // Don't CSE constant values, instead let the Value Number
854 // based Assertion Prop phase handle them. Here, unlike
855 // the rest of optCSE, we use the conservative value number
856 // rather than the liberal one, since the conservative one
857 // is what the Value Number based Assertion Prop will use
858 // and the point is to avoid optimizing cases that it will
861 if (vnStore->IsVNConstant(tree->GetVN(VNK_Conservative)))
866 /* Assign an index to this expression */
868 unsigned CSEindex = optValnumCSE_Index(tree, stmt);
872 noway_assert(((unsigned)tree->gtCSEnum) == CSEindex);
875 if (IS_CSE_INDEX(CSEindex) && (tree->OperGet() == GT_ARR_LENGTH))
877 stmtHasArrLenCandidate = true;
883 /* We're done if there were no interesting expressions */
890 /* We're finished building the expression lookup table */
897 //------------------------------------------------------------------------
898 // optCseUpdateCheckedBoundMap: Check if this compare is a tractable function of
899 // a checked bound that is a CSE candidate, and insert
900 // an entry in the optCseCheckedBoundMap if so. This facilitates
901 // subsequently updating the compare's value number if
902 // the bound gets CSEd.
905 // compare - The compare node to check
907 void Compiler::optCseUpdateCheckedBoundMap(GenTree* compare)
909 assert(compare->OperIsCompare());
911 ValueNum compareVN = compare->gtVNPair.GetConservative();
912 VNFuncApp cmpVNFuncApp;
914 if (!vnStore->GetVNFunc(compareVN, &cmpVNFuncApp) ||
915 (cmpVNFuncApp.m_func != GetVNFuncForOper(compare->OperGet(), compare->IsUnsigned())))
917 // Value numbering inferred this compare as something other
918 // than its own operator; leave its value number alone.
922 // Now look for a checked bound feeding the compare
923 ValueNumStore::CompareCheckedBoundArithInfo info;
925 GenTree* boundParent = nullptr;
927 if (vnStore->IsVNCompareCheckedBound(compareVN))
929 // Simple compare of an bound against something else.
931 vnStore->GetCompareCheckedBound(compareVN, &info);
932 boundParent = compare;
934 else if (vnStore->IsVNCompareCheckedBoundArith(compareVN))
936 // Compare of a bound +/- some offset to something else.
938 GenTree* op1 = compare->gtGetOp1();
939 GenTree* op2 = compare->gtGetOp2();
941 vnStore->GetCompareCheckedBoundArithInfo(compareVN, &info);
942 if (GetVNFuncForOper(op1->OperGet(), op1->IsUnsigned()) == (VNFunc)info.arrOper)
944 // The arithmetic node is the bound's parent.
947 else if (GetVNFuncForOper(op2->OperGet(), op2->IsUnsigned()) == (VNFunc)info.arrOper)
949 // The arithmetic node is the bound's parent.
954 if (boundParent != nullptr)
956 GenTree* bound = nullptr;
958 // Find which child of boundParent is the bound. Abort if neither
959 // conservative value number matches the one from the compare VN.
961 GenTree* child1 = boundParent->gtGetOp1();
962 if ((info.vnBound == child1->gtVNPair.GetConservative()) && IS_CSE_INDEX(child1->gtCSEnum))
968 GenTree* child2 = boundParent->gtGetOp2();
969 if ((info.vnBound == child2->gtVNPair.GetConservative()) && IS_CSE_INDEX(child2->gtCSEnum))
975 if (bound != nullptr)
977 // Found a checked bound feeding a compare that is a tractable function of it;
978 // record this in the map so we can update the compare VN if the bound
981 if (optCseCheckedBoundMap == nullptr)
983 // Allocate map on first use.
984 optCseCheckedBoundMap = new (getAllocator()) NodeToNodeMap(getAllocator());
987 optCseCheckedBoundMap->Set(bound, compare);
992 /*****************************************************************************
994 * Compute each blocks bbCseGen
995 * This is the bitset that represents the CSEs that are generated within the block
997 void Compiler::optValnumCSE_InitDataFlow()
999 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
1001 /* Initialize the blocks's bbCseIn set */
1003 bool init_to_zero = false;
1005 if (block == fgFirstBB)
1007 /* Clear bbCseIn for the entry block */
1008 init_to_zero = true;
1010 #if !CSE_INTO_HANDLERS
1013 if (bbIsHandlerBeg(block))
1015 /* Clear everything on entry to filters or handlers */
1016 init_to_zero = true;
1022 /* Initialize to {ZERO} prior to dataflow */
1023 block->bbCseIn = BitVecOps::MakeEmpty(cseTraits);
1027 /* Initialize to {ALL} prior to dataflow */
1028 block->bbCseIn = BitVecOps::MakeCopy(cseTraits, cseFull);
1031 block->bbCseOut = BitVecOps::MakeCopy(cseTraits, cseFull);
1033 /* Initialize to {ZERO} prior to locating the CSE candidates */
1034 block->bbCseGen = BitVecOps::MakeEmpty(cseTraits);
1037 // We walk the set of CSE candidates and set the bit corresponsing to the CSEindex
1038 // in the block's bbCseGen bitset
1040 for (unsigned cnt = 0; cnt < optCSECandidateCount; cnt++)
1042 CSEdsc* dsc = optCSEtab[cnt];
1043 unsigned CSEindex = dsc->csdIndex;
1044 treeStmtLst* lst = dsc->csdTreeList;
1047 while (lst != nullptr)
1049 BasicBlock* block = lst->tslBlock;
1050 BitVecOps::AddElemD(cseTraits, block->bbCseGen, genCSEnum2bit(CSEindex));
1056 // Dump out the bbCseGen information that we just created
1060 bool headerPrinted = false;
1061 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
1063 if (block->bbCseGen != nullptr)
1067 printf("\nBlocks that generate CSE def/uses\n");
1068 headerPrinted = true;
1070 printf("BB%02u", block->bbNum);
1071 printf(" cseGen = %s\n", genES2str(cseTraits, block->bbCseGen));
1076 fgDebugCheckLinks();
1081 /*****************************************************************************
1083 * CSE Dataflow, so that all helper methods for dataflow are in a single place
1088 BitVecTraits* m_pBitVecTraits;
1089 EXPSET_TP m_preMergeOut;
1092 CSE_DataFlow(Compiler* pCompiler) : m_pBitVecTraits(pCompiler->cseTraits), m_preMergeOut(BitVecOps::UninitVal())
1096 // At the start of the merge function of the dataflow equations, initialize premerge state (to detect changes.)
1097 void StartMerge(BasicBlock* block)
1099 BitVecOps::Assign(m_pBitVecTraits, m_preMergeOut, block->bbCseOut);
1102 // During merge, perform the actual merging of the predecessor's (since this is a forward analysis) dataflow flags.
1103 void Merge(BasicBlock* block, BasicBlock* predBlock, flowList* preds)
1105 BitVecOps::IntersectionD(m_pBitVecTraits, block->bbCseIn, predBlock->bbCseOut);
1108 // At the end of the merge store results of the dataflow equations, in a postmerge state.
1109 bool EndMerge(BasicBlock* block)
1111 BitVecOps::DataFlowD(m_pBitVecTraits, block->bbCseOut, block->bbCseGen, block->bbCseIn);
1112 return !BitVecOps::Equal(m_pBitVecTraits, block->bbCseOut, m_preMergeOut);
1116 /*****************************************************************************
1118 * Perform a DataFlow forward analysis using the block CSE bitsets:
1120 * bbCseGen - Exact CSEs that are become available within the block
1121 * bbCseIn - Maximal estimate of CSEs that are/could be available at input to the block
1122 * bbCseOut - Maximal estimate of CSEs that are/could be available at exit to the block
1125 * bbCseIn - Computed CSEs that are available at input to the block
1126 * bbCseOut - Computed CSEs that are available at exit to the block
1129 void Compiler::optValnumCSE_DataFlow()
1131 CSE_DataFlow cse(this);
1133 // Modified dataflow algorithm for available expressions.
1134 DataFlow cse_flow(this);
1136 cse_flow.ForwardAnalysis(cse);
1141 printf("\nAfter performing DataFlow for ValnumCSE's\n");
1143 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
1145 printf("BB%02u", block->bbNum);
1146 printf(" cseIn = %s", genES2str(cseTraits, block->bbCseIn));
1147 printf(" cseOut = %s", genES2str(cseTraits, block->bbCseOut));
1156 /*****************************************************************************
1158 * Using the information computed by CSE_DataFlow determine for each
1159 * CSE whether the CSE is a definition (if the CSE was not available)
1160 * or if the CSE is a use (if the CSE was previously made available)
1161 * The implementation iterates of all blocks setting 'available_cses'
1162 * to the CSEs that are available at input to the block.
1163 * When a CSE expression is encountered it is classified as either
1164 * as a definition (if the CSE is not in the 'available_cses' set) or
1165 * as a use (if the CSE is in the 'available_cses' set). If the CSE
1166 * is a definition then it is added to the 'available_cses' set.
1167 * In the Value Number based CSEs we do not need to have kill sets
1170 void Compiler::optValnumCSE_Availablity()
1175 printf("Labeling the CSEs with Use/Def information\n");
1178 EXPSET_TP available_cses = BitVecOps::MakeEmpty(cseTraits);
1180 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
1185 /* Make the block publicly available */
1189 BitVecOps::Assign(cseTraits, available_cses, block->bbCseIn);
1191 optCSEweight = block->getBBWeight(this);
1193 /* Walk the statement trees in this basic block */
1195 for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
1197 noway_assert(stmt->gtOper == GT_STMT);
1199 /* We walk the tree in the forwards direction (bottom up) */
1200 for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
1202 if (IS_CSE_INDEX(tree->gtCSEnum))
1204 unsigned int cseBit = genCSEnum2bit(tree->gtCSEnum);
1205 CSEdsc* desc = optCSEfindDsc(tree->gtCSEnum);
1206 unsigned stmw = block->getBBWeight(this);
1208 /* Is this expression available here? */
1210 if (BitVecOps::IsMember(cseTraits, available_cses, cseBit))
1212 /* This is a CSE use */
1214 desc->csdUseCount += 1;
1215 desc->csdUseWtCnt += stmw;
1219 if (tree->gtFlags & GTF_COLON_COND)
1221 // We can't create CSE definitions inside QMARK-COLON trees
1222 tree->gtCSEnum = NO_CSE;
1226 /* This is a CSE def */
1228 if (desc->csdDefCount == 0)
1230 // This is the first def visited, so copy its conservative VN
1231 desc->defConservativeVN = tree->gtVNPair.GetConservative();
1233 else if (tree->gtVNPair.GetConservative() != desc->defConservativeVN)
1235 // This candidate has defs with differing conservative VNs
1236 desc->defConservativeVN = ValueNumStore::NoVN;
1239 desc->csdDefCount += 1;
1240 desc->csdDefWtCnt += stmw;
1242 /* Mark the node as a CSE definition */
1244 tree->gtCSEnum = TO_CSE_DEF(tree->gtCSEnum);
1246 /* This CSE will be available after this def */
1247 BitVecOps::AddElemD(cseTraits, available_cses, cseBit);
1250 if (verbose && IS_CSE_INDEX(tree->gtCSEnum))
1252 printf("BB%02u ", block->bbNum);
1254 printf(" %s of CSE #%02u [weight=%s]\n", IS_CSE_USE(tree->gtCSEnum) ? "Use" : "Def",
1255 GET_CSE_INDEX(tree->gtCSEnum), refCntWtd2str(stmw));
1264 // The following class handles the CSE heuristics
1265 // we use a complex set of heuristic rules
1266 // to determine if it is likely to be profitable to perform this CSE
1270 Compiler* m_pCompiler;
1271 unsigned m_addCSEcount;
1273 unsigned aggressiveRefCnt;
1274 unsigned moderateRefCnt;
1275 unsigned enregCount; // count of the number of enregisterable variables
1278 Compiler::codeOptimize codeOptKind;
1279 Compiler::CSEdsc** sortTab;
1287 CSE_Heuristic(Compiler* pCompiler) : m_pCompiler(pCompiler)
1289 codeOptKind = m_pCompiler->compCodeOpt();
1292 Compiler::codeOptimize CodeOptKind()
1297 // Perform the Initialization step for our CSE Heuristics
1298 // determine the various cut off values to use for
1299 // the aggressive, moderate and conservative CSE promotions
1300 // count the number of enregisterable variables
1301 // determine if the method has a large or huge stack frame.
1305 m_addCSEcount = 0; /* Count of the number of LclVars for CSEs that we added */
1307 // Record the weighted ref count of the last "for sure" callee saved LclVar
1308 aggressiveRefCnt = 0;
1316 #ifdef _TARGET_XARCH_
1317 if (m_pCompiler->compLongUsed)
1323 unsigned frameSize = 0;
1324 unsigned regAvailEstimate = ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2) + 1);
1328 for (lclNum = 0, varDsc = m_pCompiler->lvaTable; lclNum < m_pCompiler->lvaCount; lclNum++, varDsc++)
1330 if (varDsc->lvRefCnt == 0)
1335 #if FEATURE_FIXED_OUT_ARGS
1336 // Skip the OutgoingArgArea in computing frame size, since
1337 // its size is not yet known and it doesn't affect local
1338 // offsets from the frame pointer (though it may affect
1339 // them from the stack pointer).
1340 noway_assert(m_pCompiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
1341 if (lclNum == m_pCompiler->lvaOutgoingArgSpaceVar)
1345 #endif // FEATURE_FIXED_OUT_ARGS
1347 bool onStack = (regAvailEstimate == 0); // true when it is likely that this LclVar will have a stack home
1349 // Some LclVars always have stack homes
1350 if ((varDsc->lvDoNotEnregister) || (varDsc->lvType == TYP_LCLBLK))
1356 // Treat floating point and 64 bit integers as always on the stack
1357 if (varTypeIsFloating(varDsc->TypeGet()) || varTypeIsLong(varDsc->TypeGet()))
1363 frameSize += m_pCompiler->lvaLclSize(lclNum);
1367 // For the purposes of estimating the frameSize we
1368 // will consider this LclVar as being enregistered.
1369 // Now we reduce the remaining regAvailEstimate by
1370 // an appropriate amount.
1371 if (varDsc->lvRefCnt <= 2)
1373 // a single use single def LclVar only uses 1
1374 regAvailEstimate -= 1;
1378 // a LclVar with multiple uses and defs uses 2
1379 if (regAvailEstimate >= 2)
1381 regAvailEstimate -= 2;
1385 // Don't try to subtract when regAvailEstimate is 1
1386 regAvailEstimate = 0;
1390 #ifdef _TARGET_XARCH_
1391 if (frameSize > 0x080)
1393 // We likely have a large stack frame.
1394 // Thus we might need to use large displacements when loading or storing
1395 // to CSE LclVars that are not enregistered
1397 break; // early out, we don't need to keep increasing frameSize
1399 #else // _TARGET_ARM_
1400 if (frameSize > 0x0400)
1404 if (frameSize > 0x10000)
1412 unsigned sortNum = 0;
1413 while (sortNum < m_pCompiler->lvaTrackedCount)
1415 LclVarDsc* varDsc = m_pCompiler->lvaRefSorted[sortNum++];
1416 var_types varTyp = varDsc->TypeGet();
1418 if (varDsc->lvDoNotEnregister)
1423 if (!varTypeIsFloating(varTyp))
1425 // TODO-1stClassStructs: Remove this; it is here to duplicate previous behavior.
1426 // Note that this makes genTypeStSz return 1.
1427 if (varTypeIsStruct(varTyp))
1429 varTyp = TYP_STRUCT;
1431 enregCount += genTypeStSz(varTyp);
1434 if ((aggressiveRefCnt == 0) && (enregCount > (CNT_CALLEE_ENREG * 3 / 2)))
1436 if (CodeOptKind() == Compiler::SMALL_CODE)
1438 aggressiveRefCnt = varDsc->lvRefCnt + BB_UNITY_WEIGHT;
1442 aggressiveRefCnt = varDsc->lvRefCntWtd + BB_UNITY_WEIGHT;
1445 if ((moderateRefCnt == 0) && (enregCount > ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2))))
1447 if (CodeOptKind() == Compiler::SMALL_CODE)
1449 moderateRefCnt = varDsc->lvRefCnt;
1453 moderateRefCnt = varDsc->lvRefCntWtd;
1458 // use smaller value for mult when enregCount is in [0..4]
1459 if (enregCount <= 4)
1461 mult = (enregCount <= 2) ? 1 : 2;
1464 aggressiveRefCnt = max(BB_UNITY_WEIGHT * mult, aggressiveRefCnt);
1465 moderateRefCnt = max((BB_UNITY_WEIGHT * mult) / 2, moderateRefCnt);
1468 if (m_pCompiler->verbose)
1471 printf("Aggressive CSE Promotion cutoff is %u\n", aggressiveRefCnt);
1472 printf("Moderate CSE Promotion cutoff is %u\n", moderateRefCnt);
1473 printf("Framesize estimate is 0x%04X\n", frameSize);
1474 printf("We have a %s frame\n", hugeFrame ? "huge" : (largeFrame ? "large" : "small"));
1479 void SortCandidates()
1481 /* Create an expression table sorted by decreasing cost */
1482 sortTab = new (m_pCompiler, CMK_CSE) Compiler::CSEdsc*[m_pCompiler->optCSECandidateCount];
1484 sortSiz = m_pCompiler->optCSECandidateCount * sizeof(*sortTab);
1485 memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz);
1487 if (CodeOptKind() == Compiler::SMALL_CODE)
1489 qsort(sortTab, m_pCompiler->optCSECandidateCount, sizeof(*sortTab), m_pCompiler->optCSEcostCmpSz);
1493 qsort(sortTab, m_pCompiler->optCSECandidateCount, sizeof(*sortTab), m_pCompiler->optCSEcostCmpEx);
1497 if (m_pCompiler->verbose)
1499 printf("\nSorted CSE candidates:\n");
1500 /* Print out the CSE candidates */
1502 for (unsigned cnt = 0; cnt < m_pCompiler->optCSECandidateCount; cnt++)
1504 Compiler::CSEdsc* dsc = sortTab[cnt];
1505 GenTree* expr = dsc->csdTree;
1510 if (CodeOptKind() == Compiler::SMALL_CODE)
1512 def = dsc->csdDefCount; // def count
1513 use = dsc->csdUseCount; // use count (excluding the implicit uses at defs)
1517 def = dsc->csdDefWtCnt; // weighted def count
1518 use = dsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
1521 tempMask = BitVecOps::MakeSingleton(m_pCompiler->cseTraits, genCSEnum2bit(dsc->csdIndex));
1522 printf("CSE #%02u,cseMask=%s,useCnt=%d: [def=%3u, use=%3u", dsc->csdIndex,
1523 genES2str(m_pCompiler->cseTraits, tempMask), dsc->csdUseCount, def, use);
1525 m_pCompiler->gtDispTree(expr, nullptr, nullptr, true);
1532 // The following class nested within CSE_Heuristic encapsulates the information
1533 // about the current CSE candidate that is under consideration
1535 // TODO-Cleanup: This is still very much based upon the old Lexical CSE implementation
1536 // and needs to be reworked for the Value Number based implementation
1540 CSE_Heuristic* m_context;
1541 Compiler::CSEdsc* m_CseDsc;
1543 unsigned m_cseIndex;
1545 unsigned m_defCount;
1546 unsigned m_useCount;
1552 CSE_Candidate(CSE_Heuristic* context, Compiler::CSEdsc* cseDsc) : m_context(context), m_CseDsc(cseDsc)
1554 m_cseIndex = m_CseDsc->csdIndex;
1557 Compiler::CSEdsc* CseDsc()
1573 // TODO-CQ: With ValNum CSE's the Expr and its cost can vary.
1576 return m_CseDsc->csdTree;
1587 bool LiveAcrossCall()
1589 return (m_CseDsc->csdLiveAcrossCall != 0);
1592 void InitializeCounts()
1594 if (m_context->CodeOptKind() == Compiler::SMALL_CODE)
1596 m_Cost = Expr()->gtCostSz; // the estimated code size
1597 m_Size = Expr()->gtCostSz; // always the gtCostSz
1598 m_defCount = m_CseDsc->csdDefCount; // def count
1599 m_useCount = m_CseDsc->csdUseCount; // use count (excluding the implicit uses at defs)
1603 m_Cost = Expr()->gtCostEx; // the estimated execution cost
1604 m_Size = Expr()->gtCostSz; // always the gtCostSz
1605 m_defCount = m_CseDsc->csdDefWtCnt; // weighted def count
1606 m_useCount = m_CseDsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
1612 //------------------------------------------------------------------------
1613 // optConfigBiasedCSE:
1614 // Stress mode to shuffle the decision to CSE or not using environment
1615 // variable COMPlus_JitStressBiasedCSE (= 0 to 100%). When the bias value
1616 // is not specified but COMPlus_JitStress is ON, generate a random bias.
1619 // 0 -- This method is indifferent about this CSE (no bias specified and no stress)
1620 // 1 -- This CSE must be performed to maintain specified/generated bias.
1621 // -1 -- This CSE mustn't be performed to maintain specified/generated bias.
1624 // A debug stress only method that returns "1" with probability (P)
1627 // P = (COMPlus_JitStressBiasedCSE / 100) (or)
1628 // P = (random(100) / 100) when COMPlus_JitStress is specified and
1629 // COMPlus_JitStressBiasedCSE is unspecified.
1631 // When specified, the bias is reinterpreted as a decimal number between 0
1633 // When bias is not specified, a bias is randomly generated if COMPlus_JitStress
1636 // Callers are supposed to call this method for each CSE promotion decision
1637 // and ignore the call if return value is 0 and honor the 1 with a CSE and
1638 // -1 with a no-CSE to maintain the specified/generated bias.
1640 int optConfigBiasedCSE()
1642 // Seed the PRNG, if never done before.
1643 if (!m_cseRNG.IsInitialized())
1645 m_cseRNG.Init(m_pCompiler->info.compMethodHash());
1646 m_bias = m_cseRNG.Next(100);
1649 // Obtain the bias value and reinterpret as decimal.
1650 unsigned bias = ReinterpretHexAsDecimal(JitConfig.JitStressBiasedCSE());
1652 // Invalid value, check if JitStress is ON.
1655 if (!m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, MAX_STRESS_WEIGHT))
1657 // JitStress is OFF for CSE, nothing to do.
1661 JITDUMP("JitStressBiasedCSE is OFF, but JitStress is ON: generated bias=%d.\n", bias);
1664 // Generate a number between (0, 99) and if the generated
1665 // number is smaller than bias, then perform CSE.
1666 unsigned gen = m_cseRNG.Next(100);
1667 int ret = (gen < bias) ? 1 : -1;
1669 if (m_pCompiler->verbose)
1673 printf("No CSE because gen=%d >= bias=%d\n", gen, bias);
1677 printf("Promoting CSE because gen=%d < bias=%d\n", gen, bias);
1681 // Indicate whether to perform CSE or not.
1686 // Given a CSE candidate decide whether it passes or fails the profitablity heuristic
1687 // return true if we believe that it is profitable to promote this candidate to a CSE
1689 bool PromotionCheck(CSE_Candidate* candidate)
1691 bool result = false;
1694 int stressResult = optConfigBiasedCSE();
1695 if (stressResult != 0)
1697 // Stress is enabled. Check whether to perform CSE or not.
1698 return (stressResult > 0);
1701 if (m_pCompiler->optConfigDisableCSE2())
1703 return false; // skip this CSE
1708 Our calculation is based on the following cost estimate formula
1714 If we introduce a CSE temp are each definition and
1715 replace the use with a CSE temp then our cost is:
1717 (def * (cost + cse-def-cost)) + (use * cse-use-cost)
1719 We must estimate the values to use for cse-def-cost and cse-use-cost
1721 If we are able to enregister the CSE then the cse-use-cost is one
1722 and cse-def-cost is either zero or one. Zero in the case where
1723 we needed to evaluate the def into a register and we can use that
1724 register as the CSE temp as well.
1726 If we are unable to enregister the CSE then the cse-use-cost is IND_COST
1727 and the cse-def-cost is also IND_COST.
1729 If we want to be conservative we use IND_COST as the the value
1730 for both cse-def-cost and cse-use-cost and then we never introduce
1731 a CSE that could pessimize the execution time of the method.
1733 If we want to be more moderate we use (IND_COST_EX + 1) / 2 as the
1734 values for both cse-def-cost and cse-use-cost.
1736 If we want to be aggressive we use 1 as the values for both
1737 cse-def-cost and cse-use-cost.
1739 If we believe that the CSE very valuable in terms of weighted ref counts
1740 such that it would always be enregistered by the register allocator we choose
1741 the aggressive use def costs.
1743 If we believe that the CSE is somewhat valuable in terms of weighted ref counts
1744 such that it could be likely be enregistered by the register allocator we choose
1745 the moderate use def costs.
1747 otherwise we choose the conservative use def costs.
1751 unsigned cse_def_cost;
1752 unsigned cse_use_cost;
1754 unsigned no_cse_cost = 0;
1755 unsigned yes_cse_cost = 0;
1756 unsigned extra_yes_cost = 0;
1757 unsigned extra_no_cost = 0;
1759 // The 'cseRefCnt' is the RefCnt that we will have if we promote this CSE into a new LclVar
1760 // Each CSE Def will contain two Refs and each CSE Use wil have one Ref of this new LclVar
1761 unsigned cseRefCnt = (candidate->DefCount() * 2) + candidate->UseCount();
1763 if (CodeOptKind() == Compiler::SMALL_CODE)
1765 if (cseRefCnt >= aggressiveRefCnt)
1768 if (m_pCompiler->verbose)
1770 printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
1776 if (candidate->LiveAcrossCall() != 0)
1790 else if (largeFrame)
1793 if (m_pCompiler->verbose)
1795 printf("Codesize CSE Promotion (large frame)\n");
1798 #ifdef _TARGET_XARCH_
1799 /* The following formula is good choice when optimizing CSE for SMALL_CODE */
1800 cse_def_cost = 6; // mov [EBP-0x00001FC],reg
1801 cse_use_cost = 5; // [EBP-0x00001FC]
1802 #else // _TARGET_ARM_
1805 cse_def_cost = 12; // movw/movt r10 and str reg,[sp+r10]
1810 cse_def_cost = 8; // movw r10 and str reg,[sp+r10]
1818 if (m_pCompiler->verbose)
1820 printf("Codesize CSE Promotion (small frame)\n");
1823 #ifdef _TARGET_XARCH_
1824 /* The following formula is good choice when optimizing CSE for SMALL_CODE */
1825 cse_def_cost = 3; // mov [EBP-1C],reg
1826 cse_use_cost = 2; // [EBP-1C]
1827 #else // _TARGET_ARM_
1828 cse_def_cost = 2; // str reg,[sp+0x9c]
1829 cse_use_cost = 2; // ldr reg,[sp+0x9c]
1833 else // not SMALL_CODE ...
1835 if (cseRefCnt >= aggressiveRefCnt)
1838 if (m_pCompiler->verbose)
1840 printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
1846 else if (cseRefCnt >= moderateRefCnt)
1849 if (candidate->LiveAcrossCall() == 0)
1852 if (m_pCompiler->verbose)
1854 printf("Moderate CSE Promotion (CSE never live at call) (%u >= %u)\n", cseRefCnt,
1861 else // candidate is live across call
1864 if (m_pCompiler->verbose)
1866 printf("Moderate CSE Promotion (%u >= %u)\n", cseRefCnt, moderateRefCnt);
1871 extra_yes_cost = BB_UNITY_WEIGHT * 2; // Extra cost in case we have to spill/restore a caller
1875 else // Conservative CSE promotion
1877 if (candidate->LiveAcrossCall() == 0)
1880 if (m_pCompiler->verbose)
1882 printf("Conservative CSE Promotion (CSE never live at call) (%u < %u)\n", cseRefCnt,
1889 else // candidate is live across call
1892 if (m_pCompiler->verbose)
1894 printf("Conservative CSE Promotion (%u < %u)\n", cseRefCnt, moderateRefCnt);
1899 extra_yes_cost = BB_UNITY_WEIGHT * 4; // Extra cost in case we have to spill/restore a caller
1903 // If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable
1904 if (m_pCompiler->lvaTrackedCount == lclMAX_TRACKED)
1923 // estimate the cost from lost codesize reduction if we do not perform the CSE
1924 if (candidate->Size() > cse_use_cost)
1926 Compiler::CSEdsc* dsc = candidate->CseDsc(); // We need to retrieve the actual use count, not the
1928 extra_no_cost = candidate->Size() - cse_use_cost;
1929 extra_no_cost = extra_no_cost * dsc->csdUseCount * 2;
1932 /* no_cse_cost is the cost estimate when we decide not to make a CSE */
1933 /* yes_cse_cost is the cost estimate when we decide to make a CSE */
1935 no_cse_cost = candidate->UseCount() * candidate->Cost();
1936 yes_cse_cost = (candidate->DefCount() * cse_def_cost) + (candidate->UseCount() * cse_use_cost);
1938 #if CPU_LONG_USES_REGPAIR
1939 if (candidate->Expr()->TypeGet() == TYP_LONG)
1944 no_cse_cost += extra_no_cost;
1945 yes_cse_cost += extra_yes_cost;
1948 if (m_pCompiler->verbose)
1950 printf("cseRefCnt=%d, aggressiveRefCnt=%d, moderateRefCnt=%d\n", cseRefCnt, aggressiveRefCnt,
1952 printf("defCnt=%d, useCnt=%d, cost=%d, size=%d\n", candidate->DefCount(), candidate->UseCount(),
1953 candidate->Cost(), candidate->Size());
1954 printf("def_cost=%d, use_cost=%d, extra_no_cost=%d, extra_yes_cost=%d\n", cse_def_cost, cse_use_cost,
1955 extra_no_cost, extra_yes_cost);
1957 printf("CSE cost savings check (%u >= %u) %s\n", no_cse_cost, yes_cse_cost,
1958 (no_cse_cost >= yes_cse_cost) ? "passes" : "fails");
1962 // Should we make this candidate into a CSE?
1963 // Is the yes cost less than the no cost
1965 if (yes_cse_cost <= no_cse_cost)
1967 result = true; // Yes make this a CSE
1971 /* In stress mode we will make some extra CSEs */
1972 if (no_cse_cost > 0)
1974 int percentage = (no_cse_cost * 100) / yes_cse_cost;
1976 if (m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, percentage))
1978 result = true; // Yes make this a CSE
1986 // PerformCSE() takes a successful candidate and performs the appropriate replacements:
1988 // It will replace all of the CSE defs with assignments to a new "cse0" LclVar
1989 // and will replace all of the CSE uses with reads of the "cse0" LclVar
1991 void PerformCSE(CSE_Candidate* successfulCandidate)
1993 unsigned cseRefCnt = (successfulCandidate->DefCount() * 2) + successfulCandidate->UseCount();
1995 if (successfulCandidate->LiveAcrossCall() != 0)
1997 // As we introduce new LclVars for these CSE we slightly
1998 // increase the cutoffs for aggressive and moderate CSE's
2000 int incr = BB_UNITY_WEIGHT;
2002 #if CPU_LONG_USES_REGPAIR
2003 if (successfulCandidate->Expr()->TypeGet() == TYP_LONG)
2007 if (cseRefCnt > aggressiveRefCnt)
2009 aggressiveRefCnt += incr;
2012 if (cseRefCnt > moderateRefCnt)
2014 moderateRefCnt += (incr / 2);
2018 /* Introduce a new temp for the CSE */
2020 // we will create a long lifetime temp for the new cse LclVar
2021 unsigned cseLclVarNum = m_pCompiler->lvaGrabTemp(false DEBUGARG("ValNumCSE"));
2022 var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet());
2023 if (varTypeIsStruct(cseLclVarTyp))
2025 m_pCompiler->lvaSetStruct(cseLclVarNum, m_pCompiler->gtGetStructHandle(successfulCandidate->Expr()), false);
2027 m_pCompiler->lvaTable[cseLclVarNum].lvType = cseLclVarTyp;
2028 m_pCompiler->lvaTable[cseLclVarNum].lvIsCSE = true;
2030 m_addCSEcount++; // Record that we created a new LclVar for use as a CSE temp
2031 m_pCompiler->optCSEcount++;
2033 ValueNum defConservativeVN = successfulCandidate->CseDsc()->defConservativeVN;
2035 /* Walk all references to this CSE, adding an assignment
2036 to the CSE temp to all defs and changing all refs to
2037 a simple use of the CSE temp.
2039 We also unmark nested CSE's for all uses.
2042 Compiler::treeStmtLst* lst;
2043 lst = successfulCandidate->CseDsc()->csdTreeList;
2046 #define QQQ_CHECK_CSE_VNS 0
2047 #if QQQ_CHECK_CSE_VNS
2048 assert(lst != NULL);
2049 ValueNum firstVN = lst->tslTree->gtVN;
2051 bool allSame = true;
2054 if (IS_CSE_INDEX(lst->tslTree->gtCSEnum))
2056 if (lst->tslTree->gtVN != firstVN)
2066 lst = dsc->csdTreeList;
2067 GenTree* firstTree = lst->tslTree;
2068 printf("In %s, CSE (oper = %s, type = %s) has differing VNs: ", info.compFullName,
2069 GenTree::OpName(firstTree->OperGet()), varTypeName(firstTree->TypeGet()));
2072 if (IS_CSE_INDEX(lst->tslTree->gtCSEnum))
2074 printf("0x%x(%s,%d) ", lst->tslTree, IS_CSE_USE(lst->tslTree->gtCSEnum) ? "u" : "d",
2075 lst->tslTree->gtVN);
2081 lst = dsc->csdTreeList;
2086 /* Process the next node in the list */
2087 GenTree* exp = lst->tslTree;
2088 GenTree* stm = lst->tslStmt;
2089 noway_assert(stm->gtOper == GT_STMT);
2090 BasicBlock* blk = lst->tslBlock;
2092 /* Advance to the next node in the list */
2095 // Assert if we used DEBUG_DESTROY_NODE on this CSE exp
2096 assert(exp->gtOper != GT_COUNT);
2098 /* Ignore the node if it's not been marked as a CSE */
2099 if (!IS_CSE_INDEX(exp->gtCSEnum))
2104 /* Make sure we update the weighted ref count correctly */
2105 m_pCompiler->optCSEweight = blk->getBBWeight(m_pCompiler);
2107 /* Figure out the actual type of the value */
2108 var_types expTyp = genActualType(exp->TypeGet());
2109 noway_assert(expTyp == cseLclVarTyp);
2111 // This will contain the replacement tree for exp
2112 // It will either be the CSE def or CSE ref
2114 GenTree* cse = nullptr;
2116 FieldSeqNode* fldSeq = nullptr;
2117 bool hasZeroMapAnnotation = m_pCompiler->GetZeroOffsetFieldMap()->Lookup(exp, &fldSeq);
2119 if (IS_CSE_USE(exp->gtCSEnum))
2121 /* This is a use of the CSE */
2124 if (m_pCompiler->verbose)
2126 printf("\nWorking on the replacement of the CSE #%02u use at ", exp->gtCSEnum);
2127 Compiler::printTreeID(exp);
2128 printf(" in BB%02u\n", blk->bbNum);
2132 /* check for and collect any SIDE_EFFECTS */
2133 GenTree* sideEffList = nullptr;
2135 if (exp->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS)
2137 // Extract any side effects from exp
2139 m_pCompiler->gtExtractSideEffList(exp, &sideEffList, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE);
2141 if (sideEffList != nullptr)
2143 noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
2145 if (m_pCompiler->verbose)
2147 printf("\nThis CSE use has persistent side effects. Extracted side effects...\n");
2148 m_pCompiler->gtDispTree(sideEffList);
2155 // We will replace the CSE ref with a new tree
2156 // this is typically just a simple use of the new CSE LclVar
2158 cse = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
2159 cse->gtVNPair = exp->gtVNPair; // assign the proper Value Numbers
2160 if (defConservativeVN != ValueNumStore::NoVN)
2162 // All defs of this CSE share the same conservative VN, and we are rewriting this
2163 // use to fetch the same value with no reload, so we can safely propagate that
2164 // conservative VN to this use. This can help range check elimination later on.
2165 cse->gtVNPair.SetConservative(defConservativeVN);
2167 // If the old VN was flagged as a checked bound, propagate that to the new VN
2168 // to make sure assertion prop will pay attention to this VN.
2169 ValueNumStore* vnStore = m_pCompiler->vnStore;
2170 ValueNum oldVN = exp->gtVNPair.GetConservative();
2171 if (!vnStore->IsVNConstant(defConservativeVN) && vnStore->IsVNCheckedBound(oldVN))
2173 vnStore->SetVNIsCheckedBound(defConservativeVN);
2177 if ((m_pCompiler->optCseCheckedBoundMap != nullptr) &&
2178 (m_pCompiler->optCseCheckedBoundMap->Lookup(exp, &cmp)))
2180 // Propagate the new value number to this compare node as well, since
2181 // subsequent range check elimination will try to correlate it with
2182 // the other appearances that are getting CSEd.
2184 ValueNum oldCmpVN = cmp->gtVNPair.GetConservative();
2185 ValueNum newCmpArgVN;
2187 ValueNumStore::CompareCheckedBoundArithInfo info;
2188 if (vnStore->IsVNCompareCheckedBound(oldCmpVN))
2190 // Comparison is against the bound directly.
2192 newCmpArgVN = defConservativeVN;
2193 vnStore->GetCompareCheckedBound(oldCmpVN, &info);
2197 // Comparison is against the bound +/- some offset.
2199 assert(vnStore->IsVNCompareCheckedBoundArith(oldCmpVN));
2200 vnStore->GetCompareCheckedBoundArithInfo(oldCmpVN, &info);
2201 newCmpArgVN = vnStore->VNForFunc(vnStore->TypeOfVN(info.arrOp), (VNFunc)info.arrOper,
2202 info.arrOp, defConservativeVN);
2204 ValueNum newCmpVN = vnStore->VNForFunc(vnStore->TypeOfVN(oldCmpVN), (VNFunc)info.cmpOper,
2205 info.cmpOp, newCmpArgVN);
2206 cmp->gtVNPair.SetConservative(newCmpVN);
2210 cse->gtDebugFlags |= GTF_DEBUG_VAR_CSE_REF;
2213 // Now we need to unmark any nested CSE's uses that are found in 'exp'
2214 // As well we extract any nested CSE defs that are found in 'exp' and
2215 // these are appended to the sideEffList
2217 // Afterwards the set of nodes in the 'sideEffectList' are preserved and
2218 // all other nodes are removed and have their ref counts decremented
2220 exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field
2221 bool result = m_pCompiler->optValnumCSE_UnmarkCSEs(exp, &sideEffList);
2223 // When 'result' is false we ran into a case where 'exp contains a nested CSE use
2224 // that has persistent side effects. It is very difficult to construct the proper
2225 // side effect list for this case.
2226 // Additionally this case is extremely uncommon, so we just give up on replacing
2227 // this particular CSE use when we have this case. [VSO 566984]
2229 if (result == false)
2232 if (m_pCompiler->verbose)
2234 printf("\nThis CSE use has a nested CSE defs with persistent side effects...\n");
2235 m_pCompiler->gtDispTree(exp);
2236 printf("Abandoning this CSE use subsitution\n");
2241 else // We now perform the replacement of the CSE use
2244 if (m_pCompiler->verbose)
2246 printf("\nCSE #%02u use at ", exp->gtCSEnum);
2247 Compiler::printTreeID(exp);
2248 printf(" replaced in BB%02u with temp use.\n", blk->bbNum);
2251 // If we have any side effects or extracted CSE defs then we need to create a GT_COMMA tree instead
2256 if (m_pCompiler->verbose)
2258 printf("\nThis CSE use has side effects and/or nested CSE defs. The sideEffectList:\n");
2259 m_pCompiler->gtDispTree(sideEffList);
2264 GenTree* cseVal = cse;
2265 GenTree* curSideEff = sideEffList;
2266 ValueNumStore* vnStore = m_pCompiler->vnStore;
2267 ValueNumPair exceptions_vnp = ValueNumStore::VNPForEmptyExcSet();
2269 while ((curSideEff->OperGet() == GT_COMMA) || (curSideEff->OperGet() == GT_ASG))
2271 GenTree* op1 = curSideEff->gtOp.gtOp1;
2272 GenTree* op2 = curSideEff->gtOp.gtOp2;
2274 ValueNumPair op1vnp;
2275 ValueNumPair op1Xvnp = ValueNumStore::VNPForEmptyExcSet();
2276 vnStore->VNPUnpackExc(op1->gtVNPair, &op1vnp, &op1Xvnp);
2278 exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op1Xvnp);
2282 // We may have inserted a narrowing cast during a previous remorph
2283 // and it will not have a value number.
2284 if ((curSideEff->OperGet() == GT_CAST) && !curSideEff->gtVNPair.BothDefined())
2286 // The inserted cast will have no exceptional effects
2287 assert(curSideEff->gtOverflow() == false);
2288 // Process the exception effects from the cast's operand.
2289 curSideEff = curSideEff->gtOp.gtOp1;
2292 ValueNumPair op2vnp;
2293 ValueNumPair op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
2294 vnStore->VNPUnpackExc(curSideEff->gtVNPair, &op2vnp, &op2Xvnp);
2295 exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp);
2297 op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
2298 vnStore->VNPUnpackExc(cseVal->gtVNPair, &op2vnp, &op2Xvnp);
2299 exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp);
2301 // Create a comma node with the sideEffList as op1
2302 cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, sideEffList, cseVal);
2303 cse->gtVNPair = vnStore->VNPWithExc(op2vnp, exceptions_vnp);
2309 /* This is a def of the CSE */
2312 if (m_pCompiler->verbose)
2314 printf("\nCSE #%02u def at ", GET_CSE_INDEX(exp->gtCSEnum));
2315 Compiler::printTreeID(exp);
2316 printf(" replaced in BB%02u with def of V%02u\n", blk->bbNum, cseLclVarNum);
2320 exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field
2324 /* Create an assignment of the value to the temp */
2325 GenTree* asg = m_pCompiler->gtNewTempAssign(cseLclVarNum, val);
2327 // assign the proper Value Numbers
2328 asg->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); // The GT_ASG node itself is $VN.Void
2329 asg->gtOp.gtOp1->gtVNPair = val->gtVNPair; // The dest op is the same as 'val'
2331 noway_assert(asg->gtOp.gtOp1->gtOper == GT_LCL_VAR);
2332 noway_assert(asg->gtOp.gtOp2 == val);
2334 /* Create a reference to the CSE temp */
2335 GenTree* ref = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
2336 ref->gtVNPair = val->gtVNPair; // The new 'ref' is the same as 'val'
2338 // If it has a zero-offset field seq, copy annotation to the ref
2339 if (hasZeroMapAnnotation)
2341 m_pCompiler->GetZeroOffsetFieldMap()->Set(ref, fldSeq);
2344 /* Create a comma node for the CSE assignment */
2345 cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, asg, ref);
2346 cse->gtVNPair = ref->gtVNPair; // The comma's value is the same as 'val'
2347 // as the assignment to the CSE LclVar
2348 // cannot add any new exceptions
2351 // Increment ref count for the CSE ref
2352 m_pCompiler->lvaTable[cseLclVarNum].incRefCnts(blk->getBBWeight(m_pCompiler), m_pCompiler);
2356 // Also increment ref count for the CSE assignment
2357 m_pCompiler->lvaTable[cseLclVarNum].incRefCnts(blk->getBBWeight(m_pCompiler), m_pCompiler);
2360 // Walk the statement 'stm' and find the pointer
2361 // in the tree is pointing to 'exp'
2363 GenTree** link = m_pCompiler->gtFindLink(stm, exp);
2366 if (link == nullptr)
2368 printf("\ngtFindLink failed: stm=");
2369 Compiler::printTreeID(stm);
2371 Compiler::printTreeID(exp);
2374 m_pCompiler->gtDispTree(stm);
2377 m_pCompiler->gtDispTree(exp);
2384 // Mutate this link, thus replacing the old exp with the new cse representation
2388 // If it has a zero-offset field seq, copy annotation.
2389 if (hasZeroMapAnnotation)
2391 m_pCompiler->GetZeroOffsetFieldMap()->Set(cse, fldSeq);
2394 assert(m_pCompiler->fgRemoveRestOfBlock == false);
2396 /* re-morph the statement */
2397 m_pCompiler->fgMorphBlockStmt(blk, stm->AsStmt() DEBUGARG("optValnumCSE"));
2399 } while (lst != nullptr);
2402 // Consider each of the CSE candidates and if the CSE passes
2403 // the PromotionCheck then transform the CSE by calling PerformCSE
2405 void ConsiderCandidates()
2407 /* Consider each CSE candidate, in order of decreasing cost */
2408 unsigned cnt = m_pCompiler->optCSECandidateCount;
2409 Compiler::CSEdsc** ptr = sortTab;
2410 for (; (cnt > 0); cnt--, ptr++)
2412 Compiler::CSEdsc* dsc = *ptr;
2413 CSE_Candidate candidate(this, dsc);
2415 candidate.InitializeCounts();
2417 if (candidate.UseCount() == 0)
2420 if (m_pCompiler->verbose)
2422 printf("Skipped CSE #%02u because use count is 0\n", candidate.CseIndex());
2429 if (m_pCompiler->verbose)
2431 printf("\nConsidering CSE #%02u [def=%2u, use=%2u, cost=%2u] CSE Expression:\n", candidate.CseIndex(),
2432 candidate.DefCount(), candidate.UseCount(), candidate.Cost());
2433 m_pCompiler->gtDispTree(candidate.Expr());
2438 if ((dsc->csdDefCount <= 0) || (dsc->csdUseCount == 0))
2440 // If we reach this point, then the CSE def was incorrectly marked or the
2441 // block with this use is unreachable. So skip and go to the next CSE.
2442 // Without the "continue", we'd generate bad code in retail.
2443 // Commented out a noway_assert(false) here due to bug: 3290124.
2444 // The problem is if there is sub-graph that is not reachable from the
2445 // entry point, the CSE flags propagated, would be incorrect for it.
2449 bool doCSE = PromotionCheck(&candidate);
2452 if (m_pCompiler->verbose)
2456 printf("\nPromoting CSE:\n");
2460 printf("Did Not promote this CSE\n");
2467 PerformCSE(&candidate);
2472 // Perform the necessary cleanup after our CSE heuristics have run
2476 if (m_addCSEcount > 0)
2478 /* We've added new local variables to the lvaTable so note that we need to recreate the sorted table */
2479 m_pCompiler->lvaSortAgain = true;
2484 /*****************************************************************************
2486 * Routine for performing the Value Number based CSE using our heuristics
2489 void Compiler::optValnumCSE_Heuristic()
2494 printf("\n************ Trees at start of optValnumCSE_Heuristic()\n");
2495 fgDumpTrees(fgFirstBB, nullptr);
2500 CSE_Heuristic cse_heuristic(this);
2502 cse_heuristic.Initialize();
2503 cse_heuristic.SortCandidates();
2504 cse_heuristic.ConsiderCandidates();
2505 cse_heuristic.Cleanup();
2508 /*****************************************************************************
2510 * Routine to unmark any CSEs contained within a tree
2511 * - optionally a 'keepList' vcan be provided to specify a list of trees that will be kept
2515 bool Compiler::optValnumCSE_UnmarkCSEs(GenTree* candidateTree, GenTree** wbKeepList)
2517 assert(optValnumCSE_phase);
2519 // We need to communicate the 'keepList' to both optHasCSEdefWithSideeffect
2520 // and optUnmarkCSEs as any part of the 'candidateTree' tree that is in the
2521 // keepList is preserved and is not deleted and does not have its ref counts
2523 // We communicate this value using the walkData.pCallbackData field
2526 // First check for the rare case where we have a nested CSE def that has
2527 // side-effects and return false whenever we have this case
2529 Compiler::fgWalkResult defWithSideEffectResult =
2530 fgWalkTreePre(&candidateTree, optHasCSEdefWithSideeffect, (void*)wbKeepList);
2531 bool hasPersistentSideEffect = (defWithSideEffectResult == WALK_ABORT);
2532 if (hasPersistentSideEffect)
2538 Compiler::fgWalkResult unmarkResult = fgWalkTreePre(&candidateTree, optUnmarkCSEs, (void*)wbKeepList);
2539 assert(unmarkResult != WALK_ABORT);
2545 /*****************************************************************************
2547 * Perform common sub-expression elimination.
2550 void Compiler::optOptimizeValnumCSEs()
2555 printf("\n*************** In optOptimizeValnumCSEs()\n");
2558 if (optConfigDisableCSE())
2560 return; // Disabled by JitNoCSE
2564 optValnumCSE_phase = true;
2566 /* Initialize the expression tracking logic */
2568 optValnumCSE_Init();
2570 /* Locate interesting expressions and assign indices to them */
2572 if (optValnumCSE_Locate() > 0)
2574 optCSECandidateTotal += optCSECandidateCount;
2576 optValnumCSE_InitDataFlow();
2578 optValnumCSE_DataFlow();
2580 optValnumCSE_Availablity();
2582 optValnumCSE_Heuristic();
2585 optValnumCSE_phase = false;
2588 #endif // FEATURE_VALNUM_CSE
2590 /*****************************************************************************
2592 * The following determines whether the given expression is a worthy CSE
2595 bool Compiler::optIsCSEcandidate(GenTree* tree)
2597 /* No good if the expression contains side effects or if it was marked as DONT CSE */
2599 if (tree->gtFlags & (GTF_ASG | GTF_DONT_CSE))
2604 /* The only reason a TYP_STRUCT tree might occur is as an argument to
2605 GT_ADDR. It will never be actually materialized. So ignore them.
2608 var_types type = tree->TypeGet();
2609 genTreeOps oper = tree->OperGet();
2611 // TODO-1stClassStructs: Enable CSE for struct types (depends on either transforming
2612 // to use regular assignments, or handling copyObj.
2613 if (varTypeIsStruct(type) || type == TYP_VOID)
2619 if (type == TYP_FLOAT)
2621 // TODO-X86-CQ: Revisit this
2622 // Don't CSE a TYP_FLOAT on x86 as we currently can only enregister doubles
2626 if (oper == GT_CNS_DBL)
2628 // TODO-CQ: Revisit this
2629 // Don't try to CSE a GT_CNS_DBL as they can represent both float and doubles
2635 if (compCodeOpt() == SMALL_CODE)
2637 cost = tree->gtCostSz;
2641 cost = tree->gtCostEx;
2644 /* Don't bother if the potential savings are very low */
2645 if (cost < MIN_CSE_COST)
2651 /* Don't bother with constants */
2652 if (tree->OperKind() & GTK_CONST)
2656 /* Check for some special cases */
2661 // If we have a simple helper call with no other persistent side-effects
2662 // then we allow this tree to be a CSE candidate
2664 if (gtTreeHasSideEffects(tree, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE) == false)
2670 // Calls generally cannot be CSE-ed
2675 // TODO-CQ: Review this...
2676 /* We try to cse GT_ARR_ELEM nodes instead of GT_IND(GT_ARR_ELEM).
2677 Doing the first allows cse to also kick in for code like
2678 "GT_IND(GT_ARR_ELEM) = GT_IND(GT_ARR_ELEM) + xyz", whereas doing
2679 the second would not allow it */
2681 return (tree->gtOp.gtOp1->gtOper != GT_ARR_ELEM);
2687 return true; // We reach here only when CSE_CONSTS is enabled
2696 return false; // Can't CSE a volatile LCL_VAR
2701 return true; // CSE these Unary Operators
2715 return true; // CSE these Binary Operators
2717 case GT_ADD: // Check for ADDRMODE flag on these Binary Operators
2720 if ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)
2731 return true; // Also CSE these Comparison Operators
2734 return true; // Intrinsics
2737 return true; // Allow GT_COMMA nodes to be CSE-ed.
2743 return false; // Currently the only special nodes that we hit
2744 // that we know that we don't want to CSE
2747 break; // Any new nodes that we might add later...
2755 // A Debug only method that allows you to control whether the CSE logic is enabled for this method.
2757 // If this method returns false then the CSE phase should be performed.
2758 // If the method returns true then the CSE phase should be skipped.
2760 bool Compiler::optConfigDisableCSE()
2762 // Next check if COMPlus_JitNoCSE is set and applies to this method
2764 unsigned jitNoCSE = JitConfig.JitNoCSE();
2768 unsigned methodCount = Compiler::jitTotalMethodCompiled;
2769 if ((jitNoCSE & 0xF000000) == 0xF000000)
2771 unsigned methodCountMask = methodCount & 0xFFF;
2772 unsigned bitsZero = (jitNoCSE >> 12) & 0xFFF;
2773 unsigned bitsOne = (jitNoCSE >> 0) & 0xFFF;
2775 if (((methodCountMask & bitsOne) == bitsOne) && ((~methodCountMask & bitsZero) == bitsZero))
2779 printf(" Disabled by JitNoCSE methodCountMask\n");
2782 return true; // The CSE phase for this method is disabled
2785 else if (jitNoCSE <= (methodCount + 1))
2789 printf(" Disabled by JitNoCSE > methodCount\n");
2792 return true; // The CSE phase for this method is disabled
2800 // A Debug only method that allows you to control whether the CSE logic is enabled for
2801 // a particular CSE in a method
2803 // If this method returns false then the CSE should be performed.
2804 // If the method returns true then the CSE should be skipped.
2806 bool Compiler::optConfigDisableCSE2()
2808 static unsigned totalCSEcount = 0;
2810 unsigned jitNoCSE2 = JitConfig.JitNoCSE2();
2816 if ((jitNoCSE2 & 0xF000000) == 0xF000000)
2818 unsigned totalCSEMask = totalCSEcount & 0xFFF;
2819 unsigned bitsZero = (jitNoCSE2 >> 12) & 0xFFF;
2820 unsigned bitsOne = (jitNoCSE2 >> 0) & 0xFFF;
2822 if (((totalCSEMask & bitsOne) == bitsOne) && ((~totalCSEMask & bitsZero) == bitsZero))
2826 printf(" Disabled by jitNoCSE2 Ones/Zeros mask\n");
2831 else if ((jitNoCSE2 & 0xF000000) == 0xE000000)
2833 unsigned totalCSEMask = totalCSEcount & 0xFFF;
2834 unsigned disableMask = jitNoCSE2 & 0xFFF;
2836 disableMask >>= (totalCSEMask % 12);
2838 if (disableMask & 1)
2842 printf(" Disabled by jitNoCSE2 rotating disable mask\n");
2847 else if (jitNoCSE2 <= totalCSEcount)
2851 printf(" Disabled by jitNoCSE2 > totalCSEcount\n");
2860 void Compiler::optOptimizeCSEs()
2865 printf("\n*************** In optOptimizeCSEs()\n");
2866 printf("Blocks/Trees at start of optOptimizeCSE phase\n");
2867 fgDispBasicBlocks(true);
2871 optCSECandidateCount = 0;
2872 optCSEstart = lvaCount;
2874 #if FEATURE_VALNUM_CSE
2875 INDEBUG(optEnsureClearCSEInfo());
2876 optOptimizeValnumCSEs();
2877 EndPhase(PHASE_OPTIMIZE_VALNUM_CSES);
2878 #endif // FEATURE_VALNUM_CSE
2881 /*****************************************************************************
2883 * Cleanup after CSE to allow us to run more than once.
2886 void Compiler::optCleanupCSEs()
2888 // We must clear the BBF_VISITED and BBF_MARKED flags
2890 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
2892 // And clear all the "visited" bits on the block
2894 block->bbFlags &= ~(BBF_VISITED | BBF_MARKED);
2896 /* Walk the statement trees in this basic block */
2900 // Initialize 'stmt' to the first non-Phi statement
2901 stmt = block->FirstNonPhiDef();
2903 for (; stmt; stmt = stmt->gtNext)
2905 noway_assert(stmt->gtOper == GT_STMT);
2907 /* We must clear the gtCSEnum field */
2908 for (GenTree* tree = stmt->gtStmt.gtStmtExpr; tree; tree = tree->gtPrev)
2910 tree->gtCSEnum = NO_CSE;
2918 /*****************************************************************************
2920 * Ensure that all the CSE information in the IR is initialized the way we expect it,
2921 * before running a CSE phase. This is basically an assert that optCleanupCSEs() is not needed.
2924 void Compiler::optEnsureClearCSEInfo()
2926 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
2928 assert((block->bbFlags & (BBF_VISITED | BBF_MARKED)) == 0);
2930 /* Walk the statement trees in this basic block */
2934 // Initialize 'stmt' to the first non-Phi statement
2935 stmt = block->FirstNonPhiDef();
2937 for (; stmt; stmt = stmt->gtNext)
2939 assert(stmt->gtOper == GT_STMT);
2941 for (GenTree* tree = stmt->gtStmt.gtStmtExpr; tree; tree = tree->gtPrev)
2943 assert(tree->gtCSEnum == NO_CSE);
2951 /*****************************************************************************/
2952 #endif // FEATURE_ANYCSE
2953 /*****************************************************************************/