1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
19 /*****************************************************************************/
21 /*****************************************************************************/
// Size of the CSE candidate hash table (bucket count), derived from EXPSET_SZ.
24 const size_t Compiler::s_optCSEhashSize = EXPSET_SZ * 2;
26 /*****************************************************************************
28 * We've found all the candidates, build the index for easy access.
31 void Compiler::optCSEstop()
// Builds optCSEtab, a flat array mapping (1-based csdIndex - 1) -> CSEdsc*,
// by walking every bucket of the optCSEhash table. Enables O(1) descriptor
// lookup via optCSEfindDsc(). No-op when no candidates were found.
33 if (optCSECandidateCount == 0)
// Zero-initialized allocation: one slot per CSE candidate.
42 optCSEtab = new (this, CMK_CSE) CSEdsc*[optCSECandidateCount]();
// Walk all hash buckets and every descriptor chained within each bucket.
44 for (cnt = s_optCSEhashSize, ptr = optCSEhash; cnt; cnt--, ptr++)
46 for (dsc = *ptr; dsc; dsc = dsc->csdNextInBucket)
50 noway_assert((unsigned)dsc->csdIndex <= optCSECandidateCount);
// csdIndex is 1-based; slot 0 holds candidate #1.
51 if (optCSEtab[dsc->csdIndex - 1] == nullptr)
53 optCSEtab[dsc->csdIndex - 1] = dsc;
// Verify the table is fully populated: every candidate index has a descriptor.
60 for (cnt = 0; cnt < optCSECandidateCount; cnt++)
62 noway_assert(optCSEtab[cnt] != nullptr);
67 /*****************************************************************************
69 * Return the descriptor for the CSE with the given index.
72 inline Compiler::CSEdsc* Compiler::optCSEfindDsc(unsigned index)
// 'index' is the 1-based CSE index (0 means NO_CSE), hence the "- 1"
// when indexing into optCSEtab. Requires optCSEstop() to have run.
75 noway_assert(index <= optCSECandidateCount);
76 noway_assert(optCSEtab[index - 1]);
78 return optCSEtab[index - 1];
81 /*****************************************************************************
83 * For a previously marked CSE, decrement the use counts and unmark it
86 void Compiler::optUnmarkCSE(GenTreePtr tree)
// Undo the use/def accounting for a tree that was marked as a CSE candidate
// and is now being removed from the IR. Decrements the matching descriptor's
// (weighted) use or def counts and clears the tree's gtCSEnum marker.
88 if (!IS_CSE_INDEX(tree->gtCSEnum))
90 // This tree is not a CSE candidate, so there is nothing
95 unsigned CSEnum = GET_CSE_INDEX(tree->gtCSEnum);
98 // make sure it's been initialized
99 noway_assert(optCSEweight <= BB_MAX_WEIGHT);
101 /* Is this a CSE use? */
102 if (IS_CSE_USE(tree->gtCSEnum))
104 desc = optCSEfindDsc(CSEnum);
109 printf("Unmark CSE use #%02d at ", CSEnum);
111 printf(": %3d -> %3d\n", desc->csdUseCount, desc->csdUseCount - 1);
115 /* Reduce the nested CSE's 'use' count */
117 noway_assert(desc->csdUseCount > 0);
// Guarded decrement: avoid unsigned wrap-around in non-assert builds.
119 if (desc->csdUseCount > 0)
121 desc->csdUseCount -= 1;
// Saturating decrement of the weighted use count: clamp to 0 rather
// than underflow when the block weight exceeds the remaining count.
123 if (desc->csdUseWtCnt < optCSEweight)
125 desc->csdUseWtCnt = 0;
129 desc->csdUseWtCnt -= optCSEweight;
// Otherwise this is a CSE def: mirror the same bookkeeping on the def counts.
135 desc = optCSEfindDsc(CSEnum);
140 printf("Unmark CSE def #%02d at ", CSEnum);
142 printf(": %3d -> %3d\n", desc->csdDefCount, desc->csdDefCount - 1);
146 /* Reduce the nested CSE's 'def' count */
148 noway_assert(desc->csdDefCount > 0);
150 if (desc->csdDefCount > 0)
152 desc->csdDefCount -= 1;
// Saturating decrement, same rationale as the use-count path above.
154 if (desc->csdDefWtCnt < optCSEweight)
156 desc->csdDefWtCnt = 0;
160 desc->csdDefWtCnt -= optCSEweight;
// Finally, clear the CSE marker on the node itself.
165 tree->gtCSEnum = NO_CSE;
// Tree-walk callback used by optPropagateNonCSE: detects whether a subtree
// contains a node marked GTF_DONT_CSE. The walk's root (passed via
// pCallbackData) is itself skipped. NOTE(review): the path that reports a
// blocking child (presumably `return WALK_ABORT;`) is not visible in this
// view — the caller tests the walk result against WALK_ABORT; confirm.
168 Compiler::fgWalkResult Compiler::optHasNonCSEChild(GenTreePtr* pTree, fgWalkData* data)
// The root node itself is exempt; keep walking into its children.
170 if (*pTree == data->pCallbackData)
172 return WALK_CONTINUE;
175 if ((*pTree)->gtFlags & GTF_DONT_CSE)
178 // Fix 392756 WP7 Crossgen
179 // Don't propagate the GTF_DONT_CSE flag up from a GT_CNS_INT
181 // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
182 // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
183 // Hence to prevent the constant from becoming a CSE we have marked it as NO_CSE, but this
184 // should not prevent tree's above the constant from becoming CSE's.
186 if ((*pTree)->gtOper == GT_CNS_INT)
188 return WALK_SKIP_SUBTREES;
// Node is CSE-able; no need to look below it for this query.
194 return WALK_SKIP_SUBTREES;
// Tree-walk callback: propagates GTF_DONT_CSE from children up to their
// parent, and marks (non-shared-static-helper) calls as DONT_CSE.
197 Compiler::fgWalkResult Compiler::optPropagateNonCSE(GenTreePtr* pTree, fgWalkData* data)
199 GenTree* tree = *pTree;
200 Compiler* comp = data->compiler;
202 /* Calls get DONT_CSE implicitly */
203 if (tree->OperGet() == GT_CALL)
// Shared-static helper calls are the one kind of call we still allow to CSE.
205 if (!IsSharedStaticHelper(tree))
207 tree->gtFlags |= GTF_DONT_CSE;
211 if ((tree->gtFlags & GTF_DONT_CSE) == 0)
213 /* Propagate the DONT_CSE flag from child to parent */
// optHasNonCSEChild aborts the walk when a blocking child is found.
214 if (comp->fgWalkTreePre(&tree, optHasNonCSEChild, tree) == WALK_ABORT)
216 tree->gtFlags |= GTF_DONT_CSE;
220 return WALK_CONTINUE;
223 /*****************************************************************************
225 * Helper passed to Compiler::fgWalkAllTreesPre() to unmark nested CSE's.
229 Compiler::fgWalkResult Compiler::optUnmarkCSEs(GenTreePtr* pTree, fgWalkData* data)
// Walk callback: unmarks nested CSEs in a tree that is being deleted, except
// for the sub-trees listed in 'keepList' (side-effects extracted from the
// original tree, threaded through GT_COMMA nodes), which remain in the IR.
231 GenTreePtr tree = *pTree;
232 Compiler* comp = data->compiler;
233 GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
235 // We may have a non-NULL side effect list that is being kept
// Scan the keep-list: each GT_COMMA links one kept side-effect (op1) to the
// rest of the list (op2).
239 GenTreePtr keptTree = keepList;
240 while (keptTree->OperGet() == GT_COMMA)
242 assert(keptTree->OperKind() & GTK_SMPOP)
243 GenTreePtr op1 = keptTree->gtOp.gtOp1;
244 GenTreePtr op2 = keptTree->gtGetOp2();
246 // For the GT_COMMA case the op1 is part of the original CSE tree
247 // that is being kept because it contains some side-effect
251 // This tree and all of its sub trees are being kept
252 return WALK_SKIP_SUBTREES;
255 // For the GT_COMMA case the op2 are the remaining side-effects of the original CSE tree
256 // which can again be another GT_COMMA or the final side-effect part
// The last (non-COMMA) entry of the keep-list is also kept intact.
260 if (tree == keptTree)
262 // This tree and all of its sub trees are being kept
263 return WALK_SKIP_SUBTREES;
267 // This node is being removed from the graph of GenTreePtr
268 // Call optUnmarkCSE and decrement the LclVar ref counts.
269 comp->optUnmarkCSE(tree);
270 assert(!IS_CSE_INDEX(tree->gtCSEnum));
272 /* Look for any local variable references */
274 if (tree->gtOper == GT_LCL_VAR)
279 /* This variable ref is going away, decrease its ref counts */
281 lclNum = tree->gtLclVarCommon.gtLclNum;
282 assert(lclNum < comp->lvaCount);
283 varDsc = comp->lvaTable + lclNum;
285 // make sure it's been initialized
286 assert(comp->optCSEweight <= BB_MAX_WEIGHT);
288 /* Decrement its lvRefCnt and lvRefCntWtd */
290 varDsc->decRefCnts(comp->optCSEweight, comp);
293 return WALK_CONTINUE;
// Walk callback for optCSE_GetMaskData: accumulates, into the caller-supplied
// optCSE_MaskData, the bit-vector of CSE defs and uses seen in the tree.
296 Compiler::fgWalkResult Compiler::optCSE_MaskHelper(GenTreePtr* pTree, fgWalkData* walkData)
298 GenTree* tree = *pTree;
299 Compiler* comp = walkData->compiler;
300 optCSE_MaskData* pUserData = (optCSE_MaskData*)(walkData->pCallbackData);
302 if (IS_CSE_INDEX(tree->gtCSEnum))
304 unsigned cseIndex = GET_CSE_INDEX(tree->gtCSEnum);
305 unsigned cseBit = genCSEnum2bit(cseIndex);
// Route the bit into the def-mask or the use-mask accordingly.
306 if (IS_CSE_DEF(tree->gtCSEnum))
308 BitVecOps::AddElemD(comp->cseTraits, pUserData->CSE_defMask, cseBit);
312 BitVecOps::AddElemD(comp->cseTraits, pUserData->CSE_useMask, cseBit);
316 return WALK_CONTINUE;
319 // This function walks all the nodes of a given tree
320 // and returns the mask of CSE defs and uses for the tree
322 void Compiler::optCSE_GetMaskData(GenTreePtr tree, optCSE_MaskData* pMaskData)
// Initialize both masks to empty, then let the walker populate them.
324 pMaskData->CSE_defMask = BitVecOps::MakeCopy(cseTraits, cseEmpty);
325 pMaskData->CSE_useMask = BitVecOps::MakeCopy(cseTraits, cseEmpty);
326 fgWalkTreePre(&tree, optCSE_MaskHelper, (void*)pMaskData);
329 //------------------------------------------------------------------------
330 // optCSE_canSwap: Determine if the execution order of two nodes can be swapped.
333 // op1 - The first node
334 // op2 - The second node
337 // Return true iff it safe to swap the execution order of 'op1' and 'op2',
338 // considering only the locations of the CSE defs and uses.
341 // 'op1' currently occurs before 'op2' in the execution order.
343 bool Compiler::optCSE_canSwap(GenTree* op1, GenTree* op2)
// Swapping is unsafe if either operand defines a CSE that the other uses:
// the def must execute before its uses.
345 // op1 and op2 must be non-null.
346 assert(op1 != nullptr);
347 assert(op2 != nullptr);
349 bool canSwap = true; // the default result unless proven otherwise.
351 optCSE_MaskData op1MaskData;
352 optCSE_MaskData op2MaskData;
// Gather the CSE def/use bit-vectors for each operand subtree.
354 optCSE_GetMaskData(op1, &op1MaskData);
355 optCSE_GetMaskData(op2, &op2MaskData);
357 // We cannot swap if op1 contains a CSE def that is used by op2
358 if (!BitVecOps::IsEmptyIntersection(cseTraits, op1MaskData.CSE_defMask, op2MaskData.CSE_useMask))
364 // We also cannot swap if op2 contains a CSE def that is used by op1.
365 if (!BitVecOps::IsEmptyIntersection(cseTraits, op2MaskData.CSE_defMask, op1MaskData.CSE_useMask))
374 //------------------------------------------------------------------------
375 // optCSE_canSwap: Determine if the execution order of a node's operands can be swapped.
378 // tree - The node of interest
381 // Return true iff it safe to swap the execution order of the operands of 'tree',
382 // considering only the locations of the CSE defs and uses.
// Convenience overload: checks whether the two operands of a binary node
// can have their execution order swapped (delegates to the two-arg form).
384 bool Compiler::optCSE_canSwap(GenTreePtr tree)
386 // We must have a binary treenode with non-null op1 and op2
387 assert((tree->OperKind() & GTK_SMPOP) != 0);
389 GenTreePtr op1 = tree->gtOp.gtOp1;
390 GenTreePtr op2 = tree->gtGetOp2();
392 return optCSE_canSwap(op1, op2);
395 /*****************************************************************************
397 * Compare function passed to qsort() by CSE_Heuristic::SortCandidates
398 * when (CodeOptKind() != Compiler::SMALL_CODE)
402 int __cdecl Compiler::optCSEcostCmpEx(const void* op1, const void* op2)
// qsort comparator (execution-cost variant): orders candidates by
// descending gtCostEx, then descending weighted use count, then ascending
// weighted def count, with csdIndex as the final tie-breaker for stability.
404 CSEdsc* dsc1 = *(CSEdsc**)op1;
405 CSEdsc* dsc2 = *(CSEdsc**)op2;
407 GenTreePtr exp1 = dsc1->csdTree;
408 GenTreePtr exp2 = dsc2->csdTree;
// Higher execution cost sorts first (note dsc2 - dsc1 ordering).
412 diff = (int)(exp2->gtCostEx - exp1->gtCostEx);
419 // Sort the higher Use Counts toward the top
420 diff = (int)(dsc2->csdUseWtCnt - dsc1->csdUseWtCnt);
427 // With the same use count, Sort the lower Def Counts toward the top
428 diff = (int)(dsc1->csdDefWtCnt - dsc2->csdDefWtCnt);
435 // In order to ensure that we have a stable sort, we break ties using the csdIndex
436 return (int)(dsc1->csdIndex - dsc2->csdIndex);
439 /*****************************************************************************
441 * Compare function passed to qsort() by CSE_Heuristic::SortCandidates
442 * when (CodeOptKind() == Compiler::SMALL_CODE)
446 int __cdecl Compiler::optCSEcostCmpSz(const void* op1, const void* op2)
// qsort comparator (code-size variant, SMALL_CODE): same ordering scheme as
// optCSEcostCmpEx but keyed on gtCostSz and the unweighted use/def counts.
448 CSEdsc* dsc1 = *(CSEdsc**)op1;
449 CSEdsc* dsc2 = *(CSEdsc**)op2;
451 GenTreePtr exp1 = dsc1->csdTree;
452 GenTreePtr exp2 = dsc2->csdTree;
// Higher size cost sorts first.
456 diff = (int)(exp2->gtCostSz - exp1->gtCostSz);
463 // Sort the higher Use Counts toward the top
464 diff = (int)(dsc2->csdUseCount - dsc1->csdUseCount);
471 // With the same use count, Sort the lower Def Counts toward the top
472 diff = (int)(dsc1->csdDefCount - dsc2->csdDefCount);
479 // In order to ensure that we have a stable sort, we break ties using the csdIndex
480 return (int)(dsc1->csdIndex - dsc2->csdIndex);
483 /*****************************************************************************/
484 #if FEATURE_VALNUM_CSE
485 /*****************************************************************************/
487 /*****************************************************************************
489 * Initialize the Value Number CSE tracking logic.
492 void Compiler::optValnumCSE_Init()
// Phase setup: create the bit-vector traits used for all CSE sets, allocate
// the (zeroed) hash-bucket table, and reset the per-method candidate state.
498 // Init traits and full/empty bitvectors. This will be used to track the
499 // individual cse indexes.
500 cseTraits = new (getAllocator()) BitVecTraits(EXPSET_SZ, this);
501 cseFull = BitVecOps::UninitVal();
502 cseEmpty = BitVecOps::UninitVal();
503 BitVecOps::AssignNoCopy(cseTraits, cseFull, BitVecOps::MakeFull(cseTraits));
504 BitVecOps::AssignNoCopy(cseTraits, cseEmpty, BitVecOps::MakeEmpty(cseTraits));
506 /* Allocate and clear the hash bucket table */
508 optCSEhash = new (this, CMK_CSE) CSEdsc*[s_optCSEhashSize]();
510 optCSECandidateCount = 0;
511 optDoCSE = false; // Stays false until we find duplicate CSE tree
513 // cseArrLenMap is unused in most functions, allocated only when used
514 cseArrLenMap = nullptr;
517 /*****************************************************************************
519 * Assign an index to the given expression (adding it to the lookup table,
520 * if necessary). Returns the index or 0 if the expression can not be a CSE.
523 unsigned Compiler::optValnumCSE_Index(GenTreePtr tree, GenTreePtr stmt)
// Hash 'tree' by its liberal value number and either (a) record it as the
// first occurrence, (b) append it to an existing candidate's occurrence
// list (assigning a new CSE index on the second sighting), or (c) return
// the already-assigned index. Returns 0 when the tree cannot be a CSE.
530 ValueNum vnlib = tree->GetVN(VNK_Liberal);
532 /* Compute the hash value for the expression */
// The value number itself is the hash key; matching VNs imply matching values.
534 key = (unsigned)vnlib;
537 hash *= (unsigned)(s_optCSEhashSize + 1);
540 hval = hash % s_optCSEhashSize;
542 /* Look for a matching index in the hash table */
546 for (hashDsc = optCSEhash[hval]; hashDsc; hashDsc = hashDsc->csdNextInBucket)
548 if (hashDsc->csdHashValue == key)
550 treeStmtLstPtr newElem;
552 /* Have we started the list of matching nodes? */
554 if (hashDsc->csdTreeList == nullptr)
556 // Create the new element based upon the matching hashDsc element.
// Retroactively add the FIRST occurrence (saved in the descriptor) as the
// head of the occurrence list, now that we know there is a duplicate.
558 newElem = new (this, CMK_TreeStatementList) treeStmtLst;
560 newElem->tslTree = hashDsc->csdTree;
561 newElem->tslStmt = hashDsc->csdStmt;
562 newElem->tslBlock = hashDsc->csdBlock;
563 newElem->tslNext = nullptr;
565 /* Start the list with the first CSE candidate recorded */
567 hashDsc->csdTreeList = newElem;
568 hashDsc->csdTreeLast = newElem;
571 noway_assert(hashDsc->csdTreeList);
573 /* Append this expression to the end of the list */
575 newElem = new (this, CMK_TreeStatementList) treeStmtLst;
577 newElem->tslTree = tree;
578 newElem->tslStmt = stmt;
579 newElem->tslBlock = compCurBB;
580 newElem->tslNext = nullptr;
582 hashDsc->csdTreeLast->tslNext = newElem;
583 hashDsc->csdTreeLast = newElem;
585 optDoCSE = true; // Found a duplicate CSE tree
587 /* Have we assigned a CSE index? */
588 if (hashDsc->csdIndex == 0)
594 // Use this to see if this Value Number base CSE is also a lexical CSE
595 bool treeMatch = GenTree::Compare(hashDsc->csdTree, tree, true);
// gtCSEnum is a signed char; assert the index fits before narrowing.
598 assert(FitsIn<signed char>(hashDsc->csdIndex));
599 tree->gtCSEnum = ((signed char)hashDsc->csdIndex);
600 return hashDsc->csdIndex;
606 /* Not found, create a new entry (unless we have too many already) */
608 if (optCSECandidateCount < MAX_CSE_CNT)
// First sighting: record the tree/stmt/block in a fresh descriptor with
// csdIndex == 0 (no index assigned until a duplicate is found).
610 hashDsc = new (this, CMK_CSE) CSEdsc;
612 hashDsc->csdHashValue = key;
613 hashDsc->csdIndex = 0;
614 hashDsc->csdLiveAcrossCall = 0;
615 hashDsc->csdDefCount = 0;
616 hashDsc->csdUseCount = 0;
617 hashDsc->csdDefWtCnt = 0;
618 hashDsc->csdUseWtCnt = 0;
620 hashDsc->csdTree = tree;
621 hashDsc->csdStmt = stmt;
622 hashDsc->csdBlock = compCurBB;
623 hashDsc->csdTreeList = nullptr;
625 /* Append the entry to the hash bucket */
// Push onto the front of the bucket's singly-linked chain.
627 hashDsc->csdNextInBucket = optCSEhash[hval];
628 optCSEhash[hval] = hashDsc;
632 else // newCSE is true
634 /* We get here only after finding a matching CSE */
636 /* Create a new CSE (unless we have the maximum already) */
638 if (optCSECandidateCount == MAX_CSE_CNT)
643 C_ASSERT((signed char)MAX_CSE_CNT == MAX_CSE_CNT);
// CSE indices are 1-based: pre-increment so the first index is 1.
645 unsigned CSEindex = ++optCSECandidateCount;
646 // EXPSET_TP CSEmask = genCSEnum2bit(CSEindex);
648 /* Record the new CSE index in the hashDsc */
649 hashDsc->csdIndex = CSEindex;
651 /* Update the gtCSEnum field in the original tree */
652 noway_assert(hashDsc->csdTreeList->tslTree->gtCSEnum == 0);
653 assert(FitsIn<signed char>(CSEindex));
655 hashDsc->csdTreeList->tslTree->gtCSEnum = ((signed char)CSEindex);
656 noway_assert(((unsigned)hashDsc->csdTreeList->tslTree->gtCSEnum) == CSEindex);
658 tree->gtCSEnum = ((signed char)CSEindex);
// Debug dump of the newly created candidate.
663 EXPSET_TP tempMask = BitVecOps::MakeSingleton(cseTraits, genCSEnum2bit(CSEindex));
664 printf("\nCSE candidate #%02u, vn=", CSEindex);
666 printf(" cseMask=%s in BB%02u, [cost=%2u, size=%2u]: \n", genES2str(cseTraits, tempMask), compCurBB->bbNum,
667 tree->gtCostEx, tree->gtCostSz);
676 /*****************************************************************************
678 * Locate CSE candidates and assign indices to them
679 * return 0 if no CSE candidates were found
680 * Also initialize bbCseIn, bbCseout and bbCseGen sets for all blocks
683 unsigned Compiler::optValnumCSE_Locate()
// Walks every statement of every block in linear (execution) order, filters
// out trees that cannot be CSEs, and hands the rest to optValnumCSE_Index.
685 // Locate CSE candidates and assign them indices
687 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
692 /* Make the block publicly available */
696 /* Ensure that the BBF_VISITED and BBF_MARKED flag are clear */
697 /* Everyone who uses these flags are required to clear afterwards */
698 noway_assert((block->bbFlags & (BBF_VISITED | BBF_MARKED)) == 0);
700 /* Walk the statement trees in this basic block */
701 for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
703 noway_assert(stmt->gtOper == GT_STMT);
705 /* We walk the tree in the forwards direction (bottom up) */
706 bool stmtHasArrLenCandidate = false;
707 for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
709 if (tree->OperIsCompare() && stmtHasArrLenCandidate)
711 // Check if this compare is a function of (one of) the array
712 // length candidate(s); we may want to update its value number
713 // if the array length gets CSEd
714 updateCseArrLenMap(tree);
717 if (!optIsCSEcandidate(tree))
// Reserved VNs carry no sharable value; skip them.
722 ValueNum vnlib = tree->GetVN(VNK_Liberal);
724 if (ValueNumStore::isReservedVN(vnlib))
729 // Don't CSE constant values, instead let the Value Number
730 // based Assertion Prop phase handle them.
732 if (vnStore->IsVNConstant(vnlib))
737 /* Assign an index to this expression */
739 unsigned CSEindex = optValnumCSE_Index(tree, stmt);
743 noway_assert(((unsigned)tree->gtCSEnum) == CSEindex);
// Remember that this statement produced an array-length candidate so later
// compares in the same statement get checked by updateCseArrLenMap.
746 if (IS_CSE_INDEX(CSEindex) && (tree->OperGet() == GT_ARR_LENGTH))
748 stmtHasArrLenCandidate = true;
754 /* We're done if there were no interesting expressions */
761 /* We're finished building the expression lookup table */
768 //------------------------------------------------------------------------
769 // updateCseArrLenMap: Check if this compare is a tractable function of
770 // an array length that is a CSE candidate, and insert
771 // an entry in the cseArrLenMap if so. This facilitates
772 // subsequently updating the compare's value number if
773 // the array length gets CSEd.
776 // compare - The compare node to check
778 void Compiler::updateCseArrLenMap(GenTreePtr compare)
// See the function-header comment above: records (arrLen -> compare) in
// cseArrLenMap when 'compare' is a recognizable bound-check shape over an
// array-length node that is itself a CSE candidate.
780 assert(compare->OperIsCompare());
782 ValueNum compareVN = compare->gtVNPair.GetConservative();
783 VNFuncApp cmpVNFuncApp;
785 if (!vnStore->GetVNFunc(compareVN, &cmpVNFuncApp) ||
786 (cmpVNFuncApp.m_func != GetVNFuncForOper(compare->OperGet(), (compare->gtFlags & GTF_UNSIGNED) != 0)))
788 // Value numbering inferred this compare as something other
789 // than its own operator; leave its value number alone.
793 // Now look for an array length feeding the compare
794 ValueNumStore::ArrLenArithBoundInfo info;
795 GenTreePtr arrLenParent = nullptr;
797 if (vnStore->IsVNArrLenBound(compareVN))
799 // Simple compare of an array length against something else.
801 vnStore->GetArrLenBoundInfo(compareVN, &info);
802 arrLenParent = compare;
804 else if (vnStore->IsVNArrLenArithBound(compareVN))
806 // Compare of an array length +/- some offset to something else.
// Determine which operand is the arithmetic node (length +/- offset) by
// matching its operator against the VN-recorded arithmetic operator.
808 GenTreePtr op1 = compare->gtGetOp1();
809 GenTreePtr op2 = compare->gtGetOp2();
811 vnStore->GetArrLenArithBoundInfo(compareVN, &info);
812 if (GetVNFuncForOper(op1->OperGet(), (op1->gtFlags & GTF_UNSIGNED) != 0) == info.arrOper)
814 // The arithmetic node is the array length's parent.
817 else if (GetVNFuncForOper(op2->OperGet(), (op2->gtFlags & GTF_UNSIGNED) != 0) == info.arrOper)
819 // The arithmetic node is the array length's parent.
824 if (arrLenParent != nullptr)
826 GenTreePtr arrLen = nullptr;
828 // Find which child of arrLenParent is the array length. Abort if its
829 // conservative value number doesn't match the one from the compare VN.
831 GenTreePtr child1 = arrLenParent->gtGetOp1();
832 if ((child1->OperGet() == GT_ARR_LENGTH) && IS_CSE_INDEX(child1->gtCSEnum) &&
833 (info.vnArray == child1->AsArrLen()->ArrRef()->gtVNPair.GetConservative()))
839 GenTreePtr child2 = arrLenParent->gtGetOp2();
840 if ((child2->OperGet() == GT_ARR_LENGTH) && IS_CSE_INDEX(child2->gtCSEnum) &&
841 (info.vnArray == child2->AsArrLen()->ArrRef()->gtVNPair.GetConservative()))
847 if (arrLen != nullptr)
849 // Found an arrayLen feeding a compare that is a tractable function of it;
850 // record this in the map so we can update the compare VN if the array length
853 if (cseArrLenMap == nullptr)
855 // Allocate map on first use.
856 cseArrLenMap = new (getAllocator()) NodeToNodeMap(getAllocator());
859 cseArrLenMap->Set(arrLen, compare);
864 /*****************************************************************************
866 * Compute each blocks bbCseGen
867 * This is the bitset that represents the CSEs that are generated within the block
869 void Compiler::optValnumCSE_InitDataFlow()
// Dataflow setup: bbCseOut starts at {ALL} everywhere; bbCseIn starts at
// {ZERO} for the entry block (and handler entries), {ALL} elsewhere; then
// bbCseGen is seeded with the candidates each block contains.
871 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
876 /* Initialize the blocks's bbCseIn set */
878 bool init_to_zero = false;
880 if (block == fgFirstBB)
882 /* Clear bbCseIn for the entry block */
885 #if !CSE_INTO_HANDLERS
888 if (bbIsHandlerBeg(block))
890 /* Clear everything on entry to filters or handlers */
897 /* Initialize to {ZERO} prior to dataflow */
898 block->bbCseIn = BitVecOps::MakeCopy(cseTraits, cseEmpty);
902 /* Initialize to {ALL} prior to dataflow */
903 block->bbCseIn = BitVecOps::MakeCopy(cseTraits, cseFull);
906 block->bbCseOut = BitVecOps::MakeCopy(cseTraits, cseFull);
908 /* Initialize to {ZERO} prior to locating the CSE candidates */
909 block->bbCseGen = BitVecOps::MakeCopy(cseTraits, cseEmpty);
912 // We walk the set of CSE candidates and set the bit corresponding to the CSEindex
913 // in the block's bbCseGen bitset
915 for (unsigned cnt = 0; cnt < optCSECandidateCount; cnt++)
917 CSEdsc* dsc = optCSEtab[cnt];
918 unsigned CSEindex = dsc->csdIndex;
919 treeStmtLstPtr lst = dsc->csdTreeList;
// Every occurrence of the candidate marks its containing block's gen set.
922 while (lst != nullptr)
924 BasicBlock* block = lst->tslBlock;
925 BitVecOps::AddElemD(cseTraits, block->bbCseGen, genCSEnum2bit(CSEindex));
931 // Dump out the bbCseGen information that we just created
935 bool headerPrinted = false;
936 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
938 if (block->bbCseGen != nullptr)
942 printf("\nBlocks that generate CSE def/uses\n");
943 headerPrinted = true;
945 printf("BB%02u", block->bbNum);
946 printf(" cseGen = %s\n", genES2str(cseTraits, block->bbCseGen));
956 /*****************************************************************************
958 * CSE Dataflow, so that all helper methods for dataflow are in a single place
// Snapshot of bbCseOut taken before merging, used by EndMerge to detect change.
964 EXPSET_TP m_preMergeOut;
966 Compiler* m_pCompiler;
969 CSE_DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
973 Compiler* getCompiler()
978 // At the start of the merge function of the dataflow equations, initialize premerge state (to detect changes.)
979 void StartMerge(BasicBlock* block)
981 m_preMergeOut = BitVecOps::MakeCopy(m_pCompiler->cseTraits, block->bbCseOut);
984 // During merge, perform the actual merging of the predecessor's (since this is a forward analysis) dataflow flags.
// Available-expressions merge is set intersection: a CSE is available at
// block entry only if it is available on exit of every predecessor.
985 void Merge(BasicBlock* block, BasicBlock* predBlock, flowList* preds)
987 BitVecOps::IntersectionD(m_pCompiler->cseTraits, block->bbCseIn, predBlock->bbCseOut);
990 // At the end of the merge store results of the dataflow equations, in a postmerge state.
991 bool EndMerge(BasicBlock* block)
// Transfer function: out = out & (in | gen); returns true when out changed,
// which keeps the block in the dataflow worklist.
993 BitVecTraits* traits = m_pCompiler->cseTraits;
994 EXPSET_TP mergeOut = BitVecOps::MakeCopy(traits, block->bbCseIn);
995 BitVecOps::UnionD(traits, mergeOut, block->bbCseGen);
996 BitVecOps::IntersectionD(traits, mergeOut, block->bbCseOut);
997 BitVecOps::Assign(traits, block->bbCseOut, mergeOut);
998 return (!BitVecOps::Equal(traits, mergeOut, m_preMergeOut));
1002 /*****************************************************************************
1004 * Perform a DataFlow forward analysis using the block CSE bitsets:
1006 * bbCseGen - Exact CSEs that become available within the block
1007 * bbCseIn - Maximal estimate of CSEs that are/could be available at input to the block
1008 * bbCseOut - Maximal estimate of CSEs that are/could be available at exit to the block
1011 * bbCseIn - Computed CSEs that are available at input to the block
1012 * bbCseOut - Computed CSEs that are available at exit to the block
1015 void Compiler::optValnumCSE_DataFlow()
// Runs the forward available-expressions analysis defined by CSE_DataFlow
// over the flow graph, refining bbCseIn/bbCseOut to a fixed point.
1017 CSE_DataFlow cse(this);
1019 // Modified dataflow algorithm for available expressions.
1020 DataFlow cse_flow(this);
1022 cse_flow.ForwardAnalysis(cse);
// Debug dump of the converged in/out sets.
1027 printf("\nAfter performing DataFlow for ValnumCSE's\n");
1029 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
1031 printf("BB%02u", block->bbNum);
1032 printf(" cseIn = %s", genES2str(cseTraits, block->bbCseIn));
1033 printf(" cseOut = %s", genES2str(cseTraits, block->bbCseOut));
1042 /*****************************************************************************
1044 * Using the information computed by CSE_DataFlow determine for each
1045 * CSE whether the CSE is a definition (if the CSE was not available)
1046 * or if the CSE is a use (if the CSE was previously made available)
1047 * The implementation iterates of all blocks setting 'available_cses'
1048 * to the CSEs that are available at input to the block.
1049 * When a CSE expression is encountered it is classified as either
1050 * as a definition (if the CSE is not in the 'available_cses' set) or
1051 * as a use (if the CSE is in the 'available_cses' set). If the CSE
1052 * is a definition then it is added to the 'available_cses' set.
1053 * In the Value Number based CSEs we do not need to have kill sets
1056 void Compiler::optValnumCSE_Availablity()
// See the function-header comment above: classifies every marked CSE node as
// a def or a use by tracking 'available_cses' forward through each block,
// and accumulates the (weighted) def/use counts in each descriptor.
1061 printf("Labeling the CSEs with Use/Def information\n");
1064 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
1069 /* Make the block publicly available */
// Seed availability from the dataflow result for this block's entry.
1073 EXPSET_TP available_cses = BitVecOps::MakeCopy(cseTraits, block->bbCseIn);
1075 optCSEweight = block->getBBWeight(this);
1077 /* Walk the statement trees in this basic block */
1079 for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
1081 noway_assert(stmt->gtOper == GT_STMT);
1083 /* We walk the tree in the forwards direction (bottom up) */
1084 for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
1086 if (IS_CSE_INDEX(tree->gtCSEnum))
1088 unsigned int cseBit = genCSEnum2bit(tree->gtCSEnum);
1089 CSEdsc* desc = optCSEfindDsc(tree->gtCSEnum);
1090 unsigned stmw = block->getBBWeight(this);
1092 /* Is this expression available here? */
1094 if (BitVecOps::IsMember(cseTraits, available_cses, cseBit))
1096 /* This is a CSE use */
1098 desc->csdUseCount += 1;
1099 desc->csdUseWtCnt += stmw;
1103 if (tree->gtFlags & GTF_COLON_COND)
1105 // We can't create CSE definitions inside QMARK-COLON trees
1106 tree->gtCSEnum = NO_CSE;
1110 /* This is a CSE def */
// Track whether all defs of this candidate share one conservative VN;
// NoVN marks candidates whose defs disagree.
1112 if (desc->csdDefCount == 0)
1114 // This is the first def visited, so copy its conservative VN
1115 desc->defConservativeVN = tree->gtVNPair.GetConservative();
1117 else if (tree->gtVNPair.GetConservative() != desc->defConservativeVN)
1119 // This candidate has defs with differing conservative VNs
1120 desc->defConservativeVN = ValueNumStore::NoVN;
1123 desc->csdDefCount += 1;
1124 desc->csdDefWtCnt += stmw;
1126 /* Mark the node as a CSE definition */
1128 tree->gtCSEnum = TO_CSE_DEF(tree->gtCSEnum);
1130 /* This CSE will be available after this def */
1131 BitVecOps::AddElemD(cseTraits, available_cses, cseBit);
1134 if (verbose && IS_CSE_INDEX(tree->gtCSEnum))
1136 printf("BB%02u ", block->bbNum);
1138 printf(" %s of CSE #%02u [weight=%s]\n", IS_CSE_USE(tree->gtCSEnum) ? "Use" : "Def",
1139 GET_CSE_INDEX(tree->gtCSEnum), refCntWtd2str(stmw));
1148 // The following class handles the CSE heuristics
1149 // we use a complex set of heuristic rules
1150 // to determine if it is likely to be profitable to perform this CSE
1154 Compiler* m_pCompiler;
// Number of CSE LclVars introduced so far by this heuristic.
1155 unsigned m_addCSEcount;
// Promotion cutoffs computed by Initialize(): candidates whose ref counts
// exceed these thresholds get aggressive / moderate treatment respectively.
1157 unsigned aggressiveRefCnt;
1158 unsigned moderateRefCnt;
1159 unsigned enregCount; // count of the number of enregisterable variables
1162 Compiler::codeOptimize codeOptKind;
// Candidate table sorted by the cost comparators (see SortCandidates).
1163 Compiler::CSEdsc** sortTab;
1171 CSE_Heuristic(Compiler* pCompiler) : m_pCompiler(pCompiler)
1173 codeOptKind = m_pCompiler->compCodeOpt();
// Accessor for the cached optimization mode (SMALL_CODE vs speed).
1176 Compiler::codeOptimize CodeOptKind()
1181 // Perform the Initialization step for our CSE Heuristics
1182 // determine the various cut off values to use for
1183 // the aggressive, moderate and conservative CSE promotions
1184 // count the number of enregisterable variables
1185 // determine if the method has a large or huge stack frame.
// Body of CSE_Heuristic::Initialize (see the comment block above): estimates
// the frame size and the number of enregisterable locals, then derives the
// aggressive/moderate promotion cutoffs from the ref-count-sorted locals.
1189 m_addCSEcount = 0; /* Count of the number of LclVars for CSEs that we added */
1191 // Record the weighted ref count of the last "for sure" callee saved LclVar
1192 aggressiveRefCnt = 0;
1200 #ifdef _TARGET_XARCH_
1201 if (m_pCompiler->compLongUsed)
1207 unsigned frameSize = 0;
// Rough register budget: callee-saved regs count triple, trash regs double.
1208 unsigned regAvailEstimate = ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2) + 1);
1212 for (lclNum = 0, varDsc = m_pCompiler->lvaTable; lclNum < m_pCompiler->lvaCount; lclNum++, varDsc++)
// Unreferenced locals contribute nothing to the frame estimate.
1214 if (varDsc->lvRefCnt == 0)
1219 bool onStack = (regAvailEstimate == 0); // true when it is likely that this LclVar will have a stack home
1221 // Some LclVars always have stack homes
1222 if ((varDsc->lvDoNotEnregister) || (varDsc->lvType == TYP_LCLBLK))
1228 // Treat floating point and 64 bit integers as always on the stack
1229 if (varTypeIsFloating(varDsc->TypeGet()) || varTypeIsLong(varDsc->TypeGet()))
1235 frameSize += m_pCompiler->lvaLclSize(lclNum);
1239 // For the purposes of estimating the frameSize we
1240 // will consider this LclVar as being enregistered.
1241 // Now we reduce the remaining regAvailEstimate by
1242 // an appropriate amount.
1243 if (varDsc->lvRefCnt <= 2)
1245 // a single use single def LclVar only uses 1
1246 regAvailEstimate -= 1;
1250 // a LclVar with multiple uses and defs uses 2
1251 if (regAvailEstimate >= 2)
1253 regAvailEstimate -= 2;
1257 // Don't try to subtract when regAvailEstimate is 1
1258 regAvailEstimate = 0;
// Per-target thresholds for declaring the frame "large" (and "huge").
1262 #ifdef _TARGET_XARCH_
1263 if (frameSize > 0x080)
1265 // We likely have a large stack frame.
1266 // Thus we might need to use large displacements when loading or storing
1267 // to CSE LclVars that are not enregistered
1269 break; // early out, we don't need to keep increasing frameSize
1271 #else // _TARGET_ARM_
1272 if (frameSize > 0x0400)
1276 if (frameSize > 0x10000)
// Walk locals in decreasing ref-count order to find the cutoff ref counts at
// which the register budget is exhausted.
1284 unsigned sortNum = 0;
1285 while (sortNum < m_pCompiler->lvaTrackedCount)
1287 LclVarDsc* varDsc = m_pCompiler->lvaRefSorted[sortNum++];
1288 var_types varTyp = varDsc->TypeGet();
1290 if (varDsc->lvDoNotEnregister)
1295 if (!varTypeIsFloating(varTyp))
1297 // TODO-1stClassStructs: Remove this; it is here to duplicate previous behavior.
1298 // Note that this makes genTypeStSz return 1.
1299 if (varTypeIsStruct(varTyp))
1301 varTyp = TYP_STRUCT;
1303 enregCount += genTypeStSz(varTyp);
// Aggressive cutoff: ref count of the local at ~1.5x the callee-saved budget.
1306 if ((aggressiveRefCnt == 0) && (enregCount > (CNT_CALLEE_ENREG * 3 / 2)))
1308 if (CodeOptKind() == Compiler::SMALL_CODE)
1310 aggressiveRefCnt = varDsc->lvRefCnt + BB_UNITY_WEIGHT;
1314 aggressiveRefCnt = varDsc->lvRefCntWtd + BB_UNITY_WEIGHT;
// Moderate cutoff: ref count at the full register budget.
1317 if ((moderateRefCnt == 0) && (enregCount > ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2))))
1319 if (CodeOptKind() == Compiler::SMALL_CODE)
1321 moderateRefCnt = varDsc->lvRefCnt;
1325 moderateRefCnt = varDsc->lvRefCntWtd;
1330 // use smaller value for mult when enregCount is in [0..4]
1331 if (enregCount <= 4)
1333 mult = (enregCount <= 2) ? 1 : 2;
// Enforce minimum cutoffs in case the loops above never set them.
1336 aggressiveRefCnt = max(BB_UNITY_WEIGHT * mult, aggressiveRefCnt);
1337 moderateRefCnt = max((BB_UNITY_WEIGHT * mult) / 2, moderateRefCnt);
1340 if (m_pCompiler->verbose)
1343 printf("Aggressive CSE Promotion cutoff is %u\n", aggressiveRefCnt);
1344 printf("Moderate CSE Promotion cutoff is %u\n", moderateRefCnt);
1345 printf("Framesize estimate is 0x%04X\n", frameSize);
1346 printf("We have a %s frame\n", hugeFrame ? "huge" : (largeFrame ? "large" : "small"));
1351 void SortCandidates()
// Copies optCSEtab into sortTab and qsorts it with the comparator matching
// the current optimization mode (size cost vs execution cost), then
// optionally dumps the sorted candidate list.
1353 /* Create an expression table sorted by decreasing cost */
1354 sortTab = new (m_pCompiler, CMK_CSE) Compiler::CSEdsc*[m_pCompiler->optCSECandidateCount];
1356 sortSiz = m_pCompiler->optCSECandidateCount * sizeof(*sortTab);
1357 memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz);
1359 if (CodeOptKind() == Compiler::SMALL_CODE)
1361 qsort(sortTab, m_pCompiler->optCSECandidateCount, sizeof(*sortTab), m_pCompiler->optCSEcostCmpSz);
1365 qsort(sortTab, m_pCompiler->optCSECandidateCount, sizeof(*sortTab), m_pCompiler->optCSEcostCmpEx);
1369 if (m_pCompiler->verbose)
1371 printf("\nSorted CSE candidates:\n");
1372 /* Print out the CSE candidates */
1374 for (unsigned cnt = 0; cnt < m_pCompiler->optCSECandidateCount; cnt++)
1376 Compiler::CSEdsc* dsc = sortTab[cnt];
1377 GenTreePtr expr = dsc->csdTree;
// Report the counts that match the sort key for the current mode.
1382 if (CodeOptKind() == Compiler::SMALL_CODE)
1384 def = dsc->csdDefCount; // def count
1385 use = dsc->csdUseCount; // use count (excluding the implicit uses at defs)
1389 def = dsc->csdDefWtCnt; // weighted def count
1390 use = dsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
1393 tempMask = BitVecOps::MakeSingleton(m_pCompiler->cseTraits, genCSEnum2bit(dsc->csdIndex));
1394 printf("CSE #%02u,cseMask=%s,useCnt=%d: [def=%3u, use=%3u", dsc->csdIndex,
1395 genES2str(m_pCompiler->cseTraits, tempMask), dsc->csdUseCount, def, use);
1397 m_pCompiler->gtDispTree(expr, nullptr, nullptr, true);
1404 // The following class nested within CSE_Heuristic encapsulates the information
1405 // about the current CSE candidate that is under consideration
1407 // TODO-Cleanup: This is still very much based upon the old Lexical CSE implementation
1408 // and needs to be reworked for the Value Number based implementation
1412 CSE_Heuristic* m_context;
1413 Compiler::CSEdsc* m_CseDsc;
// m_cseIndex caches the candidate's CSE index from the descriptor (set in the ctor below).
1415 unsigned m_cseIndex;
// Def/use counts; raw or weighted depending on CodeOptKind (see InitializeCounts).
1417 unsigned m_defCount;
1418 unsigned m_useCount;
// Constructor: binds the candidate to its heuristic context and descriptor.
1424 CSE_Candidate(CSE_Heuristic* context, Compiler::CSEdsc* cseDsc) : m_context(context), m_CseDsc(cseDsc)
1426 m_cseIndex = m_CseDsc->csdIndex;
// Accessor: the underlying CSE descriptor for this candidate.
1429 Compiler::CSEdsc* CseDsc()
// Accessor: the candidate's representative expression tree.
1445 // TODO-CQ: With ValNum CSE's the Expr and its cost can vary.
1448 return m_CseDsc->csdTree;
// True when at least one occurrence of this CSE is live across a call site.
1459 bool LiveAcrossCall()
1461 return (m_CseDsc->csdLiveAcrossCall != 0);
1464 void InitializeCounts()
// Loads cost/size and def/use counts from the descriptor. Under SMALL_CODE the
// raw counts and size-based cost are used; otherwise the weighted counts and
// execution-based cost are used. m_Size is always gtCostSz in both modes.
1466 if (m_context->CodeOptKind() == Compiler::SMALL_CODE)
1468 m_Cost = Expr()->gtCostSz; // the estimated code size
1469 m_Size = Expr()->gtCostSz; // always the gtCostSz
1470 m_defCount = m_CseDsc->csdDefCount; // def count
1471 m_useCount = m_CseDsc->csdUseCount; // use count (excluding the implicit uses at defs)
1475 m_Cost = Expr()->gtCostEx; // the estimated execution cost
1476 m_Size = Expr()->gtCostSz; // always the gtCostSz
1477 m_defCount = m_CseDsc->csdDefWtCnt; // weighted def count
1478 m_useCount = m_CseDsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
1484 //------------------------------------------------------------------------
1485 // optConfigBiasedCSE:
1486 // Stress mode to shuffle the decision to CSE or not using environment
1487 // variable COMPlus_JitStressBiasedCSE (= 0 to 100%). When the bias value
1488 // is not specified but COMPlus_JitStress is ON, generate a random bias.
1491 // 0 -- This method is indifferent about this CSE (no bias specified and no stress)
1492 // 1 -- This CSE must be performed to maintain specified/generated bias.
1493 // -1 -- This CSE mustn't be performed to maintain specified/generated bias.
1496 // A debug stress only method that returns "1" with probability (P)
1499 // P = (COMPlus_JitStressBiasedCSE / 100) (or)
1500 // P = (random(100) / 100) when COMPlus_JitStress is specified and
1501 // COMPlus_JitStressBiasedCSE is unspecified.
1503 // When specified, the bias is reinterpreted as a decimal number between 0
1505 // When bias is not specified, a bias is randomly generated if COMPlus_JitStress
1508 // Callers are supposed to call this method for each CSE promotion decision
1509 // and ignore the call if return value is 0 and honor the 1 with a CSE and
1510 // -1 with a no-CSE to maintain the specified/generated bias.
1512 int optConfigBiasedCSE()
1514 // Seed the PRNG, if never done before.
// Seeding from the method hash keeps the stress decisions deterministic per-method.
1515 if (!m_cseRNG.IsInitialized())
1517 m_cseRNG.Init(m_pCompiler->info.compMethodHash());
1518 m_bias = m_cseRNG.Next(100);
1521 // Obtain the bias value and reinterpret as decimal.
1522 unsigned bias = ReinterpretHexAsDecimal(JitConfig.JitStressBiasedCSE());
1524 // Invalid value, check if JitStress is ON.
1527 if (!m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, MAX_STRESS_WEIGHT))
1529 // JitStress is OFF for CSE, nothing to do.
1533 JITDUMP("JitStressBiasedCSE is OFF, but JitStress is ON: generated bias=%d.\n", bias);
1536 // Generate a number between (0, 99) and if the generated
1537 // number is smaller than bias, then perform CSE.
1538 unsigned gen = m_cseRNG.Next(100);
1539 int ret = (gen < bias) ? 1 : -1;
1541 if (m_pCompiler->verbose)
1545 printf("No CSE because gen=%d >= bias=%d\n", gen, bias);
1549 printf("Promoting CSE because gen=%d < bias=%d\n", gen, bias);
1553 // Indicate whether to perform CSE or not.
1558 // Given a CSE candidate decide whether it passes or fails the profitability heuristic
1559 // return true if we believe that it is profitable to promote this candidate to a CSE
1561 bool PromotionCheck(CSE_Candidate* candidate)
1563 bool result = false;
// Debug stress hooks get first say: a non-zero biased-stress answer is final,
// and optConfigDisableCSE2 can veto this individual CSE.
1566 int stressResult = optConfigBiasedCSE();
1567 if (stressResult != 0)
1569 // Stress is enabled. Check whether to perform CSE or not.
1570 return (stressResult > 0);
1573 if (m_pCompiler->optConfigDisableCSE2())
1575 return false; // skip this CSE
1580 Our calculation is based on the following cost estimate formula
1586 If we introduce a CSE temp at each definition and
1587 replace each use with the CSE temp then our cost is:
1589 (def * (cost + cse-def-cost)) + (use * cse-use-cost)
1591 We must estimate the values to use for cse-def-cost and cse-use-cost
1593 If we are able to enregister the CSE then the cse-use-cost is one
1594 and cse-def-cost is either zero or one. Zero in the case where
1595 we needed to evaluate the def into a register and we can use that
1596 register as the CSE temp as well.
1598 If we are unable to enregister the CSE then the cse-use-cost is IND_COST
1599 and the cse-def-cost is also IND_COST.
1601 If we want to be conservative we use IND_COST as the value
1602 for both cse-def-cost and cse-use-cost and then we never introduce
1603 a CSE that could pessimize the execution time of the method.
1605 If we want to be more moderate we use (IND_COST_EX + 1) / 2 as the
1606 values for both cse-def-cost and cse-use-cost.
1608 If we want to be aggressive we use 1 as the values for both
1609 cse-def-cost and cse-use-cost.
1611 If we believe that the CSE is very valuable in terms of weighted ref counts
1612 such that it would always be enregistered by the register allocator we choose
1613 the aggressive use def costs.
1615 If we believe that the CSE is somewhat valuable in terms of weighted ref counts
1616 such that it would likely be enregistered by the register allocator we choose
1617 the moderate use def costs.
1619 otherwise we choose the conservative use def costs.
1623 unsigned cse_def_cost;
1624 unsigned cse_use_cost;
1626 unsigned no_cse_cost = 0;
1627 unsigned yes_cse_cost = 0;
1628 unsigned extra_yes_cost = 0;
1629 unsigned extra_no_cost = 0;
1631 // The 'cseRefCnt' is the RefCnt that we will have if we promote this CSE into a new LclVar
1632 // Each CSE Def will contain two Refs and each CSE Use will have one Ref of this new LclVar
1633 unsigned cseRefCnt = (candidate->DefCount() * 2) + candidate->UseCount();
// SMALL_CODE: costs model encoded instruction bytes; the stack-offset size
// (small vs large frame) changes the encoding length on both XARCH and ARM.
1635 if (CodeOptKind() == Compiler::SMALL_CODE)
1637 if (cseRefCnt >= aggressiveRefCnt)
1640 if (m_pCompiler->verbose)
1642 printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
1648 if (candidate->LiveAcrossCall() != 0)
1662 else if (largeFrame)
1665 if (m_pCompiler->verbose)
1667 printf("Codesize CSE Promotion (large frame)\n");
1670 #ifdef _TARGET_XARCH_
1671 /* The following formula is good choice when optimizing CSE for SMALL_CODE */
1672 cse_def_cost = 6; // mov [EBP-0x00001FC],reg
1673 cse_use_cost = 5; // [EBP-0x00001FC]
1674 #else // _TARGET_ARM_
1677 cse_def_cost = 12; // movw/movt r10 and str reg,[sp+r10]
1682 cse_def_cost = 8; // movw r10 and str reg,[sp+r10]
1690 if (m_pCompiler->verbose)
1692 printf("Codesize CSE Promotion (small frame)\n");
1695 #ifdef _TARGET_XARCH_
1696 /* The following formula is good choice when optimizing CSE for SMALL_CODE */
1697 cse_def_cost = 3; // mov [EBP-1C],reg
1698 cse_use_cost = 2; // [EBP-1C]
1699 #else // _TARGET_ARM_
1700 cse_def_cost = 2; // str reg,[sp+0x9c]
1701 cse_use_cost = 2; // ldr reg,[sp+0x9c]
// Not SMALL_CODE: costs model execution, tiered by how likely the temp is to be
// enregistered (aggressive / moderate / conservative, per the comment block above).
1705 else // not SMALL_CODE ...
1707 if (cseRefCnt >= aggressiveRefCnt)
1710 if (m_pCompiler->verbose)
1712 printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
1718 else if (cseRefCnt >= moderateRefCnt)
1721 if (candidate->LiveAcrossCall() == 0)
1724 if (m_pCompiler->verbose)
1726 printf("Moderate CSE Promotion (CSE never live at call) (%u >= %u)\n", cseRefCnt,
1733 else // candidate is live across call
1736 if (m_pCompiler->verbose)
1738 printf("Moderate CSE Promotion (%u >= %u)\n", cseRefCnt, moderateRefCnt);
1743 extra_yes_cost = BB_UNITY_WEIGHT * 2; // Extra cost in case we have to spill/restore a caller
1747 else // Conservative CSE promotion
1749 if (candidate->LiveAcrossCall() == 0)
1752 if (m_pCompiler->verbose)
1754 printf("Conservative CSE Promotion (CSE never live at call) (%u < %u)\n", cseRefCnt,
1761 else // candidate is live across call
1764 if (m_pCompiler->verbose)
1766 printf("Conservative CSE Promotion (%u < %u)\n", cseRefCnt, moderateRefCnt);
1771 extra_yes_cost = BB_UNITY_WEIGHT * 4; // Extra cost in case we have to spill/restore a caller
1775 // If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable
1776 if (m_pCompiler->lvaTrackedCount == lclMAX_TRACKED)
1795 // estimate the cost from lost codesize reduction if we do not perform the CSE
1796 if (candidate->Size() > cse_use_cost)
1798 Compiler::CSEdsc* dsc = candidate->CseDsc(); // We need to retrieve the actual use count, not the
1800 extra_no_cost = candidate->Size() - cse_use_cost;
1801 extra_no_cost = extra_no_cost * dsc->csdUseCount * 2;
1804 /* no_cse_cost is the cost estimate when we decide not to make a CSE */
1805 /* yes_cse_cost is the cost estimate when we decide to make a CSE */
1807 no_cse_cost = candidate->UseCount() * candidate->Cost();
1808 yes_cse_cost = (candidate->DefCount() * cse_def_cost) + (candidate->UseCount() * cse_use_cost);
1810 #if CPU_LONG_USES_REGPAIR
1811 if (candidate->Expr()->TypeGet() == TYP_LONG)
1816 no_cse_cost += extra_no_cost;
1817 yes_cse_cost += extra_yes_cost;
1820 if (m_pCompiler->verbose)
1822 printf("cseRefCnt=%d, aggressiveRefCnt=%d, moderateRefCnt=%d\n", cseRefCnt, aggressiveRefCnt,
1824 printf("defCnt=%d, useCnt=%d, cost=%d, size=%d\n", candidate->DefCount(), candidate->UseCount(),
1825 candidate->Cost(), candidate->Size());
1826 printf("def_cost=%d, use_cost=%d, extra_no_cost=%d, extra_yes_cost=%d\n", cse_def_cost, cse_use_cost,
1827 extra_no_cost, extra_yes_cost);
1829 printf("CSE cost savings check (%u >= %u) %s\n", no_cse_cost, yes_cse_cost,
1830 (no_cse_cost >= yes_cse_cost) ? "passes" : "fails");
1834 // Should we make this candidate into a CSE?
1835 // Is the yes cost less than the no cost
1837 if (yes_cse_cost <= no_cse_cost)
1839 result = true; // Yes make this a CSE
// Under stress, occasionally promote even when the cost check fails, with
// probability proportional to no_cse_cost/yes_cse_cost.
1843 /* In stress mode we will make some extra CSEs */
1844 if (no_cse_cost > 0)
1846 int percentage = (no_cse_cost * 100) / yes_cse_cost;
1848 if (m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, percentage))
1850 result = true; // Yes make this a CSE
1858 // PerformCSE() takes a successful candidate and performs the appropriate replacements:
1860 // It will replace all of the CSE defs with assignments to a new "cse0" LclVar
1861 // and will replace all of the CSE uses with reads of the "cse0" LclVar
1863 void PerformCSE(CSE_Candidate* successfulCandidate)
1865 unsigned cseRefCnt = (successfulCandidate->DefCount() * 2) + successfulCandidate->UseCount();
// Each promoted CSE that is live across a call raises the aggressive/moderate
// cutoffs slightly, making subsequent promotions progressively harder.
1867 if (successfulCandidate->LiveAcrossCall() != 0)
1869 // As we introduce new LclVars for these CSE we slightly
1870 // increase the cutoffs for aggressive and moderate CSE's
1872 int incr = BB_UNITY_WEIGHT;
1874 #if CPU_LONG_USES_REGPAIR
1875 if (successfulCandidate->Expr()->TypeGet() == TYP_LONG)
1879 if (cseRefCnt > aggressiveRefCnt)
1881 aggressiveRefCnt += incr;
1884 if (cseRefCnt > moderateRefCnt)
1886 moderateRefCnt += (incr / 2);
1890 /* Introduce a new temp for the CSE */
1892 // we will create a long lifetime temp for the new cse LclVar
1893 unsigned cseLclVarNum = m_pCompiler->lvaGrabTemp(false DEBUGARG("ValNumCSE"));
1894 var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet());
1895 if (varTypeIsStruct(cseLclVarTyp))
1897 m_pCompiler->lvaSetStruct(cseLclVarNum, m_pCompiler->gtGetStructHandle(successfulCandidate->Expr()), false);
1899 m_pCompiler->lvaTable[cseLclVarNum].lvType = cseLclVarTyp;
1900 m_pCompiler->lvaTable[cseLclVarNum].lvIsCSE = true;
1902 m_addCSEcount++; // Record that we created a new LclVar for use as a CSE temp
1903 m_pCompiler->optCSEcount++;
1905 ValueNum defConservativeVN = successfulCandidate->CseDsc()->defConservativeVN;
1907 /* Walk all references to this CSE, adding an assignment
1908 to the CSE temp to all defs and changing all refs to
1909 a simple use of the CSE temp.
1911 We also unmark nested CSE's for all uses.
1914 Compiler::treeStmtLstPtr lst;
1915 lst = successfulCandidate->CseDsc()->csdTreeList;
// Debug-only sanity pass: verify all occurrences of this CSE share a VN
// (disabled by default via the QQQ_CHECK_CSE_VNS switch).
1918 #define QQQ_CHECK_CSE_VNS 0
1919 #if QQQ_CHECK_CSE_VNS
1920 assert(lst != NULL);
1921 ValueNum firstVN = lst->tslTree->gtVN;
1923 bool allSame = true;
1926 if (IS_CSE_INDEX(lst->tslTree->gtCSEnum))
1928 if (lst->tslTree->gtVN != firstVN)
1938 lst = dsc->csdTreeList;
1939 GenTreePtr firstTree = lst->tslTree;
1940 printf("In %s, CSE (oper = %s, type = %s) has differing VNs: ", info.compFullName,
1941 GenTree::NodeName(firstTree->OperGet()), varTypeName(firstTree->TypeGet()));
1944 if (IS_CSE_INDEX(lst->tslTree->gtCSEnum))
1946 printf("0x%x(%s,%d) ", lst->tslTree, IS_CSE_USE(lst->tslTree->gtCSEnum) ? "u" : "d",
1947 lst->tslTree->gtVN);
1953 lst = dsc->csdTreeList;
// Main replacement loop over every occurrence (def or use) of this CSE.
1958 /* Process the next node in the list */
1959 GenTreePtr exp = lst->tslTree;
1960 GenTreePtr stm = lst->tslStmt;
1961 noway_assert(stm->gtOper == GT_STMT);
1962 BasicBlock* blk = lst->tslBlock;
1964 /* Advance to the next node in the list */
1967 // Assert if we used DEBUG_DESTROY_NODE on this CSE exp
1968 assert(exp->gtOper != GT_COUNT);
1970 /* Ignore the node if it's not been marked as a CSE */
1971 if (!IS_CSE_INDEX(exp->gtCSEnum))
1976 /* Make sure we update the weighted ref count correctly */
1977 m_pCompiler->optCSEweight = blk->getBBWeight(m_pCompiler);
1979 /* Figure out the actual type of the value */
1980 var_types expTyp = genActualType(exp->TypeGet());
1981 noway_assert(expTyp == cseLclVarTyp);
1983 // This will contain the replacement tree for exp
1984 // It will either be the CSE def or CSE ref
1986 GenTreePtr cse = nullptr;
1988 FieldSeqNode* fldSeq = nullptr;
1989 bool hasZeroMapAnnotation = m_pCompiler->GetZeroOffsetFieldMap()->Lookup(exp, &fldSeq);
1991 if (IS_CSE_USE(exp->gtCSEnum))
1993 /* This is a use of the CSE */
1996 if (m_pCompiler->verbose)
1998 printf("\nCSE #%02u use at ", exp->gtCSEnum)
1999 Compiler::printTreeID(exp);
2000 printf(" replaced in BB%02u with temp use.\n", blk->bbNum);
2004 /* check for and collect any SIDE_EFFECTS */
2005 GenTreePtr sideEffList = nullptr;
2007 if (exp->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS)
2009 // Extract any side effects from exp
2011 m_pCompiler->gtExtractSideEffList(exp, &sideEffList, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE);
2014 // We will replace the CSE ref with a new tree
2015 // this is typically just a simple use of the new CSE LclVar
2017 cse = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
2018 cse->gtVNPair = exp->gtVNPair; // assign the proper Value Numbers
2019 if (defConservativeVN != ValueNumStore::NoVN)
2021 // All defs of this CSE share the same conservative VN, and we are rewriting this
2022 // use to fetch the same value with no reload, so we can safely propagate that
2023 // conservative VN to this use. This can help range check elimination later on.
2024 cse->gtVNPair.SetConservative(defConservativeVN);
// Special case: keep a CSE'd GT_ARR_LENGTH consistent with the compare node
// recorded in cseArrLenMap, so later range-check elimination can match them up.
2027 if ((exp->OperGet() == GT_ARR_LENGTH) && (m_pCompiler->cseArrLenMap != nullptr) &&
2028 (m_pCompiler->cseArrLenMap->Lookup(exp, &cmp)))
2030 // Propagate the new value number to this compare node as well, since
2031 // subsequent range check elimination will try to correlate it with
2032 // the other appearances that are getting CSEd.
2034 ValueNumStore* vnStore = m_pCompiler->vnStore;
2035 ValueNum oldCmpVN = cmp->gtVNPair.GetConservative();
2036 ValueNumStore::ArrLenArithBoundInfo info;
2037 ValueNum newCmpArgVN;
2038 if (vnStore->IsVNArrLenBound(oldCmpVN))
2040 // Comparison is against the array length directly.
2042 newCmpArgVN = defConservativeVN;
2043 vnStore->GetArrLenBoundInfo(oldCmpVN, &info);
2047 // Comparison is against the array length +/- some offset.
2049 assert(vnStore->IsVNArrLenArithBound(oldCmpVN));
2050 vnStore->GetArrLenArithBoundInfo(oldCmpVN, &info);
2051 newCmpArgVN = vnStore->VNForFunc(vnStore->TypeOfVN(info.arrOp), (VNFunc)info.arrOper,
2052 info.arrOp, defConservativeVN);
2054 ValueNum newCmpVN = vnStore->VNForFunc(vnStore->TypeOfVN(oldCmpVN), (VNFunc)info.cmpOper,
2055 info.cmpOp, newCmpArgVN);
2056 cmp->gtVNPair.SetConservative(newCmpVN);
2060 cse->gtDebugFlags |= GTF_DEBUG_VAR_CSE_REF;
2063 // If we have side effects then we need to create a GT_COMMA tree instead
2067 noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
2069 if (m_pCompiler->verbose)
2071 printf("\nThe CSE has side effects! Extracting side effects...\n");
2072 m_pCompiler->gtDispTree(sideEffList);
// Walk the extracted side-effect list accumulating its exception sets so the
// resulting GT_COMMA gets a correct value-number pair.
2077 GenTreePtr cseVal = cse;
2078 GenTreePtr curSideEff = sideEffList;
2079 ValueNumStore* vnStore = m_pCompiler->vnStore;
2080 ValueNumPair exceptions_vnp = ValueNumStore::VNPForEmptyExcSet();
2082 while ((curSideEff->OperGet() == GT_COMMA) || (curSideEff->OperGet() == GT_ASG))
2084 GenTreePtr op1 = curSideEff->gtOp.gtOp1;
2085 GenTreePtr op2 = curSideEff->gtOp.gtOp2;
2087 ValueNumPair op1vnp;
2088 ValueNumPair op1Xvnp = ValueNumStore::VNPForEmptyExcSet();
2089 vnStore->VNPUnpackExc(op1->gtVNPair, &op1vnp, &op1Xvnp);
2091 exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op1Xvnp);
2095 // We may have inserted a narrowing cast during a previous remorph
2096 // and it will not have a value number.
2097 if ((curSideEff->OperGet() == GT_CAST) && !curSideEff->gtVNPair.BothDefined())
2099 // The inserted cast will have no exceptional effects
2100 assert(curSideEff->gtOverflow() == false);
2101 // Process the exception effects from the cast's operand.
2102 curSideEff = curSideEff->gtOp.gtOp1;
2105 ValueNumPair op2vnp;
2106 ValueNumPair op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
2107 vnStore->VNPUnpackExc(curSideEff->gtVNPair, &op2vnp, &op2Xvnp);
2108 exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp);
2110 op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
2111 vnStore->VNPUnpackExc(cseVal->gtVNPair, &op2vnp, &op2Xvnp);
2112 exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp);
2114 /* Create a comma node with the sideEffList as op1 */
2115 cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, sideEffList, cseVal);
2116 cse->gtVNPair = vnStore->VNPWithExc(op2vnp, exceptions_vnp);
2119 exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field
2121 /* Unmark any nested CSE's in the sub-operands */
2123 // But we do need to communicate the side effect list to optUnmarkCSEs
2124 // as any part of the 'exp' tree that is in the sideEffList is preserved
2125 // and is not deleted and does not have its ref counts decremented
2127 m_pCompiler->optValnumCSE_UnmarkCSEs(exp, sideEffList);
2131 /* This is a def of the CSE */
2134 if (m_pCompiler->verbose)
2136 printf("\nCSE #%02u def at ", GET_CSE_INDEX(exp->gtCSEnum));
2137 Compiler::printTreeID(exp);
2138 printf(" replaced in BB%02u with def of V%02u\n", blk->bbNum, cseLclVarNum);
2142 exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field
2144 GenTreePtr val = exp;
2146 /* Create an assignment of the value to the temp */
2147 GenTreePtr asg = m_pCompiler->gtNewTempAssign(cseLclVarNum, val);
2149 // assign the proper Value Numbers
2150 asg->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); // The GT_ASG node itself is $VN.Void
2151 asg->gtOp.gtOp1->gtVNPair = val->gtVNPair; // The dest op is the same as 'val'
2153 noway_assert(asg->gtOp.gtOp1->gtOper == GT_LCL_VAR);
2154 noway_assert(asg->gtOp.gtOp2 == val);
2156 /* Create a reference to the CSE temp */
2157 GenTreePtr ref = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
2158 ref->gtVNPair = val->gtVNPair; // The new 'ref' is the same as 'val'
2160 // If it has a zero-offset field seq, copy annotation to the ref
2161 if (hasZeroMapAnnotation)
2163 m_pCompiler->GetZeroOffsetFieldMap()->Set(ref, fldSeq);
2166 /* Create a comma node for the CSE assignment */
2167 cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, asg, ref);
2168 cse->gtVNPair = ref->gtVNPair; // The comma's value is the same as 'val'
2169 // as the assignment to the CSE LclVar
2170 // cannot add any new exceptions
2173 // Increment ref count for the CSE ref
2174 m_pCompiler->lvaTable[cseLclVarNum].incRefCnts(blk->getBBWeight(m_pCompiler), m_pCompiler);
2178 // Also increment ref count for the CSE assignment
2179 m_pCompiler->lvaTable[cseLclVarNum].incRefCnts(blk->getBBWeight(m_pCompiler), m_pCompiler);
2182 // Walk the statement 'stm' and find the pointer
2183 // in the tree is pointing to 'exp'
2185 GenTreePtr* link = m_pCompiler->gtFindLink(stm, exp);
2188 if (link == nullptr)
2190 printf("\ngtFindLink failed: stm=");
2191 Compiler::printTreeID(stm);
2193 Compiler::printTreeID(exp);
2196 m_pCompiler->gtDispTree(stm);
2199 m_pCompiler->gtDispTree(exp);
2206 // Mutate this link, thus replacing the old exp with the new cse representation
2210 // If it has a zero-offset field seq, copy annotation.
2211 if (hasZeroMapAnnotation)
2213 m_pCompiler->GetZeroOffsetFieldMap()->Set(cse, fldSeq);
2216 assert(m_pCompiler->fgRemoveRestOfBlock == false);
2218 /* re-morph the statement */
2219 m_pCompiler->fgMorphBlockStmt(blk, stm->AsStmt() DEBUGARG("optValnumCSE"));
2221 } while (lst != nullptr);
2224 // Consider each of the CSE candidates and if the CSE passes
2225 // the PromotionCheck then transform the CSE by calling PerformCSE
2227 void ConsiderCandidates()
2229 /* Consider each CSE candidate, in order of decreasing cost */
2230 unsigned cnt = m_pCompiler->optCSECandidateCount;
2231 Compiler::CSEdsc** ptr = sortTab;
2232 for (; (cnt > 0); cnt--, ptr++)
2234 Compiler::CSEdsc* dsc = *ptr;
2235 CSE_Candidate candidate(this, dsc);
2237 candidate.InitializeCounts();
// A candidate with no uses can never pay for itself; skip it.
2239 if (candidate.UseCount() == 0)
2242 if (m_pCompiler->verbose)
2244 printf("Skipped CSE #%02u because use count is 0\n", candidate.CseIndex());
2251 if (m_pCompiler->verbose)
2253 printf("\nConsidering CSE #%02u [def=%2u, use=%2u, cost=%2u] CSE Expression:\n", candidate.CseIndex(),
2254 candidate.DefCount(), candidate.UseCount(), candidate.Cost());
2255 m_pCompiler->gtDispTree(candidate.Expr());
2260 if ((dsc->csdDefCount <= 0) || (dsc->csdUseCount == 0))
2262 // If we reach this point, then the CSE def was incorrectly marked or the
2263 // block with this use is unreachable. So skip and go to the next CSE.
2264 // Without the "continue", we'd generate bad code in retail.
2265 // Commented out a noway_assert(false) here due to bug: 3290124.
2266 // The problem is if there is sub-graph that is not reachable from the
2267 // entry point, the CSE flags propagated, would be incorrect for it.
2271 bool doCSE = PromotionCheck(&candidate);
2274 if (m_pCompiler->verbose)
2278 printf("\nPromoting CSE:\n");
2282 printf("Did Not promote this CSE\n");
2289 PerformCSE(&candidate);
2294 // Perform the necessary cleanup after our CSE heuristics have run
// If PerformCSE created any new CSE temps, the ref-count-sorted lvaTable
// ordering is stale, so request a re-sort.
2298 if (m_addCSEcount > 0)
2300 /* We've added new local variables to the lvaTable so note that we need to recreate the sorted table */
2301 m_pCompiler->lvaSortAgain = true;
2306 /*****************************************************************************
2308 * Routine for performing the Value Number based CSE using our heuristics
2311 void Compiler::optValnumCSE_Heuristic()
2316 printf("\n************ Trees at start of optValnumCSE_Heuristic()\n");
2317 fgDumpTrees(fgFirstBB, nullptr);
// Drive the full heuristic pipeline: init cutoffs, sort candidates by cost,
// promote the profitable ones, then clean up.
2322 CSE_Heuristic cse_heuristic(this);
2324 cse_heuristic.Initialize();
2325 cse_heuristic.SortCandidates();
2326 cse_heuristic.ConsiderCandidates();
2327 cse_heuristic.Cleanup();
2330 /*****************************************************************************
2332 * Routine to unmark any CSEs contained within a tree
2333 * - optionally a 'keepList' can be provided to specify a list of trees that will be kept
2337 void Compiler::optValnumCSE_UnmarkCSEs(GenTreePtr deadTree, GenTreePtr keepList)
2339 assert(optValnumCSE_phase);
2341 // We need to communicate the 'keepList' to optUnmarkCSEs
2342 // as any part of the 'deadTree' tree that is in the keepList is preserved
2343 // and is not deleted and does not have its ref counts decremented
2344 // We communicate this value using the walkData.pCallbackData field
2347 fgWalkTreePre(&deadTree, optUnmarkCSEs, (void*)keepList);
2350 /*****************************************************************************
2352 * Perform common sub-expression elimination.
2355 void Compiler::optOptimizeValnumCSEs()
2360 printf("\n*************** In optOptimizeValnumCSEs()\n");
// Honor the JitNoCSE debug knob before doing any work.
2363 if (optConfigDisableCSE())
2365 return; // Disabled by JitNoCSE
2369 optValnumCSE_phase = true;
2371 /* Initialize the expression tracking logic */
2373 optValnumCSE_Init();
2375 /* Locate interesting expressions and assign indices to them */
// Only run dataflow + the heuristic when at least one candidate was found.
2377 if (optValnumCSE_Locate() > 0)
2379 optCSECandidateTotal += optCSECandidateCount;
2381 optValnumCSE_InitDataFlow();
2383 optValnumCSE_DataFlow();
2385 optValnumCSE_Availablity();
2387 optValnumCSE_Heuristic();
2390 optValnumCSE_phase = false;
2393 #endif // FEATURE_VALNUM_CSE
2395 /*****************************************************************************
2397 * The following determines whether the given expression is a worthy CSE
2400 bool Compiler::optIsCSEcandidate(GenTreePtr tree)
2402 /* No good if the expression contains side effects or if it was marked as DONT CSE */
2404 if (tree->gtFlags & (GTF_ASG | GTF_DONT_CSE))
2409 /* The only reason a TYP_STRUCT tree might occur is as an argument to
2410 GT_ADDR. It will never be actually materialized. So ignore them.
2413 var_types type = tree->TypeGet();
2414 genTreeOps oper = tree->OperGet();
2416 // TODO-1stClassStructs: Enable CSE for struct types (depends on either transforming
2417 // to use regular assignments, or handling copyObj.
2418 if (varTypeIsStruct(type) || type == TYP_VOID)
2424 if (type == TYP_FLOAT)
2426 // TODO-X86-CQ: Revisit this
2427 // Don't CSE a TYP_FLOAT on x86 as we currently can only enregister doubles
2431 if (oper == GT_CNS_DBL)
2433 // TODO-CQ: Revisit this
2434 // Don't try to CSE a GT_CNS_DBL as they can represent both float and doubles
// Use the cost metric matching the optimization goal, then reject cheap trees.
2440 if (compCodeOpt() == SMALL_CODE)
2442 cost = tree->gtCostSz;
2446 cost = tree->gtCostEx;
2449 /* Don't bother if the potential savings are very low */
2450 if (cost < MIN_CSE_COST)
2456 /* Don't bother with constants */
2457 if (tree->OperKind() & GTK_CONST)
2461 /* Check for some special cases */
2466 // If we have a simple helper call with no other persistent side-effects
2467 // then we allow this tree to be a CSE candidate
2469 if (gtTreeHasSideEffects(tree, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE) == false)
2475 // Calls generally cannot be CSE-ed
2480 // TODO-CQ: Review this...
2481 /* We try to cse GT_ARR_ELEM nodes instead of GT_IND(GT_ARR_ELEM).
2482 Doing the first allows cse to also kick in for code like
2483 "GT_IND(GT_ARR_ELEM) = GT_IND(GT_ARR_ELEM) + xyz", whereas doing
2484 the second would not allow it */
2486 return (tree->gtOp.gtOp1->gtOper != GT_ARR_ELEM);
2492 return true; // We reach here only when CSE_CONSTS is enabled
2501 return false; // Can't CSE a volatile LCL_VAR
2506 return true; // CSE these Unary Operators
2520 return true; // CSE these Binary Operators
2522 case GT_ADD: // Check for ADDRMODE flag on these Binary Operators
// Nodes folded into an address mode must stay where they are.
2525 if ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)
2536 return true; // Also CSE these Comparison Operators
2539 return true; // Intrinsics
2542 return true; // Allow GT_COMMA nodes to be CSE-ed.
2548 return false; // Currently the only special nodes that we hit
2549 // that we know that we don't want to CSE
2552 break; // Any new nodes that we might add later...
2560 // A Debug only method that allows you to control whether the CSE logic is enabled for this method.
2562 // If this method returns false then the CSE phase should be performed.
2563 // If the method returns true then the CSE phase should be skipped.
2565 bool Compiler::optConfigDisableCSE()
2567 // Next check if COMPlus_JitNoCSE is set and applies to this method
2569 unsigned jitNoCSE = JitConfig.JitNoCSE();
2573 unsigned methodCount = Compiler::jitTotalMethodCompiled;
// Encoding 0xF......: treat the low 24 bits as two 12-bit masks that
// select methods by their compile-order count (bitsOne must be set,
// bitsZero must be clear in the method-count mask).
2574 if ((jitNoCSE & 0xF000000) == 0xF000000)
2576 unsigned methodCountMask = methodCount & 0xFFF;
2577 unsigned bitsZero = (jitNoCSE >> 12) & 0xFFF;
2578 unsigned bitsOne = (jitNoCSE >> 0) & 0xFFF;
2580 if (((methodCountMask & bitsOne) == bitsOne) && ((~methodCountMask & bitsZero) == bitsZero))
2584 printf(" Disabled by JitNoCSE methodCountMask\n");
2587 return true; // The CSE phase for this method is disabled
// Otherwise JitNoCSE is a simple threshold on the method compile count.
2590 else if (jitNoCSE <= (methodCount + 1))
2594 printf(" Disabled by JitNoCSE > methodCount\n");
2597 return true; // The CSE phase for this method is disabled
2605 // A Debug only method that allows you to control whether the CSE logic is enabled for
2606 // a particular CSE in a method
2608 // If this method returns false then the CSE should be performed.
2609 // If the method returns true then the CSE should be skipped.
2611 bool Compiler::optConfigDisableCSE2()
// Counts CSE decisions across all methods in this process; the JitNoCSE2
// encodings below select individual CSEs by this running count.
2613 static unsigned totalCSEcount = 0;
2615 unsigned jitNoCSE2 = JitConfig.JitNoCSE2();
// Encoding 0xF......: two 12-bit ones/zeros masks applied to the CSE count
// (same scheme as JitNoCSE in optConfigDisableCSE).
2621 if ((jitNoCSE2 & 0xF000000) == 0xF000000)
2623 unsigned totalCSEMask = totalCSEcount & 0xFFF;
2624 unsigned bitsZero = (jitNoCSE2 >> 12) & 0xFFF;
2625 unsigned bitsOne = (jitNoCSE2 >> 0) & 0xFFF;
2627 if (((totalCSEMask & bitsOne) == bitsOne) && ((~totalCSEMask & bitsZero) == bitsZero))
2631 printf(" Disabled by jitNoCSE2 Ones/Zeros mask\n");
// Encoding 0xE......: a 12-bit mask rotated by the CSE count selects which
// CSEs to disable.
2636 else if ((jitNoCSE2 & 0xF000000) == 0xE000000)
2638 unsigned totalCSEMask = totalCSEcount & 0xFFF;
2639 unsigned disableMask = jitNoCSE2 & 0xFFF;
2641 disableMask >>= (totalCSEMask % 12);
2643 if (disableMask & 1)
2647 printf(" Disabled by jitNoCSE2 rotating disable mask\n");
// Otherwise JitNoCSE2 is a simple threshold on the running CSE count.
2652 else if (jitNoCSE2 <= totalCSEcount)
2656 printf(" Disabled by jitNoCSE2 > totalCSEcount\n");
2665 void Compiler::optOptimizeCSEs()
// Top-level entry for the CSE phase: resets per-method candidate state and
// runs the value-number-based CSE pass (when FEATURE_VALNUM_CSE is enabled).
2670 printf("\n*************** In optOptimizeCSEs()\n");
2671 printf("Blocks/Trees at start of optOptimizeCSE phase\n");
2672 fgDispBasicBlocks(true);
2676 optCSECandidateCount = 0;
2677 optCSEstart = lvaCount; // remember the first lclVar number created by this phase
2679 #if FEATURE_VALNUM_CSE
2680 INDEBUG(optEnsureClearCSEInfo());
2681 optOptimizeValnumCSEs();
2682 EndPhase(PHASE_OPTIMIZE_VALNUM_CSES);
2683 #endif // FEATURE_VALNUM_CSE
2686 /*****************************************************************************
2688 * Cleanup after CSE to allow us to run more than once.
2691 void Compiler::optCleanupCSEs()
2693 // We must clear the BBF_VISITED and BBF_MARKED flags
2695 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
2697 // And clear all the "visited" bits on the block
2699 block->bbFlags &= ~(BBF_VISITED | BBF_MARKED);
2701 /* Walk the statement trees in this basic block */
2705 // Initialize 'stmt' to the first non-Phi statement
2706 stmt = block->FirstNonPhiDef();
2708 for (; stmt; stmt = stmt->gtNext)
2710 noway_assert(stmt->gtOper == GT_STMT);
2712 /* We must clear the gtCSEnum field */
// Walk the statement's linear node list backwards (gtPrev) clearing CSE marks.
2713 for (GenTreePtr tree = stmt->gtStmt.gtStmtExpr; tree; tree = tree->gtPrev)
2715 tree->gtCSEnum = NO_CSE;
2723 /*****************************************************************************
2725 * Ensure that all the CSE information in the IR is initialized the way we expect it,
2726 * before running a CSE phase. This is basically an assert that optCleanupCSEs() is not needed.
2729 void Compiler::optEnsureClearCSEInfo()
2731 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
2733 assert((block->bbFlags & (BBF_VISITED | BBF_MARKED)) == 0);
2735 /* Walk the statement trees in this basic block */
2739 // Initialize 'stmt' to the first non-Phi statement
2740 stmt = block->FirstNonPhiDef();
2742 for (; stmt; stmt = stmt->gtNext)
2744 assert(stmt->gtOper == GT_STMT);
// Every node in every statement must carry no stale CSE index.
2746 for (GenTreePtr tree = stmt->gtStmt.gtStmtExpr; tree; tree = tree->gtPrev)
2748 assert(tree->gtCSEnum == NO_CSE);
2756 /*****************************************************************************/
2757 #endif // FEATURE_ANYCSE
2758 /*****************************************************************************/