--- /dev/null
+---
+Language: Cpp
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: true
+AlignConsecutiveDeclarations: true
+AlignEscapedNewlinesLeft: false
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: false
+BraceWrapping:
+ AfterClass: true
+ AfterControlStatement: true
+ AfterEnum: false
+ AfterFunction: true
+ AfterNamespace: false
+ AfterObjCDeclaration: false
+ AfterStruct: true
+ AfterUnion: true
+ BeforeCatch: true
+ BeforeElse: true
+ IndentBraces: false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Allman
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+ColumnLimit: 120
+CommentPragmas: '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+ForEachMacros: [ ]
+IndentCaseLabels: true
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: true
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakBeforeFirstCallParameter: 400
+PenaltyBreakComment: 50
+PenaltyBreakFirstLessLess: 500
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 100000
+PointerAlignment: Left
+ReflowComments: true
+SortIncludes: false
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp11
+TabWidth: 4
+UseTab: Never
+...
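For illustration only (this snippet is not part of the diff), code formatted under the settings above comes out roughly like this: Allman braces, 4-space indentation, left-aligned pointers (BasicBlock* block rather than BasicBlock *block), and aligned runs of consecutive declarations and assignments, all within the 120-column limit:

    class Example
    {
    public:
        void Update(BasicBlock* block, unsigned weight)
        {
            unsigned    count  = 0;
            BasicBlock* target = block;
            if (weight > count)
            {
                count = weight;
            }
        }
    };

ColumnLimit: 120 together with the large penalty values is what drives most of the comment re-wrapping and parameter re-packing seen in the hunks that follow.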
static bool AreEquivalentModuloNativeInt(const typeInfo& verTi, const typeInfo& nodeTi)
{
if (AreEquivalent(verTi, nodeTi)) return true;
- // Otherwise...
#ifdef _TARGET_64BIT_
return (nodeTi.IsType(TI_I_IMPL) && tiCompatibleWith(0, verTi, typeInfo::nativeInt(), true)) ||
(verTi.IsType(TI_I_IMPL) && tiCompatibleWith(0, typeInfo::nativeInt(), nodeTi, true));
// This block will be the new candidate for the insert point
// for the new assignment
- //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (verbose)
printf("new bestBlock\n");
bestWeight = block->getBBWeight(this);
}
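The CLANG_FORMAT_COMMENT_ANCHOR; statements introduced throughout this diff give clang-format a real statement to attach a free-standing comment to, so that a comment sitting directly above an #ifdef is not re-indented as if it belonged to the preprocessor directive. A minimal sketch of the pattern (the macro itself is not defined in this diff; it presumably expands to something harmless, such as a lone semicolon):

    // This comment describes the code below; without an anchor, clang-format
    // may re-indent it to line up with the #ifdef instead of the code.
    CLANG_FORMAT_COMMENT_ANCHOR;

    #ifdef DEBUG
        if (verbose)
            printf("...\n");
    #endif

The related change of moving a comment from just above an #ifdef to just below it (as in the RELOC_SUPPORT hunks further down) achieves the same effect without the macro.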
- /* If there is a use of the variable in this block */
- /* then we insert the assignment at the beginning */
- /* otherwise we insert the statement at the end */
+ // If there is a use of the variable in this block
+ // then we insert the assignment at the beginning
+ // otherwise we insert the statement at the end
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
#ifdef _TARGET_64BIT_
if (vnStore->IsVNHandle(vnCns))
{
+#ifdef RELOC_SUPPORT
// Don't perform constant folding that involves a handle that needs
// to be recorded as a relocation with the VM.
-#ifdef RELOC_SUPPORT
if (!opts.compReloc)
#endif
{
#ifndef _TARGET_64BIT_
if (vnStore->IsVNHandle(vnCns))
{
+#ifdef RELOC_SUPPORT
// Don't perform constant folding that involves a handle that needs
// to be recorded as a relocation with the VM.
-#ifdef RELOC_SUPPORT
if (!opts.compReloc)
#endif
{
if (!optLocalAssertionProp)
{
assert(newTree->OperIsConst()); // We should have a simple Constant node for newTree
- assert(vnStore->IsVNConstant(curAssertion->op2.vn)); // The value number stored for op2 should be a valid VN representing the constant
- newTree->gtVNPair.SetBoth(curAssertion->op2.vn); // Set the ValueNumPair to the constant VN from op2 of the assertion
+ assert(vnStore->IsVNConstant(curAssertion->op2.vn)); // The value number stored for op2 should be a valid
+ // VN representing the constant
+ newTree->gtVNPair.SetBoth(curAssertion->op2.vn); // Set the ValueNumPair to the constant VN from op2
+ // of the assertion
}
#ifdef DEBUG
* op1Kind and lclNum, op2Kind and the constant value and is either equal or
* not equal assertion.
*/
-Compiler::AssertionIndex Compiler::optLocalAssertionIsEqualOrNotEqual(optOp1Kind op1Kind, unsigned lclNum, optOp2Kind op2Kind,
- ssize_t cnsVal, ASSERT_VALARG_TP assertions)
+Compiler::AssertionIndex Compiler::optLocalAssertionIsEqualOrNotEqual(optOp1Kind op1Kind, unsigned lclNum,
+ optOp2Kind op2Kind, ssize_t cnsVal, ASSERT_VALARG_TP assertions)
{
noway_assert((op1Kind == O1K_LCLVAR) || (op1Kind == O1K_EXACT_TYPE) || (op1Kind == O1K_SUBTYPE));
noway_assert((op2Kind == O2K_CONST_INT) || (op2Kind == O2K_IND_CNS_INT));
#include "bitsetasshortlong.h"
#include "bitsetasuint64inclass.h"
+// clang-format off
unsigned BitSetSupport::BitCountTable[16] = { 0, 1, 1, 2,
1, 2, 2, 3,
1, 2, 2, 3,
2, 3, 3, 4 };
+// clang-format on
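The // clang-format off / // clang-format on pair is the standard escape hatch for hand-aligned regions: the formatter leaves everything between the two markers untouched. Without the guards, an initializer like BitCountTable above would most likely be re-packed to fit the 120-column limit, e.g. (illustrative only):

    unsigned BitSetSupport::BitCountTable[16] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};

which would lose the deliberate 4 x 4 layout of the nibble popcount table.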
#ifdef DEBUG
template<typename BitSetType,
// An "adapter" class that provides methods that retrieves things from the Env:
// static IAllocator* GetAllococator(Env): yields an "IAllocator*" that the BitSet implementation can use.
// static unsigned GetSize(Env): the current size (= # of bits) of this bitset type.
-// static unsigned GetArrSize(Env, unsigned elemSize): The number of "elemSize" chunks sufficient to hold "GetSize".
-// A given BitSet implementation must call this with only one constant value.
-// Thus, and "Env" may compute this result when GetSize changes.
+// static unsigned GetArrSize(Env, unsigned elemSize): The number of "elemSize" chunks sufficient to hold
+// "GetSize". A given BitSet implementation must call
+// this with only one constant value. Thus, an "Env"
+// may compute this result when GetSize changes.
//
// static unsigned GetEpoch(Env): the current epoch.
//
// In addition to implementing the method signatures here, an instantiation of BitSetOps must also export a
// BitSetOps::Iter type, which supports the following operations:
// Iter(BitSetValueArgType): a constructor
-// bool NextElem(unsigned* pElem): returns true if the iteration is not complete, and sets *pElem to the next yielded member.
+// bool NextElem(unsigned* pElem): returns true if the iteration is not complete, and sets *pElem to the next
+// yielded member.
//
// Finally, it should export two further types:
//
typename BitSetTraits>
class BitSetOps
{
+#if 0
// Below are the set of methods that an instantiation of BitSetOps should provide. This is
// #if'd out because it doesn't make any difference; C++ has no mechanism for checking that
// the methods of an instantiation are consistent with these signatures, other than the expectations
// embodied in the program that uses the instantiation(s). But it's useful documentation, and
// we should try to keep it up to date.
-#if 0
+
public:
// The uninitialized value -- not a real bitset (if possible).
;
}
+#ifndef DEBUG
// In debug we also want the default copy constructor to be private, to make inadvertent
// default initializations illegal. Debug builds therefore arrange to use the
// non-default constructor defined below that takes an extra argument where one would
// otherwise use a copy constructor. In non-debug builds, we don't pass the extra dummy
// int argument, and just make copy constructor defined here visible.
-#ifndef DEBUG
public:
#endif
BitSetUint64(const BitSetUint64& bs) : m_bits(bs.m_bits)
typedef BitVecOps::ValArgType BitVec_ValArg_T;
typedef BitVecOps::RetValType BitVec_ValRet_T;
-// Initialize "_varName" to "_initVal." Copies contents, not references; if "_varName" is uninitialized, allocates a set
-// for it (using "_traits" for any necessary allocation), and copies the contents of "_initVal" into it.
+// Initialize "_varName" to "_initVal." Copies contents, not references; if "_varName" is uninitialized, allocates a
+// set for it (using "_traits" for any necessary allocation), and copies the contents of "_initVal" into it.
#define BITVEC_INIT(_traits, _varName, _initVal) _varName(BitVecOps::MakeCopy(_traits, _initVal))
// Initializes "_varName" to "_initVal", without copying: if "_initVal" is an indirect representation, copies its
struct EntryState
{
- ThisInitState thisInitialized : 8; // used to track whether the this ptr is initialized (we could use fewer bits here)
+ ThisInitState thisInitialized : 8; // used to track whether the this ptr is initialized (we could use
+ // fewer bits here)
unsigned esStackDepth : 24; // size of esStack
StackEntry* esStack; // ptr to stack
};
#define BBF_HAS_NEWOBJ 0x00800000 // BB contains 'new' of an object type.
#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
-#define BBF_FINALLY_TARGET 0x01000000 // BB is the target of a finally return: where a finally will return during non-exceptional flow.
- // Because the ARM calling sequence for calling a finally explicitly sets the return address to
- // the finally target and jumps to the finally, instead of using a call instruction, ARM needs this
- // to generate correct code at the finally target, to allow for proper stack unwind from within a
- // non-exceptional call to a finally.
+#define BBF_FINALLY_TARGET 0x01000000 // BB is the target of a finally return: where a finally will return during
+ // non-exceptional flow. Because the ARM calling sequence for calling a
+ // finally explicitly sets the return address to the finally target and jumps
+ // to the finally, instead of using a call instruction, ARM needs this to
+ // generate correct code at the finally target, to allow for proper stack
+ // unwind from within a non-exceptional call to a finally.
#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
#define BBF_BACKWARD_JUMP 0x02000000 // BB is surrounded by a backward jump/switch arc
-#define BBF_RETLESS_CALL 0x04000000 // BBJ_CALLFINALLY that will never return (and therefore, won't need a paired BBJ_ALWAYS); see isBBCallAlwaysPair().
+#define BBF_RETLESS_CALL 0x04000000 // BBJ_CALLFINALLY that will never return (and therefore, won't need a paired
+ // BBJ_ALWAYS); see isBBCallAlwaysPair().
#define BBF_LOOP_PREHEADER 0x08000000 // BB is a loop preheader block
#define BBF_COLD 0x10000000 // BB is cold
#define BBF_PROF_WEIGHT 0x20000000 // BB weight is computed from profile data
#define BBF_FORWARD_SWITCH 0x40000000 // Aux flag used in FP codegen to know if a jmptable entry has been forwarded
-#define BBF_KEEP_BBJ_ALWAYS 0x80000000 // A special BBJ_ALWAYS block, used by EH code generation. Keep the jump kind as BBJ_ALWAYS.
- // Used for the paired BBJ_ALWAYS block following the BBJ_CALLFINALLY block, as well as, on x86,
- // the final step block out of a finally.
+#define BBF_KEEP_BBJ_ALWAYS 0x80000000 // A special BBJ_ALWAYS block, used by EH code generation. Keep the jump kind
+ // as BBJ_ALWAYS. Used for the paired BBJ_ALWAYS block following the
+ // BBJ_CALLFINALLY block, as well as, on x86, the final step block out of a
+ // finally.
bool isRunRarely() { return ((bbFlags & BBF_RUN_RARELY) != 0); }
bool isLoopHead() { return ((bbFlags & BBF_LOOP_HEAD) != 0); }
// analysis that is tracking the contents of local variables might want to consider *all* successors,
// and would pass the current Compiler object.
//
- // Similarly, BBJ_EHFILTERRET blocks are assumed to have no successors if "comp" is null; if non-null, NumSucc/GetSucc
- // yields the first block of the try blocks handler.
+ // Similarly, BBJ_EHFILTERRET blocks are assumed to have no successors if "comp" is null; if non-null,
+ // NumSucc/GetSucc yields the first block of the try block's handler.
//
// Also, the behavior for switches changes depending on the value of "comp". If it is null, then all
// switch successors are returned. If it is non-null, then only unique switch successors are returned;
#define MAX_XCPTN_INDEX (USHRT_MAX - 1)
- // It would be nice to make bbTryIndex and bbHndIndex private, but there is still code that uses them directly, especially
- // Compiler::fgNewBBinRegion() and friends.
+ // It would be nice to make bbTryIndex and bbHndIndex private, but there is still code that uses them directly,
+ // especially Compiler::fgNewBBinRegion() and friends.
// index, into the compHndBBtab table, of innermost 'try' clause containing the BB (used for raising exceptions).
// Stored as index + 1; 0 means "no try index".
// Stored as index + 1; 0 means "no handler index".
unsigned short bbHndIndex;
- // Given two EH indices that are either bbTryIndex or bbHndIndex (or related), determine if index1 might be more deeply
- // nested than index2. Both index1 and index2 are in the range [0..compHndBBtabCount], where 0 means "main function"
- // and otherwise the value is an index into compHndBBtab[]. Note that "sibling" EH regions will have a numeric
- // index relationship that doesn't indicate nesting, whereas a more deeply nested region must have a lower index
- // than the region it is nested within. Note that if you compare a single block's bbTryIndex and bbHndIndex, there
- // is guaranteed to be a nesting relationship, since that block can't be simultaneously in two sibling EH regions.
- // In that case, "maybe" is actually "definitely".
+ // Given two EH indices that are either bbTryIndex or bbHndIndex (or related), determine if index1 might be more
+ // deeply nested than index2. Both index1 and index2 are in the range [0..compHndBBtabCount], where 0 means
+ // "main function" and otherwise the value is an index into compHndBBtab[]. Note that "sibling" EH regions will
+ // have a numeric index relationship that doesn't indicate nesting, whereas a more deeply nested region must have
+ // a lower index than the region it is nested within. Note that if you compare a single block's bbTryIndex and
+ // bbHndIndex, there is guaranteed to be a nesting relationship, since that block can't be simultaneously in two
+ // sibling EH regions. In that case, "maybe" is actually "definitely".
static bool ehIndexMaybeMoreNested(unsigned index1, unsigned index2)
{
if (index1 == 0)
#endif
IL_OFFSET bbCodeOffs; // IL offset of the beginning of the block
- IL_OFFSET bbCodeOffsEnd; // IL offset past the end of the block. Thus, the [bbCodeOffs..bbCodeOffsEnd) range is not inclusive of the end offset.
- // The count of IL bytes in the block is bbCodeOffsEnd - bbCodeOffs, assuming neither are BAD_IL_OFFSET.
+ IL_OFFSET bbCodeOffsEnd; // IL offset past the end of the block. Thus, the [bbCodeOffs..bbCodeOffsEnd)
+ // range is not inclusive of the end offset. The count of IL bytes in the block
+ // is bbCodeOffsEnd - bbCodeOffs, assuming neither are BAD_IL_OFFSET.
#ifdef DEBUG
void dspBlockILRange(); // Display the block's IL range as [XXX...YYY), where XXX and YYY might be "???" for BAD_IL_OFFSET.
unsigned bbHeapDef: 1;
unsigned bbHeapLiveIn: 1;
unsigned bbHeapLiveOut: 1;
- unsigned bbHeapHavoc: 1; // If true, at some point the block does an operation that leaves the heap in an unknown state.
- // (E.g., unanalyzed call, store through unknown pointer...)
+ unsigned bbHeapHavoc: 1; // If true, at some point the block does an operation that leaves the heap
+ // in an unknown state. (E.g., unanalyzed call, store through unknown
+ // pointer...)
// We want to make phi functions for the special implicit var "Heap". But since this is not a real
// lclVar, and thus has no local #, we can't use a GenTreePhiArg. Instead, we use this struct.
void* operator new(size_t sz, class Compiler* comp);
};
- static HeapPhiArg* EmptyHeapPhiDef; // Special value (0x1, FWIW) to represent a to-be-filled in Phi arg list for Heap.
+ static HeapPhiArg* EmptyHeapPhiDef; // Special value (0x1, FWIW) to represent a to-be-filled in Phi arg list
+ // for Heap.
HeapPhiArg* bbHeapSsaPhiFunc; // If the "in" Heap SSA var is not a phi definition, this value is NULL.
- // Otherwise, it is either the special value EmptyHeapPhiDefn, to indicate that Heap needs a phi
- // definition on entry, or else it is the linked list of the phi arguments.
+ // Otherwise, it is either the special value EmptyHeapPhiDef, to indicate
+ // that Heap needs a phi definition on entry, or else it is the linked list
+ // of the phi arguments.
unsigned bbHeapSsaNumIn; // The SSA # of "Heap" on entry to the block.
unsigned bbHeapSsaNumOut; // The SSA # of "Heap" on exit from the block.
/* The following fields used for loop detection */
+ static const unsigned NOT_IN_LOOP = UCHAR_MAX;
+
#ifdef DEBUG
// This is the label a loop gets as part of the second, reachability-based
// loop discovery mechanism. This is apparently only used for debugging.
unsigned char bbLoopNum; // set to 'n' for a loop #n header
#endif // DEBUG
- static const unsigned NOT_IN_LOOP = UCHAR_MAX;
-
unsigned char bbNatLoopNum; // Index, in optLoopTable, of most-nested loop that contains this block,
// or else NOT_IN_LOOP if this block is not in a loop.
}
// Given an the edge b1 -> b2, calculate the slop fraction by
- // using the higher of the two block weights
+ // using the higher of the two block weights
static weight_t GetSlopFraction(BasicBlock* b1, BasicBlock* b2)
{
return GetSlopFraction(max(b1->bbWeight, b2->bbWeight));
typedef BlockSetOps::ValArgType BlockSet_ValArg_T;
typedef BlockSetOps::RetValType BlockSet_ValRet_T;
-// Initialize "_varName" to "_initVal." Copies contents, not references; if "_varName" is uninitialized, allocates a var set
-// for it (using "_comp" for any necessary allocation), and copies the contents of "_initVal" into it.
+// Initialize "_varName" to "_initVal." Copies contents, not references; if "_varName" is uninitialized, allocates a
+// var set for it (using "_comp" for any necessary allocation), and copies the contents of "_initVal" into it.
#define BLOCKSET_INIT(_comp, _varName, _initVal) _varName(BlockSetOps::MakeCopy(_comp, _initVal))
// Initializes "_varName" to "_initVal", without copying: if "_initVal" is an indirect representation, copies its
// the GC info. Requires "codeSize" to be the size of the generated code, "prologSize" and "epilogSize"
// to be the sizes of the prolog and epilog, respectively. In DEBUG, makes a check involving the
// "codePtr", assumed to be a pointer to the start of the generated code.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef JIT32_GCENCODER
void* genCreateAndStoreGCInfo (unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr));
void* genCreateAndStoreGCInfoJIT32(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr));
//
// Epilog functions
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_ARM_)
bool genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog);
regSet.rsSpillBeg();
+#ifdef DEBUGGING_SUPPORT
/* Initialize the line# tracking logic */
-#ifdef DEBUGGING_SUPPORT
if (compiler->opts.compScopeInfo)
{
siInit();
genUpdateLife(block->bbLiveIn);
// Even if liveness didn't change, we need to update the registers containing GC references.
- // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't change?
- // We cleared them out above. Maybe we should just not clear them out, but update the ones that change here.
- // That would require handling the changes in recordVarLocationsAtStartOfBB().
+ // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
+ // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
+ // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
regMaskTP newLiveRegSet = RBM_NONE;
regMaskTP newRegGCrefSet = RBM_NONE;
}
/* Start a new code output block */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_EH_FUNCLETS
#if defined(_TARGET_ARM_)
{
assert(block->bbFlags & BBF_JMP_TARGET);
- // Create a label that we'll use for computing the start of an EH region, if this block is
- // at the beginning of such a region. If we used the existing bbEmitCookie as is for
- // determining the EH regions, then this NOP would end up outside of the region, if this
- // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
- // would be executed, which we would prefer not to do.
-
#ifdef DEBUG
if (compiler->verbose)
{
printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
}
#endif
+ // Create a label that we'll use for computing the start of an EH region, if this block is
+ // at the beginning of such a region. If we used the existing bbEmitCookie as is for
+ // determining the EH regions, then this NOP would end up outside of the region, if this
+ // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
+ // would be executed, which we would prefer not to do.
block->bbUnwindNopEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur,
gcInfo.gcRegGCrefSetCur,
* Generate code for each statement-tree in the block
*
*/
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_EH_FUNCLETS
if (block->bbFlags & BBF_FUNCLET_BEG)
// The document "X64 and ARM ABIs.docx" has more details. The situations:
// 1. If the call instruction is in a different EH region as the instruction that follows it.
// 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
- // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters here.)
+ // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters
+ // here.)
// We handle case #1 here, and case #2 in the emitter.
if (getEmitter()->emitIsLastInsCall())
{
}
//------------------------------------------------------------------------
-// genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet prolog.
-// If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction.
-// The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that instruction.
+// genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet
+// prolog. If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction.
+// The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that
+// instruction.
//
// Arguments:
// reg1 - First register of pair to save.
// reg2 - Second register of pair to save.
// spOffset - The offset from SP to store reg1 (must be positive or zero).
-// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or zero).
-// lastSavedWasPreviousPair - True if the last prolog instruction was to save the previous register pair. This allows us to
-// emit the "save_next" unwind code.
+// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
+// zero).
+// lastSavedWasPreviousPair - True if the last prolog instruction was to save the previous register pair. This
+// allows us to emit the "save_next" unwind code.
// tmpReg - An available temporary register. Needed for the case of large frames.
// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
// Otherwise, we don't touch it.
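As a concrete illustration of the single-instruction save-and-adjust described above (the register pair and frame size below are arbitrary examples, not taken from generated code):

    stp x19, x20, [sp, #-80]!    // pre-indexed STP: SP is decremented by 80 first, then the pair is stored at the new SP

With spDelta == 0, a plain stp reg1, reg2, [sp, #spOffset] is presumably emitted instead, with any SP adjustment done separately.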
assert(spOffset >= 0);
assert(spDelta <= 0);
assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
- assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both FP/SIMD
+ assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both
+ // FP/SIMD
bool needToSaveRegs = true;
if (spDelta != 0)
}
//------------------------------------------------------------------------
-// genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or floating-point/SIMD register
-// in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0), then spOffset must be 8. This is because
-// otherwise we would create an alignment hole above the saved register, not below it, which we currently don't support. This restriction
-// could be loosened if the callers change to handle it (and this function changes to support using pre-indexed STR addressing).
-// The caller must ensure that we can use the STR instruction, and that spOffset will be in the legal range for that instruction.
+// genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or
+// floating-point/SIMD register in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0),
+// then spOffset must be 8. This is because otherwise we would create an alignment hole above the saved register, not
+// below it, which we currently don't support. This restriction could be loosened if the callers change to handle it
+// (and this function changes to support using pre-indexed STR addressing). The caller must ensure that we can use the
+// STR instruction, and that spOffset will be in the legal range for that instruction.
//
// Arguments:
// reg1 - Register to save.
// spOffset - The offset from SP to store reg1 (must be positive or zero).
-// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or zero).
+// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
+// zero).
// tmpReg - An available temporary register. Needed for the case of large frames.
// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
// Otherwise, we don't touch it.
// reg1 - First register of pair to restore.
// reg2 - Second register of pair to restore.
// spOffset - The offset from SP to load reg1 (must be positive or zero).
-// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
+// zero).
// tmpReg - An available temporary register. Needed for the case of large frames.
// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
// Otherwise, we don't touch it.
// Arguments:
// reg1 - Register to restore.
// spOffset - The offset from SP to restore reg1 (must be positive or zero).
-// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
+// zero).
// tmpReg - An available temporary register. Needed for the case of large frames.
// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
// Otherwise, we don't touch it.
// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that
// if non-zero spDelta, then this is the offset of the first save *after* that
// SP adjustment.
-// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or zero).
+// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
+// zero).
//
// Return Value:
// None.
assert((spDelta % 16) == 0);
assert((regsToSaveMask & RBM_FP) == 0); // we never save FP here
- assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in RBM_CALLEE_SAVED.
+ assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in
+ // RBM_CALLEE_SAVED.
regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT;
regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat;
genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr);
- // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating this epilog, to
- // get the codes to match. Turn this off until that is better understood.
+ // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating
+ // this epilog, to get the codes to match. Turn this off until that is better understood.
// lastSavedWasPair = true;
spOffset += 2 * REGSIZE_BYTES;
genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr);
- // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating this epilog, to
- // get the codes to match. Turn this off until that is better understood.
+ // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating
+ // this epilog, to get the codes to match. Turn this off until that is better understood.
// lastSavedWasPair = true;
spOffset += 2 * FPSAVE_REGSIZE_BYTES;
// Arguments:
// regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing.
// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area.
-// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
+// zero).
//
// Here's an example restore sequence:
// ldp x27, x28, [sp,#96]
// ldp x21, x22, [sp,#16]
// ldp x19, x20, [sp], #80
//
-// Note you call the unwind functions specifying the prolog operation that is being un-done. So, for example, when generating
-// a post-indexed load, you call the unwind function for specifying the corresponding preindexed store.
+// Note you call the unwind functions specifying the prolog operation that is being un-done. So, for example, when
+// generating a post-indexed load, you call the unwind function for specifying the corresponding preindexed store.
//
// Return Value:
// None.
assert(intRegsToRestoreCount == 0);
}
-
+// clang-format off
/*****************************************************************************
*
* Generates code for an EH funclet prolog.
* | | downward |
* V
*/
+// clang-format on
void CodeGen::genFuncletProlog(BasicBlock* block)
{
return;
assert(isFramePointerUsed());
- assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
+ assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be
+ // finalized
genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta();
regSet.rsSpillBeg();
+#ifdef DEBUGGING_SUPPORT
/* Initialize the line# tracking logic */
-#ifdef DEBUGGING_SUPPORT
if (compiler->opts.compScopeInfo)
{
siInit();
genUpdateLife(block->bbLiveIn);
// Even if liveness didn't change, we need to update the registers containing GC references.
- // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't change?
- // We cleared them out above. Maybe we should just not clear them out, but update the ones that change here.
- // That would require handling the changes in recordVarLocationsAtStartOfBB().
+ // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
+ // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
+ // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
regMaskTP newLiveRegSet = RBM_NONE;
regMaskTP newRegGCrefSet = RBM_NONE;
getEmitter()->emitIns_R_I(INS_mov, size, reg, (imm & 0xffff));
getEmitter()->emitIns_R_I_I(INS_movk, size, reg, ((imm >> 16) & 0xffff), 16, INS_OPTS_LSL);
- if ((size == EA_8BYTE) && ((imm >> 32) != 0)) // Sometimes the upper 32 bits are zero and the first mov has zero-ed them
+ if ((size == EA_8BYTE) && ((imm >> 32) != 0)) // Sometimes the upper 32 bits are zero and the first mov has zero-ed them
{
getEmitter()->emitIns_R_I_I(INS_movk, EA_8BYTE, reg, ((imm >> 32) & 0xffff), 32, INS_OPTS_LSL);
if ((imm >> 48) != 0) // Frequently the upper 16 bits are zero and the first mov has zero-ed them
//
emit->emitIns_R_R_R(INS_adds, size, REG_ZR, dividendReg, dividendReg);
inst_JMP(jmpNotEqual, sdivLabel); // goto sdiv if the Z flag is clear
- genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flags is set throw ArithmeticException
+ genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flag is set throw
+ // ArithmeticException
genDefineTempLabel(sdivLabel);
}
break;
case GT_PUTARG_REG:
- assert(targetType != TYP_STRUCT); // Any TYP_STRUCT register args should have been removed by fgMorphMultiregStructArg
+ assert(targetType != TYP_STRUCT); // Any TYP_STRUCT register args should have been removed by
+ // fgMorphMultiregStructArg
// We have a normal non-Struct targetType
{
GenTree *op1 = treeNode->gtOp.gtOp1;
// Nothing needs to popped off from stack nor relocated.
if (compiler->lvaOutgoingArgSpaceSize > 0)
{
- assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
+ // aligned
inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
}
assert(!initVal->isContained());
assert(!blockSize->isContained());
+#if 0
// TODO-ARM64-CQ: When initblk loop unrolling is implemented
// put this assert back on.
-#if 0
if (blockSize->IsCnsIntOrI())
{
assert(blockSize->gtIntCon.gtIconVal >= INITBLK_UNROLL_LIMIT);
assert(!srcAddr->isContained());
assert(!blockSize->isContained());
- // Enable this when we support cpblk loop unrolling.
#if 0
#ifdef DEBUG
+ // Enable this when we support cpblk loop unrolling.
+
if (blockSize->IsCnsIntOrI())
{
assert(blockSize->gtIntCon.gtIconVal >= CPBLK_UNROLL_LIMIT);
getEmitter()->emitIns_R_R(INS_fcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
}
- else if (treeNode->gtRegNum != op1->gtRegNum) // If double to double cast or float to float cast. Emit a move instruction.
+ else if (treeNode->gtRegNum != op1->gtRegNum)
{
+ // If double to double cast or float to float cast, emit a move instruction.
getEmitter()->emitIns_R_R(INS_mov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
}
emitter* theEmitter = getEmitter();
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
// We use this:
// genDefineTempLabel(genCreateTempLabel());
// to create artificial labels to help separate groups of tests.
// Loads/Stores basic general register
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// ldr/str Xt, [reg]
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// Compares
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// cmp reg, reg
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
-
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
// R_R
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_R1, REG_R12);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_I
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// mov reg, imm(i16,hw)
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// tst reg, reg
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_I_I
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// mov reg, imm(i16,hw)
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_I
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R_R_I(INS_lsl, EA_8BYTE, REG_R0, REG_R0, 1);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_I cmp/txt
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
// cmp
theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0);
theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R_R_R(INS_lsl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_I_I
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_8BYTE, REG_R2, REG_R3, 4, 39);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R_I
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// ADD (extended register)
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R_I -- load/store pair
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R_Ext -- load/store shifted/extend
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// LDR (register)
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R_R
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R_R_R_R(INS_madd, EA_4BYTE, REG_R0, REG_R12, REG_R27, REG_R10);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
// R_COND
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// cset reg, cond
theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R9, INS_COND_EQ); // eq
theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R8, INS_COND_NE); // ne
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
// R_R_COND
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// cinc reg, reg, cond
// cinv reg, reg, cond
// cneg reg, reg, cond
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
// R_R_R_COND
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// csel reg, reg, reg, cond
// csinc reg, reg, reg, cond
// csinv reg, reg, reg, cond
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
// R_R_FLAGS_COND
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// ccmp reg1, reg2, nzcv, cond
theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq
theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// Branch to register
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R(INS_br, EA_PTRSIZE, REG_R8);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// Misc
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 0);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
////////////////////////////////////////////////////////////////////////////////
//
// SIMD and Floating point
// Load/Stores vector register
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// ldr/str Vt, [reg]
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R mov and aliases for mov
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// mov vector to vector
theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V0, REG_V1);
theEmitter->emitIns_R_R(INS_mov, EA_16BYTE, REG_V2, REG_V3);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_I movi and mvni
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// movi imm8 (vector)
theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V0, 0x00, INS_OPTS_8B);
theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V1, 0xFF, INS_OPTS_8B);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_I orr/bic vector immediate
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_F cmp/fmov immediate
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// fmov imm8 (scalar)
theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V14, 1.0);
theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V15, -1.0);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R fmov/fcmp/fcvt
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// fmov to vector to vector
theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V0, REG_V2);
theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V1, REG_V3);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R floating point conversions
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// fcvtas scalar
theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_V0, REG_V1);
theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V2, REG_V3);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R floating point operations, one dest, one source
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// fabs scalar
theEmitter->emitIns_R_R(INS_fabs, EA_4BYTE, REG_V0, REG_V1);
theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V2, REG_V3);
#endif
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R floating point round to int, one dest, one source
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// frinta scalar
theEmitter->emitIns_R_R(INS_frinta, EA_4BYTE, REG_V0, REG_V1);
theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V2, REG_V3);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R floating point operations, one dest, two source
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R_R_R(INS_fadd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_I vector operations, one dest, one source reg, one immed
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// 'sshr' scalar
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R vector operations, one dest, two source
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// Specifying an Arrangement is optional
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R vector multiply
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R floating point operations, one source/dest, and two source
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R_R_R(INS_fmla, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R_R floating point operations, one dest, and three source
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_4BYTE, REG_V0, REG_V8, REG_V16, REG_V24);
theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_4BYTE, REG_V1, REG_V9, REG_V17, REG_V25);
theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_4BYTE, REG_V2, REG_V10, REG_V18, REG_V26);
void genPInvokeMethodEpilog();
- regNumber genPInvokeCallProlog(LclVarDsc * varDsc,
+ regNumber genPInvokeCallProlog(LclVarDsc* varDsc,
int argSize,
CORINFO_METHOD_HANDLE methodToken,
- BasicBlock * returnLabel);
+ BasicBlock* returnLabel);
- void genPInvokeCallEpilog(LclVarDsc * varDsc,
+ void genPInvokeCallEpilog(LclVarDsc* varDsc,
regMaskTP retVal);
regNumber genLclHeap (GenTreePtr size);
bool forLea,
regMaskTP regMask,
RegSet::KeepReg keepReg,
- regMaskTP * useMaskPtr,
+ regMaskTP* useMaskPtr,
bool deferOp = false);
regMaskTP genMakeRvalueAddressable(GenTreePtr tree,
RegSet::KeepReg keptReg);
GenTreePtr genMakeAddrOrFPstk (GenTreePtr tree,
- regMaskTP * regMaskPtr,
+ regMaskTP* regMaskPtr,
bool roundResult);
void genEmitGSCookieCheck(bool pushReg);
void genCondJump (GenTreePtr cond,
- BasicBlock * destTrue = NULL,
- BasicBlock * destFalse = NULL,
+ BasicBlock* destTrue = NULL,
+ BasicBlock* destFalse = NULL,
bool bStackFPFixup = true);
void genJCC (genTreeOps cmp,
- BasicBlock * block,
+ BasicBlock* block,
var_types type);
void genJccLongHi (genTreeOps cmp,
- BasicBlock * jumpTrue,
- BasicBlock * jumpFalse,
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse,
bool unsOper = false);
void genJccLongLo (genTreeOps cmp,
- BasicBlock * jumpTrue,
- BasicBlock * jumpFalse);
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse);
void genCondJumpLng (GenTreePtr cond,
- BasicBlock * jumpTrue,
- BasicBlock * jumpFalse,
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse,
bool bFPTransition = false);
bool genUse_fcomip();
void genTableSwitch (regNumber reg,
unsigned jumpCnt,
- BasicBlock ** jumpTab);
+ BasicBlock** jumpTab);
regMaskTP WriteBarrier (GenTreePtr tgt,
GenTreePtr assignVal,
regMaskTP destReg,
regMaskTP bestReg = RBM_NONE);
- regNumber genIntegerCast(GenTree *tree, regMaskTP needReg, regMaskTP bestReg);
+ regNumber genIntegerCast(GenTree* tree, regMaskTP needReg, regMaskTP bestReg);
void genCodeForNumericCast(GenTreePtr tree,
regMaskTP destReg,
void genCodeForSwitch (GenTreePtr tree);
regMaskTP genPushRegs (regMaskTP regs,
- regMaskTP * byrefRegs,
- regMaskTP * noRefRegs);
+ regMaskTP* byrefRegs,
+ regMaskTP* noRefRegs);
void genPopRegs (regMaskTP regs,
regMaskTP byrefRegs,
regMaskTP noRefRegs);
LclVarDsc* promotedStructLocalVarDesc,
emitAttr fieldSize,
unsigned* pNextPromotedStructFieldVar, // IN/OUT
- unsigned *pBytesOfNextSlotOfCurPromotedStruct, // IN/OUT
+ unsigned* pBytesOfNextSlotOfCurPromotedStruct, // IN/OUT
regNumber* pCurRegNum, // IN/OUT
int argOffset,
int fieldOffsetOfFirstStackSlot,
GenTreePtr genGetAddrModeBase (GenTreePtr tree);
GenTreePtr genIsAddrMode (GenTreePtr tree,
- GenTreePtr * indxPtr);
+ GenTreePtr* indxPtr);
private:
bool genIsLocalLastUse (GenTreePtr tree);
void genCodeForTreeStackFP_Cast (GenTreePtr tree);
void genCodeForTreeStackFP (GenTreePtr tree);
void genCondJumpFltStackFP (GenTreePtr cond,
- BasicBlock * jumpTrue,
- BasicBlock * jumpFalse,
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse,
bool bDoTransition = true);
void genCondJumpFloat (GenTreePtr cond,
- BasicBlock * jumpTrue,
- BasicBlock * jumpFalse);
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse);
void genCondJumpLngStackFP (GenTreePtr cond,
- BasicBlock * jumpTrue,
- BasicBlock * jumpFalse);
-
- void genFloatConst(GenTree *tree, RegSet::RegisterPreference *pref);
- void genFloatLeaf(GenTree *tree, RegSet::RegisterPreference *pref);
- void genFloatSimple(GenTree *tree, RegSet::RegisterPreference *pref);
- void genFloatMath(GenTree *tree, RegSet::RegisterPreference *pref);
- void genFloatCheckFinite(GenTree *tree, RegSet::RegisterPreference *pref);
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse);
+
+ void genFloatConst(GenTree* tree, RegSet::RegisterPreference* pref);
+ void genFloatLeaf(GenTree* tree, RegSet::RegisterPreference* pref);
+ void genFloatSimple(GenTree* tree, RegSet::RegisterPreference* pref);
+ void genFloatMath(GenTree* tree, RegSet::RegisterPreference* pref);
+ void genFloatCheckFinite(GenTree* tree, RegSet::RegisterPreference* pref);
void genLoadFloat(GenTreePtr tree, regNumber reg);
- void genFloatAssign(GenTree *tree);
- void genFloatArith(GenTree *tree, RegSet::RegisterPreference *pref);
- void genFloatAsgArith(GenTree *tree);
+ void genFloatAssign(GenTree* tree);
+ void genFloatArith(GenTree* tree, RegSet::RegisterPreference* pref);
+ void genFloatAsgArith(GenTree* tree);
regNumber genAssignArithFloat(genTreeOps oper,
GenTreePtr dst, regNumber dstreg,
GenTreePtr genMakeAddressableFloat(GenTreePtr tree,
- regMaskTP * regMaskIntPtr, regMaskTP * regMaskFltPtr,
+ regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr,
bool bCollapseConstantDoubles = true);
void genCodeForTreeFloat(GenTreePtr tree,
- RegSet::RegisterPreference *pref = NULL);
+ RegSet::RegisterPreference* pref = NULL);
void genCodeForTreeFloat(GenTreePtr tree,
regMaskTP needReg, regMaskTP bestReg);
GenTreePtr dst, regNumber dstreg,
GenTreePtr src, regNumber srcreg,
bool bReverse);
- void genCodeForTreeCastFloat(GenTreePtr tree, RegSet::RegisterPreference *pref);
- void genCodeForTreeCastToFloat(GenTreePtr tree, RegSet::RegisterPreference *pref);
- void genCodeForTreeCastFromFloat(GenTreePtr tree, RegSet::RegisterPreference *pref);
- void genKeepAddressableFloat(GenTreePtr tree, regMaskTP * regMaskIntPtr, regMaskTP * regMaskFltPtr);
+ void genCodeForTreeCastFloat(GenTreePtr tree, RegSet::RegisterPreference* pref);
+ void genCodeForTreeCastToFloat(GenTreePtr tree, RegSet::RegisterPreference* pref);
+ void genCodeForTreeCastFromFloat(GenTreePtr tree, RegSet::RegisterPreference* pref);
+ void genKeepAddressableFloat(GenTreePtr tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr);
void genDoneAddressableFloat(GenTreePtr tree, regMaskTP addrRegInt, regMaskTP addrRegFlt, RegSet::KeepReg keptReg);
void genComputeAddressableFloat(GenTreePtr tree, regMaskTP addrRegInt, regMaskTP addrRegFlt, RegSet::KeepReg keptReg, regMaskTP needReg, RegSet::KeepReg keepReg, bool freeOnly = false);
void genRoundFloatExpression(GenTreePtr op, var_types type);
#endif
- GenTreePtr genMakeAddressableStackFP (GenTreePtr tree, regMaskTP * regMaskIntPtr, regMaskTP * regMaskFltPtr, bool bCollapseConstantDoubles = true);
- void genKeepAddressableStackFP (GenTreePtr tree, regMaskTP * regMaskIntPtr, regMaskTP * regMaskFltPtr);
+ GenTreePtr genMakeAddressableStackFP (GenTreePtr tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr, bool bCollapseConstantDoubles = true);
+ void genKeepAddressableStackFP (GenTreePtr tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr);
void genDoneAddressableStackFP (GenTreePtr tree, regMaskTP addrRegInt, regMaskTP addrRegFlt, RegSet::KeepReg keptReg);
regNumber genArithmStackFP (genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg, bool bReverse);
regNumber genAsgArithmStackFP (genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg);
void genCondJmpInsStackFP (emitJumpKind jumpKind,
- BasicBlock * jumpTrue,
- BasicBlock * jumpFalse,
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse,
bool bDoTransition = true);
void genTableSwitchStackFP (regNumber reg,
unsigned jumpCnt,
- BasicBlock ** jumpTab);
+ BasicBlock** jumpTab);
void JitDumpFPState ();
#else // !FEATURE_STACK_FP_X87
#endif // FEATURE_STACK_FP_X87
// Float spill
- void UnspillFloat (RegSet::SpillDsc *spillDsc);
+ void UnspillFloat (RegSet::SpillDsc* spillDsc);
void UnspillFloat (GenTreePtr tree);
- void UnspillFloat (LclVarDsc * varDsc);
- void UnspillFloatMachineDep (RegSet::SpillDsc *spillDsc);
+ void UnspillFloat (LclVarDsc* varDsc);
+ void UnspillFloatMachineDep (RegSet::SpillDsc* spillDsc);
void UnspillFloatMachineDep (RegSet::SpillDsc* spillDsc, bool useSameReg);
void RemoveSpillDsc (RegSet::SpillDsc* spillDsc);
{}
};
- void saveLiveness (genLivenessSet * ls);
- void restoreLiveness (genLivenessSet * ls);
- void checkLiveness (genLivenessSet * ls);
- void unspillLiveness (genLivenessSet * ls);
+ void saveLiveness (genLivenessSet* ls);
+ void restoreLiveness (genLivenessSet* ls);
+ void checkLiveness (genLivenessSet* ls);
+ void unspillLiveness (genLivenessSet* ls);
//-------------------------------------------------------------------------
//
#endif // LEGACY_BACKEND
#endif // _CODEGENCLASSIC_H_
+
// if they are fully-interruptible. So if we have a catch
// or finally that will keep frame-vars alive, we need to
// force fully-interruptible.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (verbose)
printf("Method has EH, marking method as fully interruptible\n");
#endif
+
m_cgInterruptible = true;
}
#endif // JIT32_GCENCODER
unsigned int lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
LclVarDsc * varDsc = lvaTable + lclNum;
- // Struct fields are not traversed in a consistent order, so ignore them when
- // verifying that we see the var nodes in execution order
#ifdef DEBUG
#if !defined(_TARGET_AMD64_) // no addr nodes on AMD and experimenting with with encountering vars in 'random' order
+ // Struct fields are not traversed in a consistent order, so ignore them when
+ // verifying that we see the var nodes in execution order
if (ForCodeGen)
{
if (tree->gtOper == GT_OBJ)
return true;
}
- // Else jitting.
-
#ifdef _TARGET_AMD64_
// If code addr could be encoded as 32-bit offset relative to IP, we need to record a relocation.
if (genCodeIndirAddrCanBeEncodedAsPCRelOffset(addr))
return true;
}
- // Else jitting.
-
#ifdef _TARGET_AMD64_
// By default all direct code addresses go through relocation so that VM will setup
// a jump stub if addr cannot be encoded as pc-relative offset.
#endif
AGAIN:
-
/* We come back to 'AGAIN' if we have an add of a constant, and we are folding that
constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
here if we find a scaled index.
*/
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if SCALED_ADDR_MODES
assert(mul == 0);
/* Get hold of the index value */
ssize_t ixv = index->AsIntConCommon()->IconValue();
- /* Scale the index if necessary */
-
#if SCALED_ADDR_MODES
+ /* Scale the index if necessary */
if (tmpMul) ixv *= tmpMul;
#endif
cns += ixv;
- /* There is no scaled operand any more */
-
#if SCALED_ADDR_MODES
+ /* There is no scaled operand any more */
mul = 0;
#endif
rv2 = 0;
#ifndef LEGACY_BACKEND
#ifdef DEBUG
- // After code generation, dump the frame layout again. It should be the same as before code generation, if code generation
- // hasn't touched it (it shouldn't!).
+ // After code generation, dump the frame layout again. It should be the same as before code generation, if code
+ // generation hasn't touched it (it shouldn't!).
if (verbose)
{
compiler->lvaTableDump();
trackedStackPtrsContig = !compiler->opts.compDbgEnC;
#endif
- /* We're done generating code for this function */
#ifdef DEBUG
+ /* We're done generating code for this function */
compiler->compCodeGenDone = true;
#endif
*nativeSizeOfCode = codeSize;
compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
-// printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);
+ // printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);
// Make sure that the x86 alignment and cache prefetch optimization rules
// were obeyed.
// Don't start a method in the last 7 bytes of a 16-byte alignment area
// unless we are generating SMALL_CODE
- //noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE));
+ // noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE));
/* Now that the code is issued, we can finalize and emit the unwind data */
genReportEH();
- // Create and store the GC info for this method.
#ifdef JIT32_GCENCODER
#ifdef DEBUG
void* infoPtr =
#endif // DEBUG
#endif
+ // Create and store the GC info for this method.
genCreateAndStoreGCInfo(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
#ifdef DEBUG
bool processed; // true after we've processed the argument (and it is in its final location)
bool circular; // true if this register participates in a circular dependency loop.
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
// For UNIX AMD64 struct passing, the type of the register argument slot can differ from
// the type of the lclVar in ways that are not ascertainable from lvType.
// So, for that case we retain the type of the register in the regArgTab.
- // In other cases, we simply use the type of the lclVar to determine the type of the register.
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- // This is the UNIX_AMD64 implementation
var_types getRegType(Compiler* compiler)
{
return type; // UNIX_AMD64 implementation
#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
- // This is the implementation for all other targets
+ // In other cases, we simply use the type of the lclVar to determine the type of the register.
var_types getRegType(Compiler* compiler)
{
LclVarDsc varDsc = compiler->lvaTable[varNum];
regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
noway_assert(regArgNum < argMax);
- // we better not have added it already (there better not be multiple vars representing this argument register)
+ // We better not have added it already (there better not be multiple vars representing this argument register)
noway_assert(regArgTab[regArgNum].slot == 0);
- // Set the register type.
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Set the register type.
regArgTab[regArgNum].type = regType;
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
noway_assert((regArgNum + i) < argMax);
- // we better not have added it already (there better not be multiple vars representing this argument register)
+ // We better not have added it already (there better not be multiple vars representing this argument register)
noway_assert(regArgTab[regArgNum + i].slot == 0);
regArgTab[regArgNum + i].varNum = varNum;
regType = regArgTab[regArgNum + i].getRegType(compiler);
regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// lvArgReg could be INT or FLOAT reg. So the following assertion doesn't hold.
// The type of the register depends on the classification of the first eightbyte
// of the struct. For information on classification refer to the System V x86_64 ABI at:
// http://www.x86-64.org/documentation/abi.pdf
-#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
assert((i > 0) || (regNum == varDsc->lvArgReg));
#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// Is the arg dead on entry to the method ?
/* At this point, everything that has the "circular" flag
* set to "true" forms a circular dependency */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (regArgMaskLive)
// LSRA allocates registers to incoming parameters in order and will not overwrite
// a register still holding a live parameter.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef LEGACY_BACKEND
noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) && "Homing of float argument registers with circular dependencies not implemented.");
#endif // LEGACY_BACKEND
{
emitAttr size;
- // If this is the wrong register file, just continue.
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // If this is the wrong register file, just continue.
if (regArgTab[argNum].type == TYP_UNDEF)
{
// This could happen if the reg in regArgTab[argNum] is of the other register file -
varNum = regArgTab[argNum].varNum; noway_assert(varNum < compiler->lvaCount);
varDsc = compiler->lvaTable + varNum;
- // If not a stack arg go to the next one
-
#ifndef _TARGET_64BIT_
+ // If not a stack arg go to the next one
if (varDsc->lvType == TYP_LONG)
{
if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg)
else
#endif // !_TARGET_64BIT_
{
+ // If not a stack arg go to the next one
if (!regArgTab[argNum].stackArg)
{
continue;
noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
#ifndef _TARGET_64BIT_
- //Right now we think that incoming arguments are not pointer sized. When we eventually
- //understand the calling convention... this still won't be true. But maybe we'll have a better
- //idea of how to ignore it.
#ifndef _TARGET_ARM_
+ // Right now we think that incoming arguments are not pointer sized. When we eventually
+ // understand the calling convention, this still won't be true. But maybe we'll have a better
+ // idea of how to ignore it.
+
// On Arm, a long can be passed in register
noway_assert(genTypeSize(genActualType(varDsc->TypeGet())) == sizeof(void *));
#endif
/* With compInitMem, all untracked vars will have to be init'ed */
/* VSW 102460 - Do not force initialization of compiler generated temps,
- unless they are untracked GC type or structs that contain GC pointers */
+ unless they are untracked GC type or structs that contain GC pointers */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if FEATURE_SIMD
// TODO-1stClassStructs
// This is here to duplicate previous behavior, where TYP_SIMD8 locals
// we will mess up already computed offsets on the stack (for ESP frames)
regSet.rsSetRegsModified(RBM_EDI);
+#ifdef UNIX_AMD64_ABI
// For register arguments we may have to save ECX (and RDI on Amd64 System V OSes.)
// In such case use R12 and R13 registers.
-#ifdef UNIX_AMD64_ABI
if (maskCalleeRegArgMask & RBM_RCX)
{
regSet.rsSetRegsModified(RBM_R12);
assert(compiler->compGeneratingProlog);
#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
- // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack here.
- // Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not here.
+ // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack
+ // here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not
+ // here.
regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED;
#else // !defined(_TARGET_XARCH_) || FEATURE_STACK_FP_X87
regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
rsPushRegs |= RBM_FPBASE;
//
- // It may be possible to skip pushing/popping lr for leaf methods. However, such optimization would require
+ // It may be possible to skip pushing/popping lr for leaf methods. However, such optimization would require
// changes in GC suspension architecture.
//
- // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we generate
- // partially interruptible code for both the method that contains the tight loop with the call and the leaf method
- // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends
- // on the return address to be saved on the stack. If we skipped pushing/popping lr, the return address would never
+ // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we
+ // generate partially interruptible code for both the method that contains the tight loop with the call and the leaf
+ // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends
+ // on the return address to be saved on the stack. If we skipped pushing/popping lr, the return address would never
// be saved on the stack and the GC suspension would time out.
//
- // So if we wanted to skip pushing pushing/popping lr for leaf frames, we would also need to do one of
+    // So if we wanted to skip pushing/popping lr for leaf frames, we would also need to do one of
// the following to make GC suspension work in the above scenario:
// - Make return address hijacking work even when lr is not saved on the stack.
// - Generate fully interruptible code for loops that contains calls
// - Generate fully interruptible code for leaf methods
- //
+ //
// Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity
// is not worth it.
//
compiler->unwindPushMaskFloat(maskPushRegsFloat);
}
#elif defined(_TARGET_ARM64_)
- // See the document "ARM64 JIT Frame Layout" and/or "ARM64 Exception Data" for more details or requirements and options. Case numbers
- // in comments here refer to this document.
+ // See the document "ARM64 JIT Frame Layout" and/or "ARM64 Exception Data" for more details or requirements and
+ // options. Case numbers in comments here refer to this document.
//
// For most frames, generate, e.g.:
- // stp fp, lr, [sp,-0x80]! // predecrement SP with full frame size, and store FP/LR pair. Store pair ensures stack stays aligned.
- // stp r19, r20, [sp, 0x60] // store at positive offset from SP established above, into callee-saved area at top of frame (highest addresses).
+ // stp fp, lr, [sp,-0x80]! // predecrement SP with full frame size, and store FP/LR pair. Store pair
+ // // ensures stack stays aligned.
+ // stp r19, r20, [sp, 0x60] // store at positive offset from SP established above, into callee-saved area
+ // // at top of frame (highest addresses).
// stp r21, r22, [sp, 0x70]
//
// Notes:
- // 1. We don't always need to save FP. If FP isn't saved, then LR is saved with the other callee-saved registers at the top of the frame.
+ // 1. We don't always need to save FP. If FP isn't saved, then LR is saved with the other callee-saved registers
+ // at the top of the frame.
// 2. If we save FP, then the first store is FP, LR.
- // 3. General-purpose registers are 8 bytes, floating-point registers are 16 bytes, but FP/SIMD registers only preserve their lower 8 bytes, by calling convention.
- // 4. For frames with varargs, we spill the integer register arguments to the stack, so all the arguments are consecutive.
+ // 3. General-purpose registers are 8 bytes, floating-point registers are 16 bytes, but FP/SIMD registers only
+ // preserve their lower 8 bytes, by calling convention.
+ // 4. For frames with varargs, we spill the integer register arguments to the stack, so all the arguments are
+ // consecutive.
// 5. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc).
int totalFrameSize = genTotalFrameSize();
assert(maskSaveRegsFloat == RBM_NONE);
}
- int frameType = 0; // This number is arbitrary, is defined below, and corresponds to one of the frame styles we generate based on various sizes.
+ int frameType = 0; // This number is arbitrary, is defined below, and corresponds to one of the frame styles we
+ // generate based on various sizes.
int calleeSaveSPDelta = 0;
int calleeSaveSPDeltaUnaligned = 0;
// sub sp,sp,#framesz
// stp fp,lr,[sp,#outsz] // note that by necessity, #outsz <= #framesz - 16, so #outsz <= 496.
//
- // The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP with signed offset encoding.
+ // The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP with
+ // signed offset encoding.
//
// After saving callee-saved registers, we establish the frame pointer with:
// add fp,sp,#outsz
{
// Case 5 or 6.
//
- // First, the callee-saved registers will be saved, and the callee-saved register code must use pre-index to subtract from SP
- // as the first instruction. It must also leave space for varargs registers to be stored. For example:
+ // First, the callee-saved registers will be saved, and the callee-saved register code must use pre-index
+ // to subtract from SP as the first instruction. It must also leave space for varargs registers to be
+ // stored. For example:
// stp r19,r20,[sp,#-96]!
// stp d8,d9,[sp,#16]
// ... save varargs incoming integer registers ...
- // Note that all SP alterations must be 16-byte aligned. We have already calculated any alignment to be lower on the stack than
- // the callee-saved registers (see lvaAlignFrame() for how we calculate alignment). So, if there is an odd number of
- // callee-saved registers, we use (for example, with just one saved register):
+ // Note that all SP alterations must be 16-byte aligned. We have already calculated any alignment to be
+ // lower on the stack than the callee-saved registers (see lvaAlignFrame() for how we calculate alignment).
+ // So, if there is an odd number of callee-saved registers, we use (for example, with just one saved
+ // register):
// sub sp,sp,#16
// str r19,[sp,#8]
- // This is one additional instruction, but it centralizes the aligned space. Otherwise, it might be possible to have two 8-byte alignment
- // padding words, one below the callee-saved registers, and one above them. If that is preferable, we could implement it.
+ // This is one additional instruction, but it centralizes the aligned space. Otherwise, it might be
+ // possible to have two 8-byte alignment padding words, one below the callee-saved registers, and one
+ // above them. If that is preferable, we could implement it.
// Note that any varargs saved space will always be 16-byte aligned, since there are 8 argument registers.
//
- // Then, define #remainingFrameSz = #framesz - (callee-saved size + varargs space + possible alignment padding from above).
+ // Then, define #remainingFrameSz = #framesz - (callee-saved size + varargs space + possible alignment
+ // padding from above).
// Note that #remainingFrameSz must not be zero, since we still need to save FP,SP.
//
// Generate:
// stp fp,lr,[sp,#outsz]
// add fp,sp,#outsz
//
- // However, we need to handle the case where #outsz is larger than the constant signed offset encoding can handle. And, once again,
- // we might need to deal with #outsz that is not aligned to 16-bytes (i.e., STACK_ALIGN). So, in the case of large #outsz we will
- // have an additional SP adjustment, using one of the following sequences:
+ // However, we need to handle the case where #outsz is larger than the constant signed offset encoding can
+ // handle. And, once again, we might need to deal with #outsz that is not aligned to 16-bytes (i.e.,
+ // STACK_ALIGN). So, in the case of large #outsz we will have an additional SP adjustment, using one of the
+ // following sequences:
//
// Define #remainingFrameSz2 = #remainingFrameSz - #outsz.
//
//
// Or:
//
- // sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is always guaranteed to be 8 byte aligned).
+ // sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is
+ // // always guaranteed to be 8 byte aligned).
// stp fp,lr,[sp,#8] // it will always be #8 in the unaligned case
// add fp,sp,#8
// sub sp,sp,#outsz - #8
else if (frameSize < compiler->getVeryLargeFrameSize())
{
// Frame size is (0x1000..0x3000)
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if CPU_LOAD_STORE_ARCH
instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)pageSize);
// complete since the tickles could cause a stack overflow, and we
// need to be able to crawl the stack afterward (which means the
// stack pointer needs to be known).
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_XARCH_
bool pushedStubParam = false;
else
{
// Generate:
- // add sp,sp,#outsz ; if #outsz is not 16-byte aligned, we need to be more careful
+ // add sp,sp,#outsz ; if #outsz is not 16-byte aligned, we need to be more
+ // ; careful
int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
assert(spAdjustment3 > 0);
assert((spAdjustment3 % 16) == 0);
// Generate:
// ldp fp,lr,[sp,#outsz]
- // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if it's large
+ // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if
+ // ; it's large
genEpilogRestoreRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, REG_IP0, nullptr);
}
inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
}
- // For System V AMD64 calling convention ESI and EDI are volatile registers.
#ifndef UNIX_AMD64_ABI
+ // For System V AMD64 calling convention ESI and EDI are volatile registers.
if (regSet.rsRegsModified(RBM_ESI))
{
popCount++;
noway_assert(regSet.rsRegsModified(RBM_EDI));
- // For register arguments we may have to save ECX (and RDI on Amd64 System V OSes.)
#ifdef UNIX_AMD64_ABI
+ // For register arguments we may have to save ECX and RDI on Amd64 System V OSes
if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
{
noway_assert(regSet.rsRegsModified(RBM_R12));
regTracker.rsTrackRegTrash(REG_R13);
}
#else // !UNIX_AMD64_ABI
+ // For register arguments we may have to save ECX
if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
{
noway_assert(regSet.rsRegsModified(RBM_ESI));
instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EAX);
instGen (INS_r_stosd);
- // Move back the argument registers
#ifdef UNIX_AMD64_ABI
+ // Move back the argument registers
if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
{
inst_RV_RV(INS_mov, REG_RCX, REG_R12);
inst_RV_RV(INS_mov, REG_RDI, REG_R13);
}
#else // !UNIX_AMD64_ABI
+ // Move back the argument registers
if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
{
inst_RV_RV(INS_mov, REG_ECX, REG_ESI);
if (!varTypeIsGC(tempThis->tdTempType()))
continue;
-// printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs);
+ // printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs);
inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL);
}
{
if (isFramePointerUsed())
{
+#if defined(_TARGET_ARM_)
// lvStkOffs is always valid for incoming stack-arguments, even if the argument
// will become enregistered.
- //
// On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2*REGSIZE_BYTES
-#if defined(_TARGET_ARM_)
noway_assert((2*REGSIZE_BYTES <= varDsc->lvStkOffs) && (size_t(varDsc->lvStkOffs) < compiler->compArgSize+2*REGSIZE_BYTES));
#else
+ // lvStkOffs is always valid for incoming stack-arguments, even if the argument
+ // will become enregistered.
noway_assert((0 < varDsc->lvStkOffs) && (size_t(varDsc->lvStkOffs) < compiler->compArgSize));
#endif
}
*pInitRegZeroed = false;
- // mov reg, dword ptr [compiler->gsGlobalSecurityCookieAddr]
- // mov dword ptr [frame.GSSecurityCookie], reg
-
#if CPU_LOAD_STORE_ARCH
instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
regTracker.rsTrackRegTrash(reg);
#else
+ // mov reg, dword ptr [compiler->gsGlobalSecurityCookieAddr]
+ // mov dword ptr [frame.GSSecurityCookie], reg
getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC,
reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
regTracker.rsTrackRegTrash(reg);
unsigned saveStackLvl2 = genStackLevel;
+#if defined(_TARGET_X86_)
// Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
// for x86 stack unwinding
- //
// Push the profilerHandle
- //
-#if defined(_TARGET_X86_)
if (compiler->compProfilerMethHndIndirected)
{
getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
#else // LEGACY_BACKEND
+#if defined(_TARGET_X86_)
//
// Push the profilerHandle
//
-#if defined(_TARGET_X86_)
+
if (compiler->compProfilerMethHndIndirected)
{
getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
compiler->fgPtrArgCntMax = 1;
}
#elif defined(_TARGET_ARM_)
- // We could optimize register usage based on return value is int/long/void. But to keep it simple we will lock RBM_PROFILER_RET_USED always.
+ //
+ // Push the profilerHandle
+ //
+
+        // We could optimize register usage based on whether the return value is int/long/void. But to keep it
+        // simple we will lock RBM_PROFILER_RET_USED always.
regNumber scratchReg = regSet.rsGrabReg(RBM_PROFILER_RET_SCRATCH);
noway_assert(scratchReg == REG_PROFILER_RET_SCRATCH);
regSet.rsLockReg(RBM_PROFILER_RET_USED);
}
else
{
- // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing profiler handle.
- // Therefore, r0 is moved to REG_PROFILER_RETURN_SCRATCH as per contract.
+ // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing
+ // profiler handle. Therefore, r0 is moved to REG_PROFILER_RETURN_SCRATCH as per contract.
if (RBM_ARG_0 & gcInfo.gcRegGCrefSetCur)
{
attr = EA_GCREF;
genCheckUseBlockInit();
// Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_X86_)
+
if (compiler->compTailCallUsed)
{
// If we are generating a helper-based tailcall, we've set the tailcall helper "flags"
}
#endif // DEBUG
+ // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
if (compiler->opts.compDbgEnC)
{
// We always save FP.
signed int loOffs = tempThis->tdTempOffs();
signed int hiOffs = loOffs + TARGET_POINTER_SIZE;
- // If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the previous
- // frame pointer. Thus, stkOffs can't be zero.
- // However, on amd64 there is no requirement to chain frame pointers.
+ // If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the
+ // previous frame pointer. Thus, stkOffs can't be zero.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if !defined(_TARGET_AMD64_)
+ // However, on amd64 there is no requirement to chain frame pointers.
+
noway_assert(!isFramePointerUsed() || loOffs != 0);
#endif // !defined(_TARGET_AMD64_)
-// printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs);
+ // printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs);
hasUntrLcl = true;
#endif // _TARGET_AMD64_
+#ifdef _TARGET_ARM_
/*-------------------------------------------------------------------------
*
* Now start emitting the part of the prolog which sets up the frame
*/
-#ifdef _TARGET_ARM_
if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
{
inst_IV(INS_push, (int)regSet.rsMaskPreSpillRegs(true));
// Subtract the local frame size from SP.
//
//-------------------------------------------------------------------------
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef _TARGET_ARM64_
regMaskTP maskStackAlloc = RBM_NONE;
* Take care of register arguments first
*/
+ RegState *regState;
+
#ifndef LEGACY_BACKEND
// Update the arg initial register locations.
compiler->lvaUpdateArgsWithInitialReg();
#endif // !LEGACY_BACKEND
- RegState *regState;
-
FOREACH_REGISTER_FILE(regState)
{
if (regState->rsCalleeRegArgMaskLiveIn)
printf("\n");
#endif
- // On non-x86 the VARARG cookie does not need any special treatment.
#ifdef _TARGET_X86_
+ // On non-x86 the VARARG cookie does not need any special treatment.
// Load up the VARARG argument pointer register so it doesn't get clobbered.
// only do this if we actually access any statically declared args
if (compiler->compLclFrameSize)
{
- /* Add 'compiler->compLclFrameSize' to ESP */
-
#ifdef _TARGET_X86_
+ /* Add 'compiler->compLclFrameSize' to ESP */
/* Use pop ECX to increment ESP by 4, unless compiler->compJmpOpUsed is true */
if ( (compiler->compLclFrameSize == sizeof(void*)) && !compiler->compJmpOpUsed )
else
#endif // _TARGET_X86
{
+ /* Add 'compiler->compLclFrameSize' to ESP */
/* Generate "add esp, <stack-size>" */
inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
}
* The ARM funclet prolog sequence is:
*
* push {regs,lr} ; We push the callee-saved regs and 'lr'.
- * ; TODO-ARM-CQ: We probably only need to save lr, plus any callee-save registers that we actually use
- * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for the
- * ; entire function.
+ * ; TODO-ARM-CQ: We probably only need to save lr, plus any callee-save registers that we
+ * ; actually use in the funclet. Currently, we save the same set of callee-saved regs
+ * ; calculated for the entire function.
* sub sp, XXX ; Establish the rest of the frame.
* ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
* ; up to preserve stack alignment. If we push an odd number of registers, we also
* ; generate this, to keep the stack aligned.
*
- * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters.
- * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog.
+ * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
+ * ; filters.
+ * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
+ * ; epilog.
*
* if (this is a filter funclet)
* {
* // } catch(Exception) {
* // throw new Exception(); // The exception thrown here ...
* // }
- * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack
+ * // } filter { // ... will be processed here, while the "catch" funclet frame is
+ * // // still on the stack
* // } filter-handler {
* // }
* //
- * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will
- * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always
- * // create a main function PSP for any function with a filter.
+ * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the
+ * // enclosing frame will be a funclet or main function. We won't know any time there is a filter protecting
+ * // nested EH. To simplify, we just always create a main function PSP for any function with a filter.
*
- * ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function)
+ * ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of
+ * ; the dynamically containing funclet or function)
* str r1, [sp + PSP_slot_SP_offset] ; store the PSP
* sub r11, r1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer
* }
* +=======================+ <---- Caller's SP
* |Callee saved registers |
* |-----------------------|
- * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset in function and funclet
+ * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset
+ * | | // in function and funclet
* |-----------------------|
* | PSP slot |
* |-----------------------|
* push ebp
* push callee-saved regs
* ; TODO-AMD64-CQ: We probably only need to save any callee-save registers that we actually use
- * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for the
- * ; entire function.
+ * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for
+ * ; the entire function.
* sub sp, XXX ; Establish the rest of the frame.
* ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
* ; up to preserve stack alignment. If we push an odd number of registers, we also
* ; generate this, to keep the stack aligned.
*
- * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters.
- * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog.
+ * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
+ * ; filters.
+ * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
+ * ; epilog.
* ; Also, re-establish the frame pointer from the PSP.
*
- * mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the PSP of the dynamically containing funclet or function)
+ * mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the
+ * ; PSP of the dynamically containing funclet or function)
* mov [rsp + PSP_slot_InitialSP_offset], rbp ; store the PSP in our frame
- * lea ebp, [rbp + Function_InitialSP_to_FP_delta] ; re-establish the frame pointer of the parent frame. If Function_InitialSP_to_FP_delta==0, we don't need this instruction.
+ * lea ebp, [rbp + Function_InitialSP_to_FP_delta] ; re-establish the frame pointer of the parent frame. If
+ * ; Function_InitialSP_to_FP_delta==0, we don't need this
+ * ; instruction.
*
* The epilog sequence is then:
*
genFnProlog();
// Generate all the prologs and epilogs.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_EH_FUNCLETS
// Maybe I should just set "fold" true in the call to genMakeAddressable above.
if (scaledIndex != NULL)
{
- int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal); // If this truncates, that's OK -- multiple of 2^6.
+ int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal); // If this truncates, that's OK --
+ // multiple of 2^6.
if (mul == 0)
{
mul = scale;
GenTreeArrLen* arrLenExact = arrLen->AsArrLen();
lenOffset = arrLenExact->ArrLenOffset();
- // We always load the length into a register on ARM and x64.
#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
+ // We always load the length into a register on ARM and x64.
+
// 64-bit has to act like LOAD_STORE_ARCH because the array only holds 32-bit
// lengths, but the index expression *can* be native int (64-bits)
arrRef = arrLenExact->ArrRef();
genRecoverReg(index, indRegMask, RegSet::KEEP_REG);
+#if CPU_LOAD_STORE_ARCH
/* Subtract the lower bound, and do the range check */
-#if CPU_LOAD_STORE_ARCH
regNumber valueReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(arrReg) & ~genRegMask(index->gtRegNum));
getEmitter()->emitIns_R_AR(
INS_ldr, EA_4BYTE,
index->gtRegNum,
valueReg);
#else
+ /* Subtract the lower bound, and do the range check */
getEmitter()->emitIns_R_AR(
INS_sub, EA_4BYTE,
index->gtRegNum,
if (compiler->gsGlobalSecurityCookieAddr == NULL)
{
// JIT case
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if CPU_LOAD_STORE_ARCH
+
regNumber reg = regSet.rsGrabReg(RBM_ALLINT);
getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE,
reg,
((tree->gtFlags & GTF_EXCEPT) | GTF_IND_VOLATILE))
{
/* Compare against any register to do null-check */
- #if defined(_TARGET_XARCH_)
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_XARCH_)
inst_TT_RV(INS_cmp, tree, REG_TMP_0, 0, EA_1BYTE);
genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
#elif CPU_LOAD_STORE_ARCH
gcInfo.gcMarkRegSetByref(RBM_ARG_0); // byref in ARG_0
#ifdef _TARGET_ARM_
+#if NOGC_WRITE_BARRIERS
// Finally, we may be required to spill whatever is in the further argument registers
// trashed by the call. The write barrier trashes some further registers --
// either the standard volatile var set, or, if we're using assembly barriers, a more specialized set.
-#if NOGC_WRITE_BARRIERS
+
regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH_NOGC;
#else
regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH;
genJccLongLo(cmp, jumpTrue, jumpFalse);
/* Free up anything that was tied up by either operand */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if CPU_LOAD_STORE_ARCH
+
// Fix 388442 ARM JitStress WP7
regSet.rsUnlockUsedReg(genRegPairMask(op2->gtRegPair));
genReleaseRegPair(op2);
break;
case GT_NO_OP:
+ // The VM does certain things with actual NOP instructions
+ // so generate something small that has no effect, but isn't
+ // a typical NOP
if (tree->gtFlags & GTF_NO_OP_NO)
{
- // The VM does certain things with actual NOP instructions
- // so generate something small that has no effect, but isn't
- // a typical NOP
#ifdef _TARGET_XARCH_
// The VM expects 0x66 0x90 for a 2-byte NOP, not 0x90 0x90
instGen(INS_nop);
/* Argument was passed on the stack, but ended up in a register
* Store it back to the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef _TARGET_64BIT_
if (varDsc->TypeGet() == TYP_LONG)
noway_assert(!varDsc->lvRegister);
/* Reload it from the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef _TARGET_64BIT_
if (varDsc->TypeGet() == TYP_LONG)
#if FEATURE_STACK_FP_X87
/* Spill any register that hold partial values so that the exit liveness
from sides is the same */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
regMaskTP spillMask = regSet.rsMaskUsedFloat | regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat;
/* Generate jmp lab_done */
lab_done = genCreateTempLabel();
- // We would like to know here if the else node is really going to generate
- // code, as if it isn't, we're generating here a jump to the next instruction.
- // What you would really like is to be able to go back and remove the jump, but
- // we have no way of doing that right now.
-
#ifdef DEBUG
// We will use this to assert we don't emit instructions if we decide not to
// do the jmp
unsigned emittedInstructions = getEmitter()->emitInsCount;
bool bSkippedJump = false;
#endif
+ // We would like to know here if the else node is really going to generate
+ // code, as if it isn't, we're generating here a jump to the next instruction.
+ // What you would really like is to be able to go back and remove the jump, but
+ // we have no way of doing that right now.
+
if (
#if FEATURE_STACK_FP_X87
!bHasFPUState && // If there is no FPU state, we won't need an x87 transition
#endif
genIsEnregisteredIntVariable(thenNode) == reg)
{
+#ifdef DEBUG
// For the moment, fix this easy case (enregistered else node), which
// is the one that happens all the time.
-#ifdef DEBUG
+
bSkippedJump = true;
#endif
}
reg = regSet.rsPickReg(needReg, bestReg);
- /* Compute the value into the target: reg=op1*op2_icon */
-
#if LEA_AVAILABLE
+ /* Compute the value into the target: reg=op1*op2_icon */
if (op2->gtIntCon.gtIconVal == 3 || op2->gtIntCon.gtIconVal == 5 || op2->gtIntCon.gtIconVal == 9)
{
regNumber regSrc;
else
#endif // LEA_AVAILABLE
{
+ /* Compute the value into the target: reg=op1*op2_icon */
inst_RV_TT_IV(INS_MUL, reg, op1, (int)op2->gtIntCon.gtIconVal);
}
inst_RV_IV(INS_AND, reg, and_val, EA_4BYTE, flags);
}
- /* Update the live set of register variables */
#ifdef DEBUG
+ /* Update the live set of register variables */
if (compiler->opts.varNames) genUpdateLife(tree);
#endif
/* Make the target addressable for load/store */
addrReg = genMakeAddressable2(op1, needReg, RegSet::KEEP_REG, true, true);
- #if CPU_LOAD_STORE_ARCH
- // We always load from memory then store to memory
- #else
+ #if !CPU_LOAD_STORE_ARCH
+ // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
+
/* For small types with overflow check, we need to
sign/zero extend the result, so we need it in a reg */
if (ovfl && genTypeSize(treeType) < sizeof(int))
- #endif // CPU_LOAD_STORE_ARCH
+ #endif // !CPU_LOAD_STORE_ARCH
{
// Load op1 into a reg
addrReg = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
/* Compute the new value into the target register */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if CPU_HAS_BYTE_REGS
+
// Fix 383833 X86 ILGEN
regNumber reg2;
if ((op2->gtFlags & GTF_REG_VAL) != 0)
addrReg = genMakeAddressable2(op1, 0, RegSet::KEEP_REG, true, true);
regSet.rsLockUsedReg(addrReg);
-#if CPU_LOAD_STORE_ARCH
- // We always load from memory then store to memory
-#else
+#if !CPU_LOAD_STORE_ARCH
+ // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
/* For small types with overflow check, we need to
sign/zero extend the result, so we need it in a reg */
if (ovfl && genTypeSize(treeType) < sizeof(int))
-#endif // CPU_LOAD_STORE_ARCH
+#endif // !CPU_LOAD_STORE_ARCH
{
reg = regSet.rsPickReg();
regSet.rsLockReg(genRegMask(reg));
addrReg = genKeepAddressable(op1, addrReg);
regSet.rsLockUsedReg(addrReg);
-#if CPU_LOAD_STORE_ARCH
- // We always load from memory then store to memory
-#else
+#if !CPU_LOAD_STORE_ARCH
+ // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
+
/* For small types with overflow check, we need to
sign/zero extend the result, so we need it in a reg */
if (ovfl && genTypeSize(treeType) < sizeof(int))
-#endif // CPU_LOAD_STORE_ARCH
+#endif // !CPU_LOAD_STORE_ARCH
{
reg = regSet.rsPickReg();
/* Make sure the address registers are still here */
addrReg = genKeepAddressable(op1, addrReg, op2Regs);
- /* Perform the shift */
#ifdef _TARGET_XARCH_
+ /* Perform the shift */
inst_TT_CL(ins, op1);
#else
+ /* Perform the shift */
noway_assert(op2->gtFlags & GTF_REG_VAL);
op2Regs = genRegMask(op2->gtRegNum);
// On ARM, until proven otherwise by performance numbers, just do the shift.
// It's no bigger than add (16 bits for low registers, 32 bits for high registers).
// It's smaller than two "add reg, reg".
+
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef _TARGET_ARM_
if (oper == GT_LSH)
{
noway_assert(op1->gtFlags & GTF_REG_VAL);
reg = op1->gtRegNum;
- /* Perform the shift */
#ifdef _TARGET_ARM_
+ /* Perform the shift */
getEmitter()->emitIns_R_R(ins, EA_4BYTE, reg, op2->gtRegNum, flags);
#else
+ /* Perform the shift */
inst_RV_CL(ins, reg);
#endif
genReleaseReg(op2);
/*****************************************************************************
*
- * Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree). Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
+ * Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree).
+ * Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
*/
void CodeGen::genCodeForRelop(GenTreePtr tree,
emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+#if CPU_USES_BLOCK_MOVE
// Materialize the trees in the order desired
-#if CPU_USES_BLOCK_MOVE
genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
#error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE"
#endif
+ // Materialize the trees in the order desired
bool helperUsed;
regNumber regDst;
regNumber regSrc;
}
/* Now take care of the remainder */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_64BIT_
if (length > 4)
#else // !CPU_USES_BLOCK_MOVE
#ifndef _TARGET_ARM_
- // Currently only the ARM implementation is provided
+// Currently only the ARM implementation is provided
#error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
#endif
//
regTracker.rsTrackRegTrash(reg);
- /* Update the live set of register variables */
-
#ifdef DEBUG
+ /* Update the live set of register variables */
if (compiler->opts.varNames) genUpdateLife(tree);
#endif
/* Now we can update the register pointer information */
-// genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ // genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
gcInfo.gcMarkRegPtrVal(reg, treeType);
genCodeForTree_DONE_LIFE(tree, reg);
}
+#ifdef PROFILING_SUPPORTED
//The profiling hook does not trash registers, so it's safe to call after we emit the code for
//the GT_RETURN tree.
-#ifdef PROFILING_SUPPORTED
+
if (compiler->compCurBB == compiler->genReturnBB)
{
genProfilingLeaveCallback();
if (ins == INS_add)
{
- genUpdateLife(tree); //If the operator was add, then we were called from the GT_LOCKADD
- //case. In that case we don't use the result, so we don't need to
- //update anything.
+ // If the operator was add, then we were called from the GT_LOCKADD
+ // case. In that case we don't use the result, so we don't need to
+ // update anything.
+ genUpdateLife(tree);
}
else
{
noway_assert(!(op1->gtFlags & GTF_REG_VAL));
genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
-// gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ // gcInfo.gcMarkRegSetNpt(genRegMask(reg));
noway_assert((gcInfo.gcRegGCrefSetCur & genRegMask(reg)) == 0);
regTracker.rsTrackRegTrash(reg); // reg does have foldable value in it
regMaskTP retMask = genCodeForCall(op2, true);
// Ret mask should be contiguously set from s0, up to s3 or starting from d0 upto d3.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
regMaskTP mask = ((retMask >> REG_FLOATRET) + 1);
assert((mask & (mask - 1)) == 0);
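A worked instance of the contiguity check above (illustrative only, not part of the patch): shifting the return mask down by REG_FLOATRET and adding one yields a power of two exactly when the set bits start at s0/d0 and are contiguous.

// Assuming REG_FLOATRET corresponds to s0/d0:
//   retMask = { s0, s1, s2, s3 }  ->  retMask >> REG_FLOATRET == 0b1111
//   mask    = 0b1111 + 1          ==  0b10000, a power of two, so (mask & (mask - 1)) == 0
//   a gapped mask such as 0b1011 gives mask == 0b1100, and 0b1100 & 0b1011 == 0b1000 != 0, so the assert fires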
op1Reg = op1->gtRegVar.gtRegNum;
+#ifdef DEBUG
/* Compute the RHS (hopefully) into the variable's register.
For debuggable code, op1Reg may already be part of regSet.rsMaskVars,
as variables are kept alive everywhere. So we have to be
careful if we want to compute the value directly into
the variable's register. */
-#ifdef DEBUG
bool needToUpdateRegSetCheckLevel;
needToUpdateRegSetCheckLevel = false;
#endif
regGC = WriteBarrier(op1, op2, addrReg);
+            // Was the assignment done by the WriteBarrier?
if (regGC == RBM_NONE)
{
- // No, assignment was not done by the WriteBarrier
-
#ifdef _TARGET_ARM_
if (volat)
{
inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
}
- /* Update the current liveness info */
-
#ifdef DEBUG
+ /* Update the current liveness info */
if (compiler->opts.varNames) genUpdateLife(tree);
#endif
genReleaseReg(op2);
- /* Update the current liveness info */
-
#ifdef DEBUG
+ /* Update the current liveness info */
if (compiler->opts.varNames) genUpdateLife(tree);
#endif
{
#if defined(_TARGET_XARCH_)
// cmpxchg does not have an [r/m32], imm32 encoding, so we need a register for the value operand
- //
+
// Since this is a "call", evaluate the operands from right to left. Don't worry about spilling
// right now, just get the trees evaluated.
// As a friendly reminder. IL args are evaluated left to right.
- //
+
GenTreePtr location = tree->gtCmpXchg.gtOpLocation; // arg1
GenTreePtr value = tree->gtCmpXchg.gtOpValue; // arg2
GenTreePtr comparand = tree->gtCmpXchg.gtOpComparand; // arg3
regMaskTP addrReg;
-
- // This little piggy (on the left) went to market.
bool isAddr = genMakeIndAddrMode(location,
tree,
false, /* not for LEA */
regSet.rsMarkRegUsed(location);
}
- // This little piggy (in the middle) went home.
// We must have a reg for the Value, but it doesn't really matter which register.
    // Try to avoid EAX and the address register if possible.
genComputeReg(value, regSet.rsNarrowHint(RBM_ALLINT, RBM_EAX | addrReg), RegSet::ANY_REG, RegSet::KEEP_REG);
- // This little piggy (on the right) had roast beef
+#ifdef DEBUG
// cmpxchg uses EAX as an implicit operand to hold the comparand
// We're going to destroy EAX in this operation, so we better not be keeping
// anything important in it.
-
-#ifdef DEBUG
if (RBM_EAX & regSet.rsMaskVars)
{
- //We have a variable enregistered in EAX. Make sure it goes dead in this tree.
+ // We have a variable enregistered in EAX. Make sure it goes dead in this tree.
for (unsigned varNum = 0; varNum < compiler->lvaCount; ++varNum)
{
const LclVarDsc & varDesc = compiler->lvaTable[varNum];
continue;
if (varDesc.lvRegNum != REG_EAX)
continue;
- //I suppose I should technically check lvOtherReg.
+ // We may need to check lvOtherReg.
- //OK, finally. Let's see if this local goes dead.
- //If the variable isn't going dead during this tree, we've just trashed a local with
- //cmpxchg.
+ // If the variable isn't going dead during this tree, we've just trashed a local with
+ // cmpxchg.
noway_assert(genContainsVarDeath(value->gtNext, comparand->gtNext, varNum));
break;
#endif
genComputeReg(comparand, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG);
- //Oh, no more piggies.
- //* Author's note. I believe in bounty and chose to omit the piggy who got none.
-
-
//By this point we've evaluated everything. However the odds are that we've spilled something by
//now. Let's recover all the registers and force them to stay.
reg = REG_EAX;
- //Until I try to optimize a cmp after a cmpxchg, just trash the flags for safety's sake.
genFlagsEqualToNone();
break;
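For reference on why EAX is reserved for the comparand in the block above, a sketch of the x86 instruction behavior (not part of the patch):

// Semantics of `cmpxchg [mem], reg` on x86; EAX is an implicit operand:
//   if (EAX == [mem]) { ZF = 1; [mem] = reg;  }   // success: reg is stored to memory
//   else              { ZF = 0; EAX  = [mem]; }   // failure: EAX is overwritten with the memory value
// Because EAX can be overwritten, any variable still enregistered in EAX here would be silently
// trashed, which is exactly what the DEBUG check above guards against.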
#else // not defined(_TARGET_XARCH_)
regSet.rsSpillBeg();
- /* Initialize the line# tracking logic */
-
#ifdef DEBUGGING_SUPPORT
+ /* Initialize the line# tracking logic */
+
if (compiler->opts.compScopeInfo)
{
siInit();
}
/* Start a new code output block */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_EH_FUNCLETS
#if defined(_TARGET_ARM_)
{
assert(block->bbFlags & BBF_JMP_TARGET);
- // Create a label that we'll use for computing the start of an EH region, if this block is
- // at the beginning of such a region. If we used the existing bbEmitCookie as is for
- // determining the EH regions, then this NOP would end up outside of the region, if this
- // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
- // would be executed, which we would prefer not to do.
-
#ifdef DEBUG
if (compiler->verbose)
{
printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
}
#endif
+ // Create a label that we'll use for computing the start of an EH region, if this block is
+ // at the beginning of such a region. If we used the existing bbEmitCookie as is for
+ // determining the EH regions, then this NOP would end up outside of the region, if this
+ // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
+ // would be executed, which we would prefer not to do.
block->bbUnwindNopEmitCookie = getEmitter()->emitAddLabel(
gcInfo.gcVarPtrSetCur,
* Generate code for each statement-tree in the block
*
*/
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_EH_FUNCLETS
if (block->bbFlags & BBF_FUNCLET_BEG)
regPair = regSet.rsPickRegPair(needReg);
/* Load the value into the registers */
+ CLANG_FORMAT_COMMENT_ANCHOR;
-#if ! CPU_HAS_FP_SUPPORT
+#if !CPU_HAS_FP_SUPPORT
if (oper == GT_CNS_DBL)
{
noway_assert(sizeof(__int64) == sizeof(double));
loadIns = ins_Load(TYP_INT); // INS_ldr
regLo = genRegPairLo(regPair);
regHi = genRegPairHi(regPair);
- // assert(regLo != regHi); // regpair property
#if CPU_LOAD_STORE_ARCH
{
regTracker.rsTrackRegTrash(regLo);
regTracker.rsTrackRegTrash(regHi);
+ /* Unary "neg": negate the value in the register pair */
if (oper == GT_NEG)
{
- /* Unary "neg": negate the value in the register pair */
-
#ifdef _TARGET_ARM_
// ARM doesn't have an opcode that sets the carry bit like
}
else
{
-// printf("Overlap: needReg = %08X\n", needReg);
+ // printf("Overlap: needReg = %08X\n", needReg);
// Reg-prediction won't allow this
noway_assert((regSet.rsMaskVars & addrReg) == 0);
regPair = gen2regs2pair(regLo, regHi);
- /* Copy the lo32 bits from regLo to regHi and sign-extend it */
-
#ifdef _TARGET_ARM_
+ /* Copy the lo32 bits from regLo to regHi and sign-extend it */
// Use one instruction instead of two
getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, regLo, 31);
#else
+ /* Copy the lo32 bits from regLo to regHi and sign-extend it */
inst_RV_RV(INS_mov, regHi, regLo, TYP_INT);
inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
#endif
NYI("64-bit return");
#endif
+#ifdef PROFILING_SUPPORTED
//The profiling hook does not trash registers, so it's safe to call after we emit the code for
//the GT_RETURN tree.
-#ifdef PROFILING_SUPPORTED
- /* XXX Thu 7/5/2007
- * Oh look. More cloned code from the regular processing of GT_RETURN.
- */
+
if (compiler->compCurBB == compiler->genReturnBB)
{
genProfilingLeaveCallback();
genPInvokeMethodEpilog();
}
+#ifdef PROFILING_SUPPORTED
//The profiling hook does not trash registers, so it's safe to call after we emit the code for
//the GT_RETURN tree.
-#ifdef PROFILING_SUPPORTED
- /* XXX Thu 7/5/2007
- * Oh look. More cloned code from the regular processing of GT_RETURN.
- */
+
if (compiler->compCurBB == compiler->genReturnBB)
{
genProfilingLeaveCallback();
void * addr = NULL, **pAddr = NULL;
- // Don't ask VM if it hasn't requested ELT hooks
#if defined(_TARGET_ARM_) && defined(DEBUG) && defined(PROFILING_SUPPORTED)
+ // Don't ask VM if it hasn't requested ELT hooks
if (!compiler->compProfilerHookNeeded &&
compiler->opts.compJitELTHookEnabled &&
(helper == CORINFO_HELP_PROF_FCN_ENTER ||
*byrefRegs = RBM_NONE;
*noRefRegs = RBM_NONE;
-// noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
+ // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
if (regs == RBM_NONE)
return RBM_NONE;
noway_assert((regs & byrefRegs) == byrefRegs);
noway_assert((regs & noRefRegs) == noRefRegs);
-// noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
+ // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
noway_assert((regs & (gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur)) == RBM_NONE);
noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
/* This is passed as a pointer-sized integer argument */
genCodeForTree(curr, 0);
+
+ // The arg has been evaluated now, but will be put in a register or pushed on the stack later.
if (curr->gtFlags & GTF_LATE_ARG)
{
- // The arg has been evaluated now, but will be put in a register or pushed on the stack later.
-
#ifdef _TARGET_ARM_
argSize = 0; // nothing is passed on the stack
#endif
else
{
// The arg is passed in the outgoing argument area of the stack frame
- //
+
assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
if (varDsc->lvPromoted &&
- promotionType==Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack.
+ promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live
+ // on stack.
{
assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
promotedStructLocalVarDesc = varDsc;
{
// The current slot should contain more than one field.
// We'll construct a word in memory for the slot, then load it into a register.
- // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current slot,
- // in which case we'll just skip this loop altogether.)
+ // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current
+ // slot, in which case we'll just skip this loop altogether.)
while (fieldVarDsc != NULL && fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct)
{
// If it doesn't fill a slot, it can't overflow the slot (again, because we only promote structs
fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
}
}
- // Now, if we were accumulating into the first scratch word of the outgoing argument space in order to write to
- // an argument register, do so.
+ // Now, if we were accumulating into the first scratch word of the outgoing argument space in order to
+ // write to an argument register, do so.
if (curRegNum != MAX_REG_ARG)
{
noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
// home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
// table entry for the promoted struct local. As we fill slots with the contents of a
// promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
- // that indicate another filled slot (if we have a 12-byte struct, it has 3 four byte slots; when we're working
- // on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the point at which we're done),
- // and "nextPromotedStructFieldVar" will be the local
- // variable number of the next field variable to be copied.
+ // that indicate another filled slot (if we have a 12-byte struct, it has 3 four byte slots; when we're
+ // working on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the point at which we're
+ // done), and "nextPromotedStructFieldVar" will be the local variable number of the next field variable
+ // to be copied.
LclVarDsc* promotedStructLocalVarDesc = NULL;
unsigned bytesOfNextSlotOfCurPromotedStruct = 0; // Size of slot.
unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
{
nextPromotedStructFieldVar++;
}
- // If we reach the limit, meaning there is no field that goes even partly in the stack, only if the first stack slot is after
- // the last slot.
+            // We only reach the limit, meaning there is no field that goes even partly in the stack, if the
+            // first stack slot is after the last slot.
assert(nextPromotedStructFieldVar < fieldVarLim|| firstStackSlot >= slots);
}
regSet.rsMarkRegFree(genRegMask(regSrc));
}
- if (regNum != REG_STK && promotedStructLocalVarDesc == NULL) // If promoted, we already declared the regs used.
+ if (regNum != REG_STK && promotedStructLocalVarDesc == NULL) // If promoted, we already declared the regs
+ // used.
{
arg->gtFlags |= GTF_REG_VAL;
for (unsigned i = 1; i < firstStackSlot; i++)
args = (call->gtFlags & GTF_CALL_POP_ARGS) ? -int(argSize)
: argSize;
+#ifdef PROFILING_SUPPORTED
+
/*-------------------------------------------------------------------------
* Generate the profiling hooks for the call
*/
/* Treat special cases first */
-#ifdef PROFILING_SUPPORTED
-
/* fire the event at the call site */
/* alas, right now I can only handle calls via a method handle */
if (compiler->compIsProfilerHookNeeded() &&
//
// Push the profilerHandle
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_X86_
regMaskTP byrefPushedRegs;
regMaskTP norefPushedRegs;
// To make r0 available, we add REG_PROFILER_TAIL_SCRATCH as an additional interference for tail prefixed calls.
// Here we grab a register to temporarily store r0 and revert it back after we have emitted callback.
//
- // By the time we reach this point argument registers are setup (by genPushArgList()), therefore we don't want to disturb them
- // and hence argument registers are locked here.
+ // By the time we reach this point argument registers are setup (by genPushArgList()), therefore we don't want
+ // to disturb them and hence argument registers are locked here.
regMaskTP usedMask = RBM_NONE;
regSet.rsLockReg(RBM_ARG_REGS, &usedMask);
noway_assert(callType == CT_USER_FUNC);
- vptrReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
+ vptrReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
vptrMask = genRegMask(vptrReg);
/* The register no longer holds a live pointer value */
// a single indirection.
//
// For tailcalls we place the target address in REG_TAILCALL_ADDR
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if CPU_LOAD_STORE_ARCH
{
regNumber indReg = REG_TAILCALL_ADDR;
// a double indirection.
//
// For tailcalls we place the target address in REG_TAILCALL_ADDR
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if CPU_LOAD_STORE_ARCH
{
regNumber indReg = REG_TAILCALL_ADDR;
//
// The vast majority of calls end up here.... Wouldn't
// it be nice if they all did!
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_ARM_
if (!arm_Valid_Imm_For_BL((ssize_t)addr))
{
// Non-virtual direct calls to addresses accessed by
// a single indirection.
//
-#if CPU_LOAD_STORE_ARCH
+
// Load the address into a register, load indirect and call through a register
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if CPU_LOAD_STORE_ARCH
indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
gcInfo.gcRegByrefSetCur &= ~(curArgMask);
}
+#if !FEATURE_STACK_FP_X87
//-------------------------------------------------------------------------
// free up the FP args
-#if !FEATURE_STACK_FP_X87
for (areg = 0; areg < MAX_FLOAT_REG_ARG; areg++)
{
regNumber argRegNum = genMapRegArgNumToRegNum(areg, TYP_FLOAT);
genStackLevel = saveStackLvl;
/* No trashed registers may possibly hold a pointer at this point */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
+
regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur) & (calleeTrashedRegs & RBM_ALLINT) & ~regSet.rsMaskVars & ~vptrMask;
if (ptrRegs)
{
}
/* Are we supposed to pop the arguments? */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_X86_)
if (call->gtFlags & GTF_CALL_UNMANAGED)
UnspillFloat(call);
}
- // Mark as free
#if FEATURE_STACK_FP_X87
+ // Mark as free
regSet.SetUsedRegFloat(call, false);
#endif
}
var_types type = genActualType(size->gtType);
emitAttr easz = emitTypeSize(type);
+#ifdef DEBUG
// Verify ESP
- #ifdef DEBUG
if (compiler->opts.compStackCheckOnRet)
{
noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
getEmitter()->emitIns(INS_BREAKPOINT);
genDefineTempLabel(esp_check);
}
- #endif
+#endif
noway_assert(isFramePointerUsed());
noway_assert(genStackLevel == 0); // Can't have anything on the stack
if (compiler->info.compInitMem)
{
+#if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1)
// regCnt will be the number of pointer-sized words to locAlloc
// If the shift right won't do the 'and' do it here
-#if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1)
inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
#endif
// --- shr regCnt, 2 ---
/* Since we have to zero out the allocated memory AND ensure that
ESP is always valid by tickling the pages, we will just push 0's
on the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_ARM_)
regNumber regZero1 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
mov ESP, REG
end:
*/
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_ARM_
+
inst_RV_RV_RV(INS_sub, regCnt, REG_SPBASE, regCnt, EA_4BYTE, INS_FLAGS_SET);
inst_JMP(EJ_hs, loop);
#else
// note that it has to be done BEFORE the update of ESP since
// ESP might already be on the guard page. It is OK to leave
// the final value of ESP on the guard page
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if CPU_LOAD_STORE_ARCH
getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regTemp, REG_SPBASE, 0);
unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
+#ifdef _TARGET_X86_
// Non-x86 platforms are allowed to access all arguments directly
// so we don't need this code.
-#ifdef _TARGET_X86_
// Is this a varargs function?
regTCB = REG_PINVOKE_TCB;
}
- /* get TCB, mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */
-
- // TODO-ARM-CQ: should we inline TlsGetValue here?
#if !defined(_TARGET_ARM_)
#define WIN_NT_TLS_OFFSET (0xE10)
#define WIN_NT5_TLS_HIGHOFFSET (0xf94)
+ /* get TCB, mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */
+
+ // TODO-ARM-CQ: should we inline TlsGetValue here?
+
if (threadTlsIndex < 64)
{
// mov reg, FS:[0xE10+threadTlsIndex*4]
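// Rough shape of what the emitted sequence computes (a sketch for orientation only; GetTebBase() is a
// hypothetical stand-in for reading through the FS segment, and the offsets are the x86 TEB layout that
// the WIN_NT_TLS_OFFSET / WIN_NT5_TLS_HIGHOFFSET constants above refer to):
//
//     void* GetTlsSlot(unsigned threadTlsIndex)
//     {
//         BYTE* teb = GetTebBase();
//         if (threadTlsIndex < 64)
//         {   // slots 0..63 live directly in the TEB: FS:[0xE10 + index * sizeof(void*)]
//             return *(void**)(teb + WIN_NT_TLS_OFFSET + threadTlsIndex * sizeof(void*));
//         }
//         // higher indices go through the expansion-slot array whose pointer lives at FS:[0xf94]
//         void** expansion = *(void***)(teb + WIN_NT5_TLS_HIGHOFFSET);
//         return expansion[threadTlsIndex - 64];
//     }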
pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
#endif // _TARGET_X86_
- /* mov dword ptr [frame.callSiteReturnAddress], label */
-
#if CPU_LOAD_STORE_ARCH
regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(tcbReg));
getEmitter()->emitIns_J_R (INS_adr,
compiler->lvaInlinedPInvokeFrameVar,
pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
#else // !CPU_LOAD_STORE_ARCH
+ /* mov dword ptr [frame.callSiteReturnAddress], label */
+
getEmitter()->emitIns_J_S (ins_Store(TYP_I_IMPL),
EA_PTRSIZE,
returnLabel,
else
{
/* mov reg2, dword ptr [tcb address] */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_ARM_
reg2 = REG_R2;
#else
regSet.rsSpillBeg();
+#ifdef DEBUGGING_SUPPORT
/* Initialize the line# tracking logic */
-#ifdef DEBUGGING_SUPPORT
if (compiler->opts.compScopeInfo)
{
siInit();
genUpdateLife(block->bbLiveIn);
// Even if liveness didn't change, we need to update the registers containing GC references.
- // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't change?
- // We cleared them out above. Maybe we should just not clear them out, but update the ones that change here.
- // That would require handling the changes in recordVarLocationsAtStartOfBB().
+ // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
+ // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
+ // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
regMaskTP newLiveRegSet = RBM_NONE;
regMaskTP newRegGCrefSet = RBM_NONE;
* Generate code for each statement-tree in the block
*
*/
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_EH_FUNCLETS
if (block->bbFlags & BBF_FUNCLET_BEG)
// Nothing to pop off from the stack.
if (compiler->lvaOutgoingArgSpaceSize > 0)
{
- assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
+ // aligned
inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
}
}
if (doNoInitLessThanOnePageAlloc)
- {
+ {
// Since the size is less than a page, simply adjust ESP.
// ESP might already be in the guard page, so we must touch it BEFORE
// the alloc, not after.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_X86_
// For x86, we don't want to use "sub ESP" because we don't want the emitter to track the adjustment
// Otherwise load the op1 (GT_ADDR) into the dstReg to copy the struct on the stack by value.
if (op1->gtRegNum != dstReg)
{
- // Generate LEA instruction to load the stack of the outgoing var + SlotNum offset (or the incoming arg area for tail calls) in RDI.
+ // Generate LEA instruction to load the stack of the outgoing var + SlotNum offset (or the incoming arg area
+ // for tail calls) in RDI.
// Destination is always local (on the stack) - use EA_PTRSIZE.
getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, dstReg, baseVarNum, putArgNode->getArgOffset());
}
// Move the values into the right registers.
//
- // Update varDsc->lvArgReg and lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
- // Note that we cannot modify varDsc->lvRegNum and lvOtherArgReg here because another basic block may not be expecting it.
- // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
- // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ // Update varDsc->lvArgReg and lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and
+ // argReg is going live. Note that we cannot modify varDsc->lvRegNum and lvOtherArgReg here because another
+ // basic block may not be expecting it. Therefore manually update life of argReg. Note that GT_JMP marks
+ // the end of the basic block and after which reg life and gc info will be recomputed for the new block in
+ // genCodeForBBList().
if (type0 != TYP_UNKNOWN)
{
getEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), varDsc->lvArgReg, varNum, offset0);
}
#if FEATURE_VARARG && defined(_TARGET_AMD64_)
- // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg register.
- // This is due to the AMD64 ABI which requires floating point values passed to varargs functions to be passed in
- // both integer and floating point registers. It doesn't apply to x86, which passes floating point values on the stack.
+ // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg
+ // register. This is due to the AMD64 ABI which requires floating point values passed to varargs functions to
+ // be passed in both integer and floating point registers. It doesn't apply to x86, which passes floating point
+ // values on the stack.
if (compiler->info.compIsVarArgs)
{
regNumber intArgReg;
//
// Opcode Amd64 equivalent Comment
// ------ ----------------- --------
-// BLT.UN(a,b) ucomis[s|d] a, b Jb branches if CF=1, which means either a<b or unordered from the above table.
-// jb
+// BLT.UN(a,b) ucomis[s|d] a, b Jb branches if CF=1, which means either a<b or unordered from the above table.
+// jb
//
// BLT(a,b) ucomis[s|d] b, a Ja branches if CF=0 and ZF=0, which means b>a that in turn implies a<b
// ja
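// A condensed restatement of the two rows above as a sketch (the struct and helper below are illustrative,
// not JIT types): unordered '<' keeps the operand order and branches on 'below', while ordered '<' swaps
// the operands and branches on 'above'.
//
//     struct FloatLtLowering { bool swapOperands; bool branchIfBelow; };
//
//     FloatLtLowering LowerFloatLessThan(bool isUnordered)
//     {
//         // BLT.UN(a,b): ucomis a, b ; jb   -- CF=1 covers both a<b and the unordered case
//         // BLT(a,b)   : ucomis b, a ; ja   -- CF=0 && ZF=0 means b>a, i.e. a<b, and excludes unordered
//         return isUnordered ? FloatLtLowering{false, true} : FloatLtLowering{true, false};
//     }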
}
else
{
+#ifdef DEBUG
// jmpKind[1] != EJ_NONE implies BEQ and BEN.UN of floating point values.
// These are represented by two conditions.
-
-#ifdef DEBUG
if (tree->gtOper == GT_EQ)
{
// This must be an ordered comparison.
{
// We have a GC (byref or ref) pointer
// TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsq instruction,
- // but the logic for emitting a GC info record is not available (it is internal for the emitter only.)
- // See emitGCVarLiveUpd function. If we could call it separately, we could do instGen(INS_movsq); and emission of gc info.
+ // but the logic for emitting a GC info record is not available (it is internal for the emitter
+ // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do
+ // instGen(INS_movsq); and emission of gc info.
var_types memType;
if (gcPtrs[i] == TYPE_GC_REF)
//
// Loads
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef ALL_XARCH_EMITTER_UNIT_TESTS
#ifdef FEATURE_AVX_SUPPORT
}
break;
-#ifndef _TARGET_XARCH_
+#ifndef _TARGET_XARCH_
case 5:
case 6:
case 7:
{
// We have a (large) struct that can't be replaced with a "primitive" type
// and can't be passed in multiple registers
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
#if FEATURE_STACK_FP_X87
fprintf(fout, "Offset / size of gtFPlvl = %2u / %2u\n", offsetof(GenTree, gtFPlvl ), sizeof(gtDummy->gtFPlvl ));
#endif // FEATURE_STACK_FP_X87
-// TODO: The section that report GenTree sizes should be made into a public static member function of the GenTree class (see https://github.com/dotnet/coreclr/pull/493)
-// fprintf(fout, "Offset / size of gtCostEx = %2u / %2u\n", offsetof(GenTree, _gtCostEx ), sizeof(gtDummy->_gtCostEx ));
-// fprintf(fout, "Offset / size of gtCostSz = %2u / %2u\n", offsetof(GenTree, _gtCostSz ), sizeof(gtDummy->_gtCostSz ));
+ // TODO: The section that reports GenTree sizes should be made into a public static member function of the
+ // GenTree class (see https://github.com/dotnet/coreclr/pull/493)
+ // fprintf(fout, "Offset / size of gtCostEx = %2u / %2u\n", offsetof(GenTree, _gtCostEx ), sizeof(gtDummy->_gtCostEx ));
+ // fprintf(fout, "Offset / size of gtCostSz = %2u / %2u\n", offsetof(GenTree, _gtCostSz ), sizeof(gtDummy->_gtCostSz ));
fprintf(fout, "Offset / size of gtFlags = %2u / %2u\n", offsetof(GenTree, gtFlags ), sizeof(gtDummy->gtFlags ));
fprintf(fout, "Offset / size of gtVNPair = %2u / %2u\n", offsetof(GenTree, gtVNPair ), sizeof(gtDummy->gtVNPair ));
fprintf(fout, "Offset / size of gtRsvdRegs = %2u / %2u\n", offsetof(GenTree, gtRsvdRegs ), sizeof(gtDummy->gtRsvdRegs ));
if (size == 0 || size >= 4)
return compRegVarName(reg, true);
+ // clang-format off
static
const char * sizeNames[][2] =
{
{ "r15b", "r15w" },
#endif // _TARGET_AMD64_
};
+ // clang-format on
assert(isByteReg (reg));
assert(genRegMask(reg) & RBM_BYTE_REGS);
//
// Processor specific optimizations
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_AMD64_
opts.compUseFCOMI = false;
opts.compUseCMOV = true;
opts.compDbgCode = (opts.eeFlags & CORJIT_FLG_DEBUG_CODE) != 0;
opts.compDbgInfo = (opts.eeFlags & CORJIT_FLG_DEBUG_INFO) != 0;
opts.compDbgEnC = (opts.eeFlags & CORJIT_FLG_DEBUG_EnC) != 0;
- // We never want to have debugging enabled when regenerating GC encoding patterns
#if REGEN_SHORTCUTS || REGEN_CALLPAT
+ // We never want to have debugging enabled when regenerating GC encoding patterns
opts.compDbgCode = false;
opts.compDbgInfo = false;
opts.compDbgEnC = false;
compProfilerMethHndIndirected = false;
}
- // Right now this ELT hook option is enabled only for arm and amd64
#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
+ // Right now this ELT hook option is enabled only for arm and amd64
+
// Honour complus_JitELTHookEnabled only if VM has not asked us to generate profiler
// hooks in the first place. That is, Override VM only if it hasn't asked for a
// profiler callback for this method.
// IMPORTANT, after this point, every place where tree topology changes must redo evaluation
// order (gtSetStmtInfo) and relink nodes (fgSetStmtSeq) if required.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
// Now we have determined the order of evaluation and the gtCosts for every node.
#endif // _TARGET_ARMARCH_
/* Assign registers to variables, etc. */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef LEGACY_BACKEND
///////////////////////////////////////////////////////////////////////////////
}
#endif // FUNC_INFO_LOGGING
-// if (s_compMethodsCount==0) setvbuf(jitstdout, NULL, _IONBF, 0);
+ // if (s_compMethodsCount==0) setvbuf(jitstdout, NULL, _IONBF, 0);
info.compCompHnd = compHnd;
info.compMethodHnd = methodHnd;
// with an ARM-targeting "altjit").
info.compMatchedVM = IMAGE_FILE_MACHINE_TARGET == info.compCompHnd->getExpectedTargetArchitecture();
+#if defined(ALT_JIT) && defined(UNIX_AMD64_ABI)
// ToDo: This code is to allow us to run UNIX codegen on Windows for now. Remove when appropriate.
// Make sure that the generated UNIX altjit code is skipped on Windows. The static jit codegen is used to run.
-#if defined(ALT_JIT) && defined(UNIX_AMD64_ABI)
info.compMatchedVM = false;
#endif // UNIX_AMD64_ABI
static bool headerPrinted = false;
if (!headerPrinted)
{
+ // clang-format off
headerPrinted = true;
printf(" | Profiled | Exec- | Method has | calls | Num |LclV |AProp| CSE | Reg |bytes | %3s code size | \n", Target::g_tgtCPUName);
printf(" mdToken | | RGN | Count | EH | FRM | LOOP | NRM | IND | BBs | Cnt | Cnt | Cnt | Alloc | IL | HOT | COLD | method name \n");
printf("---------+-----+------+----------+----+-----+------+-----+-----+-----+-----+-----+-----+---------+------+-------+-------+-----------\n");
- // 06001234 | PRF | HOT | 219 | EH | ebp | LOOP | 15 | 6 | 12 | 17 | 12 | 8 | 28 p2 | 145 | 211 | 123 | System.Example(int)
+ // 06001234 | PRF | HOT | 219 | EH | ebp | LOOP | 15 | 6 | 12 | 17 | 12 | 8 | 28 p2 | 145 | 211 | 123 | System.Example(int)
+ // clang-format on
}
printf("%08X | ", currentMethodToken);
pParam->pComp->prevCompiler = JitTls::GetCompiler();
JitTls::SetCompiler(pParam->pComp);
-///PREFIX_ASSUME gets turned into ASSERT_CHECK and we cannot have it here
+// PREFIX_ASSUME gets turned into ASSERT_CHECK and we cannot have it here
#if defined(_PREFAST_) || defined(_PREFIX_)
PREFIX_ASSUME(pParam->pComp != NULL);
#else
// Completes the timing of the current method, and adds it to "sum".
void JitTimer::Terminate(Compiler* comp, CompTimeSummaryInfo& sum)
{
- // Otherwise...
#ifdef DEBUG
unsigned __int64 totCycles2 = 0;
for (int i = 0; i < PHASE_NUMBER_OF; i++)
* cReach, dReach : Display all block reachability (call fgDispReach()).
* cDoms, dDoms : Display all block dominators (call fgDispDoms()).
* cLiveness, dLiveness : Display per-block variable liveness (call fgDispBBLiveness()).
- * cCVarSet, dCVarSet : Display a "converted" VARSET_TP: the varset is assumed to be tracked variable indices.
- * These are converted to variable numbers and sorted. (Calls dumpConvertedVarSet()).
+ * cCVarSet, dCVarSet : Display a "converted" VARSET_TP: the varset is assumed to be tracked variable
+ * indices. These are converted to variable numbers and sorted. (Calls
+ * dumpConvertedVarSet()).
*
* cFuncIR, dFuncIR : Display all the basic blocks of a function in linear IR form.
* cLoopIR, dLoopIR : Display a loop in linear IR form.
chars += printf("flags=");
// Node flags
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(DEBUG)
#if SMALL_TREE_NODES
private:
- regNumberSmall _lvRegNum; // Used to store the register this variable is in (or, the low register of a register pair).
- // For LEGACY_BACKEND, this is only set if lvRegister is non-zero. For non-LEGACY_BACKEND, it is set during codegen
- // any time the variable is enregistered (in non-LEGACY_BACKEND, lvRegister is only set to non-zero if the
- // variable gets the same register assignment for its entire lifetime).
+ regNumberSmall _lvRegNum; // Used to store the register this variable is in (or, the low register of a
+ // register pair). For LEGACY_BACKEND, this is only set if lvRegister is
+ // non-zero. For non-LEGACY_BACKEND, it is set during codegen any time the
+ // variable is enregistered (in non-LEGACY_BACKEND, lvRegister is only set
+ // to non-zero if the variable gets the same register assignment for its entire
+ // lifetime).
#if !defined(_TARGET_64BIT_)
regNumberSmall _lvOtherReg; // Used for "upper half" of long var.
#endif // !defined(_TARGET_64BIT_)
// For a finally handler, find the region index that the BBJ_CALLFINALLY lives in that calls the handler,
// or NO_ENCLOSING_INDEX if the BBJ_CALLFINALLY lives in the main function body. Normally, the index
// is the same index as the handler (and the BBJ_CALLFINALLY lives in the 'try' region), but for AMD64 the
- // BBJ_CALLFINALLY lives in the enclosing try or handler region, whichever is more nested, or the main function body.
- // If the returned index is not NO_ENCLOSING_INDEX, then '*inTryRegion' is set to 'true' if the BBJ_CALLFINALLY
- // lives in the returned index's 'try' region, or 'false' if lives in the handler region. (It never lives in a filter.)
+ // BBJ_CALLFINALLY lives in the enclosing try or handler region, whichever is more nested, or the main function
+ // body. If the returned index is not NO_ENCLOSING_INDEX, then '*inTryRegion' is set to 'true' if the
+ // BBJ_CALLFINALLY lives in the returned index's 'try' region, or 'false' if lives in the handler region. (It never
+ // lives in a filter.)
unsigned ehGetCallFinallyRegionIndex(unsigned finallyIndex, bool* inTryRegion);
// Find the range of basic blocks in which all BBJ_CALLFINALLY will be found that target the 'finallyIndex' region's
GenTreePtr gtGetThisArg(GenTreePtr call);
// Static fields of struct types (and sometimes the types that those are reduced to) are represented by having the
- // static field contain an object pointer to the boxed struct. This simplifies the GC implementation...but complicates
- // the JIT somewhat. This predicate returns "true" iff a node with type "fieldNodeType", representing the given "fldHnd",
- // is such an object pointer.
+ // static field contain an object pointer to the boxed struct. This simplifies the GC implementation...but
+ // complicates the JIT somewhat. This predicate returns "true" iff a node with type "fieldNodeType", representing
+ // the given "fldHnd", is such an object pointer.
bool gtIsStaticFieldPtrToBoxedStruct(var_types fieldNodeType, CORINFO_FIELD_HANDLE fldHnd);
// Return true if call is a recursive call; return false otherwise.
GenTreePtr gtFoldExpr (GenTreePtr tree);
GenTreePtr
#ifdef __clang__
- // TODO-Amd64-Unix: Remove this when the clang optimizer is fixed and/or the method implementation is refactored in a simpler code.
- // This is a workaround for a bug in the clang-3.5 optimizer. The issue is that in release build the optimizer is mistyping
- // (or just wrongly decides to use 32 bit operation for a corner case of MIN_LONG) the args of the (ltemp / lval2)
- // to int (it does a 32 bit div operation instead of 64 bit) - see the implementation of the method in gentree.cpp.
- // For the case of lval1 and lval2 equal to MIN_LONG (0x8000000000000000) this results in raising a SIGFPE.
- // The method implementation is rather complex. Disable optimizations for now.
+ // TODO-Amd64-Unix: Remove this when the clang optimizer is fixed and/or the method implementation is
+ // refactored in a simpler code. This is a workaround for a bug in the clang-3.5 optimizer. The issue is that in
+ // release build the optimizer is mistyping (or just wrongly decides to use 32 bit operation for a corner case
+ // of MIN_LONG) the args of the (ltemp / lval2) to int (it does a 32 bit div operation instead of 64 bit) - see
+ // the implementation of the method in gentree.cpp. For the case of lval1 and lval2 equal to MIN_LONG
+ // (0x8000000000000000) this results in raising a SIGFPE. The method implementation is rather complex. Disable
+ // optimizations for now.
__attribute__((optnone))
#endif // __clang__
gtFoldExprConst(GenTreePtr tree);
// On architectures whose ABIs allow structs to be passed in registers, struct promotion will sometimes
// require us to "rematerialize" a struct from it's separate constituent field variables. Packing several sub-word
// field variables into an argument register is a hard problem. It's easier to reserve a word of memory into which
- // such field can be copied, after which the assembled memory word can be read into the register. We will allocate this
- // variable to be this scratch word whenever struct promotion occurs.
+ // such field can be copied, after which the assembled memory word can be read into the register. We will allocate
+ // this variable to be this scratch word whenever struct promotion occurs.
unsigned lvaPromotedStructAssemblyScratchVar;
#endif // _TARGET_ARM_
unsigned fgDomBBcount; // # of BBs for which we have dominator and reachability information
BasicBlock** fgBBInvPostOrder; // The flow graph stored in an array sorted in topological order, needed to compute dominance. Indexed by block number. Size: fgBBNumMax + 1.
- // After the dominance tree is computed, we cache a DFS preorder number and DFS postorder number to compute dominance queries in O(1).
- // fgDomTreePreOrder and fgDomTreePostOrder are arrays giving the block's preorder and postorder number, respectively.
- // The arrays are indexed by basic block number. (Note that blocks are numbered starting from one. Thus, we always waste
- // element zero. This makes debugging easier and makes the code less likely to suffer from bugs stemming from forgetting
- // to add or subtract one from the block number to form an array index). The arrays are of size fgBBNumMax + 1.
+ // After the dominance tree is computed, we cache a DFS preorder number and DFS postorder number to compute
+ // dominance queries in O(1). fgDomTreePreOrder and fgDomTreePostOrder are arrays giving the block's preorder and
+ // postorder number, respectively. The arrays are indexed by basic block number. (Note that blocks are numbered
+ // starting from one. Thus, we always waste element zero. This makes debugging easier and makes the code less likely
+ // to suffer from bugs stemming from forgetting to add or subtract one from the block number to form an array
+ // index). The arrays are of size fgBBNumMax + 1.
unsigned * fgDomTreePreOrder;
unsigned * fgDomTreePostOrder;
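// A sketch of the O(1) dominance query these numberings enable (an illustrative helper, not the JIT's
// existing dominance routine; it reads the two arrays declared just above, indexed by bbNum):
bool fgDfsNumsDominate(BasicBlock* a, BasicBlock* b)
{
    return (fgDomTreePreOrder[a->bbNum] <= fgDomTreePreOrder[b->bbNum]) &&
           (fgDomTreePostOrder[a->bbNum] >= fgDomTreePostOrder[b->bbNum]);
}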
void fgInterBlockLocalVarLiveness();
// The presence of "x op= y" operations presents some difficulties for SSA: this is both a use of some SSA name of
- // "x", and a def of a new SSA name for "x". The tree only has one local variable for "x", so it has to choose whether
- // to treat that as the use or def. It chooses the "use", and thus the old SSA name. This map allows us to record/recover
- // the "def" SSA number, given the lcl var node for "x" in such a tree.
+ // "x", and a def of a new SSA name for "x". The tree only has one local variable for "x", so it has to choose
+ // whether to treat that as the use or def. It chooses the "use", and thus the old SSA name. This map allows us
+ // to record/recover the "def" SSA number, given the lcl var node for "x" in such a tree.
typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, unsigned, JitSimplerHashBehavior> NodeToUnsignedMap;
NodeToUnsignedMap* m_opAsgnVarDefSsaNums;
NodeToUnsignedMap* GetOpAsgnVarDefSsaNums()
// (performed by fgComputeDoms), this procedure builds the dominance tree represented
// adjacency lists.
- // In order to speed up the queries of the form 'Does A dominates B', we can perform a DFS preorder and postorder traversal of the dominance tree and the
- // dominance query will become A dominates B iif preOrder(A) <= preOrder(B) && postOrder(A) >= postOrder(B) making the computation O(1).
+ // In order to speed up queries of the form 'Does A dominate B', we can perform a DFS preorder and postorder
+ // traversal of the dominance tree, and the dominance query becomes: A dominates B iff preOrder(A) <= preOrder(B)
+ // && postOrder(A) >= postOrder(B), making the computation O(1).
void fgTraverseDomTree (unsigned bbNum,
BasicBlockList** domTree,
unsigned* preNum,
// all offsets between the top-level indirection and the bottom are constant, and that their sum is sufficiently
// small; hence the other fields of MorphAddrContext. Finally, the odd structure of GT_COPYBLK, in which the second
// argument is a GT_LIST, requires us to "tell" that List node that its parent is a GT_COPYBLK, so it "knows" that
- // each of its arguments should be evaluated in MACK_Ind contexts. (This would not be true for GT_LIST nodes representing
- // method call argument lists.)
+ // each of its arguments should be evaluated in MACK_Ind contexts. (This would not be true for GT_LIST nodes
+ // representing method call argument lists.)
enum MorphAddrContextKind {
MACK_Ind,
MACK_Addr,
// outside of that loop. Exempt expressions whose value number is in "hoistedInParents"; add VN's of hoisted
// expressions to "hoistInLoop".
// Returns "true" iff "tree" is loop-invariant (wrt "lnum").
- // Assumes that the value of "*firstBlockAndBeforeSideEffect" indicates that we're in the first block, and before any
- // possible globally visible side effects. Assume is called in evaluation order, and updates this.
+ // Assumes that the value of "*firstBlockAndBeforeSideEffect" indicates that we're in the first block, and before
+ // any possible globally visible side effects. Assume is called in evaluation order, and updates this.
bool optHoistLoopExprsForTree(GenTreePtr tree,
unsigned lnum,
LoopHoistContext* hoistCtxt,
private:
// Requires "lnum" to be the index of an outermost loop in the loop table. Traverses the body of that loop,
- // including all nested loops, and records the set of "side effects" of the loop: fields (object instance and static)
- // written to, and SZ-array element type equivalence classes updated.
+ // including all nested loops, and records the set of "side effects" of the loop: fields (object instance and
+ // static) written to, and SZ-array element type equivalence classes updated.
void optComputeLoopNestSideEffects(unsigned lnum);
// Add the side effects of "blk" (which is required to be within a loop) to all loops of which it is a part.
public:
- // A "LoopDsc" describes a ("natural") loop. We (currently) require the body of a loop to be a contiguous (in bbNext order)
- // sequence of basic blocks. (At times, we may require the blocks in a loop to be "properly numbered" in bbNext order;
- // we use comparisons on the bbNum to decide order.)
+ // A "LoopDsc" describes a ("natural") loop. We (currently) require the body of a loop to be a contiguous (in
+ // bbNext order) sequence of basic blocks. (At times, we may require the blocks in a loop to be "properly numbered"
+ // in bbNext order; we use comparisons on the bbNum to decide order.)
// The blocks that define the body are
// first <= top <= entry <= bottom .
- // The "head" of the loop is a block outside the loop that has "entry" as a successor. We only support loops with a single 'head' block.
- // The meanings of these blocks are given in the definitions below. Also see the picture at Compiler::optFindNaturalLoops().
+ // The "head" of the loop is a block outside the loop that has "entry" as a successor. We only support loops with a
+ // single 'head' block. The meanings of these blocks are given in the definitions below. Also see the picture at
+ // Compiler::optFindNaturalLoops().
struct LoopDsc
{
BasicBlock * lpHead; // HEAD of the loop (not part of the looping of the loop) -- has ENTRY as a successor.
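// A sketch of the ordering invariant stated above, written against this struct's members (an illustrative
// helper, not an existing LoopDsc method):
bool lpBlocksAreOrdered() const
{
    return (lpFirst->bbNum <= lpTop->bbNum) && (lpTop->bbNum <= lpEntry->bbNum) &&
           (lpEntry->bbNum <= lpBottom->bbNum);
}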
#define LPFLG_DONT_UNROLL 0x2000 // do not unroll this loop
#define LPFLG_ASGVARS_YES 0x4000 // "lpAsgVars" has been computed
-#define LPFLG_ASGVARS_INC 0x8000 // "lpAsgVars" is incomplete -- vars beyond those representable in an AllVarSet tyep are assigned to.
+#define LPFLG_ASGVARS_INC 0x8000 // "lpAsgVars" is incomplete -- vars beyond those representable in an AllVarSet
+ // type are assigned to.
bool lpLoopHasHeapHavoc; // The loop contains an operation that we assume has arbitrary heap side effects.
// loop nested in "loopInd" that shares the same head as "loopInd".
void optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to);
- // Updates the successors of "blk": if "blk2" is a successor of "blk", and there is a mapping for "blk2->blk3" in "redirectMap",
- // change "blk" so that "blk3" is this successor. Note that the predecessor lists are not updated.
+ // Updates the successors of "blk": if "blk2" is a successor of "blk", and there is a mapping for "blk2->blk3" in
+ // "redirectMap", change "blk" so that "blk3" is this successor. Note that the predecessor lists are not updated.
void optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap);
// Marks the containsCall information to "lnum" and any parent loops.
// ICorStaticInfo wrapper functions
+ bool eeTryResolveToken(CORINFO_RESOLVED_TOKEN* resolvedToken);
+
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
#ifdef DEBUG
static void dumpSystemVClassificationType(SystemVClassificationType ct);
/*OUT*/ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr);
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- bool eeTryResolveToken(CORINFO_RESOLVED_TOKEN* resolvedToken);
template<typename ParamType>
bool eeRunWithErrorTrap(void (*function)(ParamType*), ParamType* param)
// Utility functions
+ const char * eeGetFieldName (CORINFO_FIELD_HANDLE fieldHnd,
+ const char ** classNamePtr = NULL);
+
#if defined(DEBUG)
const wchar_t * eeGetCPString (size_t stringHandle);
#endif
- const char * eeGetFieldName (CORINFO_FIELD_HANDLE fieldHnd,
- const char ** classNamePtr = NULL);
const char* eeGetClassName (CORINFO_CLASS_HANDLE clsHnd);
static CORINFO_METHOD_HANDLE eeFindHelper (unsigned helper);
regMaskTP compNoGCHelperCallKillSet (CorInfoHelpFunc helper);
#ifdef _TARGET_ARM_
- // Requires that "varDsc" be a promoted struct local variable being passed as an argument, beginning at "firstArgRegNum",
- // which is assumed to have already been aligned to the register alignment restriction of the struct type.
- // Adds bits to "*pArgSkippedRegMask" for any argument registers *not* used in passing "varDsc" -- i.e., internal
- // "holes" caused by internal alignment constraints. For example, if the struct contained an int and a double, and we
- // at R0 (on ARM), then R1 would be skipped, and the bit for R1 would be added to the mask.
+ // Requires that "varDsc" be a promoted struct local variable being passed as an argument, beginning at
+ // "firstArgRegNum", which is assumed to have already been aligned to the register alignment restriction of the
+ // struct type. Adds bits to "*pArgSkippedRegMask" for any argument registers *not* used in passing "varDsc" --
+ // i.e., internal "holes" caused by internal alignment constraints. For example, if the struct contained an int and
+ // a double, and we start at R0 (on ARM), then R1 would be skipped, and the bit for R1 would be added to the mask.
void fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
unsigned firstArgRegNum,
regMaskTP* pArgSkippedRegMask);
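// Worked example of such a "hole" (illustrative only; 'nextRegNum' is a hypothetical local): for
// struct { int i; double d; } starting at r0, 'i' takes r0, 'd' must start in an even-numbered register
// and so takes r2:r3, and the bit for the skipped r1 is what gets added to *pArgSkippedRegMask:
//
//     if (nextRegNum & 1) // doubles must be 8-byte aligned, i.e. start at an even core register
//     {
//         *pArgSkippedRegMask |= genRegMask((regNumber)(REG_R0 + nextRegNum));
//         nextRegNum++;
//     }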
return emitTypeSize(TYP_SIMD8);
}
- // (maxPossibleSIMDStructBytes is for use in a context that requires a compile-time constant.)
#ifdef FEATURE_AVX_SUPPORT
+ // (maxPossibleSIMDStructBytes is for use in a context that requires a compile-time constant.)
static const unsigned maxPossibleSIMDStructBytes = 32;
#else // !FEATURE_AVX_SUPPORT
static const unsigned maxPossibleSIMDStructBytes = 16;
bool compBlkOpUsed; // Does the method do a COPYBLK or INITBLK
#endif
+#ifdef DEBUG
// State information - which phases have completed?
// These are kept together for easy discoverability
-#ifdef DEBUG
bool bRangeAllowStress;
bool compCodeGenDone;
int64_t compNumStatementLinksTraversed; // # of links traversed while doing debug checks
bool getNeedsGSSecurityCookie() const { return compNeedsGSSecurityCookie; }
void setNeedsGSSecurityCookie() { compNeedsGSSecurityCookie = true; }
- FrameLayoutState lvaDoneFrameLayout; // The highest frame layout state that we've completed. During frame layout calculations,
- // this is the level we are currently computing.
+ FrameLayoutState lvaDoneFrameLayout; // The highest frame layout state that we've completed. During
+ // frame layout calculations, this is the level we are currently
+ // computing.
//---------------------------- JITing options -----------------------------
bool compNeedSecurityCheck; // This flag really means whether or not a security object needs
// to be allocated on the stack.
-
// It will be set to true in the following cases:
// 1. When the method being compiled has a declarative security
// (i.e. when CORINFO_FLG_NOSECURITYWRAP is reset for the current method).
// This flag is indicating if there is a need to align the frame.
// On AMD64-Windows, if there are calls, 4 slots for the outgoing args are allocated, except for
// FastTailCall. These slots make the frame size non-zero, so alignment logic will be called.
- // On AMD64-Unix, there are no such slots. There is a possibility to have calls in the method with frame size of 0.
- // The frame alignment logic won't kick in. This flags takes care of the AMD64-Unix case by remembering that there
- // are calls and making sure the frame alignment logic is executed.
+ // On AMD64-Unix, there are no such slots. There is a possibility to have calls in the method with frame size of
+ // 0. The frame alignment logic won't kick in. This flag takes care of the AMD64-Unix case by remembering that
+ // there are calls and making sure the frame alignment logic is executed.
bool compNeedToAlignFrame;
#endif // UNIX_AMD64_ABI
#endif // DEBUG
-
+// clang-format off
#define STRESS_MODES \
\
STRESS_MODE(NONE) \
STRESS_MODES
#undef STRESS_MODE
};
+// clang-format on
#ifdef DEBUG
static
// current number of EH clauses (after additions like synchronized
// methods and funclets, and removals like unreachable code deletion).
- bool compMatchedVM; // true if the VM is "matched": either the JIT is a cross-compiler and the VM expects that,
- // or the JIT is a "self-host" compiler (e.g., x86 hosted targeting x86) and the VM expects that.
+ bool compMatchedVM; // true if the VM is "matched": either the JIT is a cross-compiler
+ // and the VM expects that, or the JIT is a "self-host" compiler
+ // (e.g., x86 hosted targeting x86) and the VM expects that.
#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
//
// 3. Windows 64-bit native calling convention also requires the address of RetBuff
// to be returned in RAX.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_AMD64_
return (info.compRetBuffArg != BAD_VAR_NUM);
#else // !_TARGET_AMD64_
CORJIT_FLAGS * compileFlags);
// Data required for generating profiler Enter/Leave/TailCall hooks
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef PROFILING_SUPPORTED
bool compProfilerHookNeeded; // Whether profiler Enter/Leave/TailCall hook needs to be generated for the method
void *compProfilerMethHnd; // Profiler handle of the method being compiled. Passed as param to ELT callbacks
// having multiple try native code regions for a single try il region. This is doable and shouldn't be
// a big change in the exception.
//
- // Given the low frequency of the cases where we have transition blocks, I've decided to dumb down optimizations
- // For these 2 cases:
+ // Given the low frequency of the cases where we have transition blocks, I've decided to dumb down
+ // optimizations. For these 2 cases:
//
// - When there is a chance that we will have FP transition blocks, we won't do procedure splitting.
// - When a method has a handler, it won't enregister any FP variables that go thru a conditional long or
typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, FieldSeqNode*, JitSimplerHashBehavior> NodeToFieldSeqMap;
- // Some nodes of "TYP_BYREF" or "TYP_I_IMPL" actually represent the address of a field within a struct, but since the offset of
- // the field is zero, there's no "GT_ADD" node. We normally attach a field sequence to the constant that is
- // added, but what do we do when that constant is zero, and is thus not present? We use this mechanism to
+ // Some nodes of "TYP_BYREF" or "TYP_I_IMPL" actually represent the address of a field within a struct, but since
+ // the offset of the field is zero, there's no "GT_ADD" node. We normally attach a field sequence to the constant
+ // that is added, but what do we do when that constant is zero, and is thus not present? We use this mechanism to
// attach the field sequence directly to the address node.
NodeToFieldSeqMap* m_zeroOffsetFieldMap;
// record the field sequence using the ZeroOffsetFieldMap described above.
//
// One exception above is that "op1" is a node of type "TYP_REF" where "op1" is a GT_LCL_VAR.
- // This happens when System.Object vtable pointer is a regular field at offset 0 in System.Private.CoreLib in CoreRT.
- // Such case is handled same as the default case.
+ // This happens when System.Object vtable pointer is a regular field at offset 0 in System.Private.CoreLib in
+ // CoreRT. Such case is handled same as the default case.
void fgAddFieldSeqForZeroOffset(GenTreePtr op1, FieldSeqNode* fieldSeq);
NodeToUnsignedMap* m_heapSsaMap;
- // In some cases, we want to assign intermediate SSA #'s to heap states, and know what nodes create those heap states.
- // (We do this for try blocks, where, if the try block doesn't do a call that loses track of the heap state, all the possible
- // heap states are possible initial states of the corresponding catch block(s).)
+ // In some cases, we want to assign intermediate SSA #'s to heap states, and know what nodes create those heap
+ // states. (We do this for try blocks, where, if the try block doesn't do a call that loses track of the heap state,
+ // all the possible heap states are possible initial states of the corresponding catch block(s).)
NodeToUnsignedMap* GetHeapSsaMap()
{
Compiler* compRoot = impInlineRoot();
size_t genTreeNodeCnt;
size_t genTreeNodeSize; // The size we allocate
- size_t genTreeNodeActualSize; // The actual size of the node. Note that the actual size will likely be smaller than the
- // allocated size, but we sometimes use SetOper()/ChangeOper() to change a smaller node
- // to a larger one. TODO-Cleanup: add stats on SetOper()/ChangeOper() usage to quanitfy this.
+ size_t genTreeNodeActualSize; // The actual size of the node. Note that the actual size will likely be smaller
+ // than the allocated size, but we sometimes use SetOper()/ChangeOper() to change
+ // a smaller node to a larger one. TODO-Cleanup: add stats on
+ // SetOper()/ChangeOper() usage to quantify this.
};
extern NodeSizeStats genNodeSizeStats; // Total node size stats
extern NodeSizeStats genNodeSizeStatsPerFunc; // Per-function node size stats
if (lvaDoneFrameLayout > REGALLOC_FRAME_LAYOUT && !varDsc->lvOnFrame)
{
#ifdef _TARGET_AMD64_
- // On amd64, every param has a stack location, except on Unix-like systems.
#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On amd64, every param has a stack location, except on Unix-like systems.
assert(varDsc->lvIsParam);
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#elif defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
if (!FPbased)
{
// Worst case stack based offset.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_FIXED_OUT_ARGS
int outGoingArgSpaceSize = lvaOutgoingArgSpaceSize;
#else
else
{
// Worst case FP based offset.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_ARM_
offset = codeGen->genCallerSPtoInitialSPdelta() - codeGen->genCallerSPtoFPdelta();
#else
{
LclVarDsc * varDsc = lvaTable + varNum;
// Should never write to or take the address of the original 'this' arg
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef JIT32_GCENCODER
// With the general encoder/decoder, when the original 'this' arg is needed as a generics context param, we
// copy to a new local, and mark the original as DoNotEnregister, to
- // ensure that it is stack-allocated. It should not be the case that the original one can be modified -- it should
- // not be written to, or address-exposed.
+ // ensure that it is stack-allocated. It should not be the case that the original one can be modified -- it
+ // should not be written to, or address-exposed.
assert(!varDsc->lvArgWrite && (!varDsc->lvAddrExposed || ((info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0)));
#else
assert(!varDsc->lvArgWrite && !varDsc->lvAddrExposed);
#ifdef PROFILING_SUPPORTED
return compProfilerHookNeeded
+#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
// IL stubs are excluded by VM and we need to do the same even running
// under a complus env hook to generate profiler hooks
-#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
|| (opts.compJitELTHookEnabled && !(opts.eeFlags & CORJIT_FLG_IL_STUB))
#endif
;
return PROMOTION_TYPE_DEPENDENT;
}
- // we have a parameter that could be enregistered
+ // We have a parameter that could be enregistered
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+
// The struct parameter is a register candidate
return PROMOTION_TYPE_INDEPENDENT;
#else
(lvaGetParentPromotionType(lclNum) != PROMOTION_TYPE_INDEPENDENT))
{
// SSA must exclude struct fields that are not independent
- // - because we don't model the struct assignment properly when multiple fields can be assigned by one struct assignment.
+ // - because we don't model the struct assignment properly when multiple fields can be assigned by one struct
+ // assignment.
// - SSA doesn't allow a single node to contain multiple SSA definitions.
// - and PROMOTION_TYPE_DEPENDENT fields are never candidates for a register.
//
// This list of macro invocations should be used to define the CompMemKind enumeration,
// and the corresponding array of string names for these enum members.
+// clang-format off
CompMemKindMacro(AssertionProp)
CompMemKindMacro(ASTNode)
CompMemKindMacro(InstDesc)
CompMemKindMacro(LoopOpt)
CompMemKindMacro(LoopHoist)
CompMemKindMacro(Unknown)
+// clang-format on
#undef CompMemKindMacro
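// Sketch of how a consumer typically expands the list above into an enum and a parallel name table (the
// include name and CMK_ spelling below are assumptions for illustration, not a quote of the real consumer):
//
//     enum CompMemKind
//     {
//     #define CompMemKindMacro(kind) CMK_##kind,
//     #include "compmemkind.h"
//         CMK_Count
//     };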
// (We should never do EndPhase on a phase that has children, only on 'leaf phases.')
// "parent" is -1 for leaf phases, otherwise it is the "enumName" of the parent phase.
+// clang-format off
CompPhaseNameMacro(PHASE_PRE_IMPORT, "Pre-import", "PRE-IMP", false, -1)
CompPhaseNameMacro(PHASE_IMPORTATION, "Importation", "IMPORT", false, -1)
CompPhaseNameMacro(PHASE_POST_IMPORT, "Post-import", "POST-IMP", false, -1)
CompPhaseNameMacro(PHASE_GENERATE_CODE, "Generate code", "CODEGEN", false, -1)
CompPhaseNameMacro(PHASE_EMIT_CODE, "Emit code", "EMIT", false, -1)
CompPhaseNameMacro(PHASE_EMIT_GCEH, "Emit GC+EH tables", "EMT-GCEH", false, -1)
+// clang-format on
#undef CompPhaseNameMacro
void DecomposeLongs::DecomposeStmt(GenTreeStmt* stmt)
{
GenTree* savedStmt = m_compiler->compCurStmt; // We'll need to restore this later, in case this call was recursive.
- m_compiler->compCurStmt = stmt; // Publish the current statement globally. One reason: fgInsertEmbeddedFormTemp requires it.
+ m_compiler->compCurStmt = stmt; // Publish the current statement globally. One reason:
+ // fgInsertEmbeddedFormTemp requires it.
m_compiler->fgWalkTreePost(&stmt->gtStmt.gtStmtExpr, &DecomposeLongs::DecompNodeHelper, this, true);
m_compiler->compCurStmt = savedStmt;
}
SimpleLinkNodeAfter(addrHigh, storeIndHigh);
// External links of storeIndHigh tree
- //dataHigh->gtPrev = nullptr;
+ // dataHigh->gtPrev = nullptr;
if (isEmbeddedStmt)
{
// If storeIndTree is an embedded statement, connect storeIndLow
#endif // !_TARGET_64BIT_
-#endif // !LEGACY_BACKEND
\ No newline at end of file
+#endif // !LEGACY_BACKEND
switch (terminationType)
{
-// int disCallSize;
+ // int disCallSize;
case DISX86::trmtaJmpShort:
case DISX86::trmtaJmpCcShort:
/* find the emitter block and the offset of the call fixup */
/* for the fixup offset we have to add the opcode size for the call - in the case of a near call is 1 */
-// disCallSize = 1;
+ // disCallSize = 1;
{
size_t absoluteTarget = (size_t)disGetLinearAddr(disTarget);
switch (terminationType)
{
-// int disCallSize;
+ // int disCallSize;
case DISARM64::TRMTA::trmtaBra:
case DISARM64::TRMTA::trmtaBraCase:
/* find the emitter block and the offset of the call fixup */
/* for the fixup offset we have to add the opcode size for the call - in the case of a near call is 1 */
-// disCallSize = 1;
+ // disCallSize = 1;
{
size_t absoluteTarget = (size_t)disGetLinearAddr(disTarget);
return;
}
- // Should we make it diffable?
#ifdef DEBUG
+ // Should we make it diffable?
disDiffable = disComp->opts.dspDiffable;
#else // !DEBUG
// NOTE: non-debug builds are always diffable!
/* Given a linear offset into the code, find a pointer to the actual code (either in the hot or cold section) */
const BYTE* disGetLinearAddr(size_t offset);
- /* Given a linear offset into the code, determine how many bytes are left in the hot or cold buffer the offset points to */
+ /* Given a linear offset into the code, determine how many bytes are left in the hot or cold buffer the offset
+ * points to */
size_t disGetBufferSize(size_t offset);
// Map of instruction addresses to call target method handles for normal calls.
bool printit = false,
bool dispOffs = false,
bool dispCodeBytes = false);
-
};
// Everything fits into a single 'slot' size
// to accommodate irregular sized structs, they are passed byref
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
CORINFO_CLASS_HANDLE argClass;
// make certain the EE passes us back the right thing for refanys
assert(argTypeJit != CORINFO_TYPE_REFANY || structSize == 2*sizeof(void*));
-#if FEATURE_MULTIREG_ARGS
// For each target that supports passing struct args in multiple registers
// apply the target specific rules for them here:
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_MULTIREG_ARGS
#if defined(_TARGET_ARM64_)
// Any structs that are larger than MAX_PASS_MULTIREG_BYTES are always passed by reference
if (structSize > MAX_PASS_MULTIREG_BYTES)
return TARGET_POINTER_SIZE;
}
}
+ // otherwise we will pass this struct by value in multiple registers
}
- // otherwise will we pass this struct by value in multiple registers
- //
#elif defined(_TARGET_ARM_)
// otherwise we will pass this struct by value in multiple registers
-#else //
+#else
NYI("unknown target");
#endif // defined(_TARGET_XXX_)
#endif // FEATURE_MULTIREG_ARGS
{
assert(exceptionPointers->ExceptionRecord->ExceptionCode != SEH_VERIFICATION_EXCEPTION);
- // Backward compatibility: Convert bad image format exceptions thrown by the EE while resolving token to verification exceptions
- // if we are verifying. Verification exceptions will cause the JIT of the basic block to fail, but the JITing of the whole method
- // is still going to succeed. This is done for backward compatibility only. Ideally, we would always treat bad tokens in the IL
- // stream as fatal errors.
+ // Backward compatibility: Convert bad image format exceptions thrown by the EE while resolving token to
+ // verification exceptions if we are verifying. Verification exceptions will cause the JIT of the basic block to
+ // fail, but the JITing of the whole method is still going to succeed. This is done for backward compatibility only.
+ // Ideally, we would always treat bad tokens in the IL stream as fatal errors.
if (exceptionPointers->ExceptionRecord->ExceptionCode == EXCEPTION_COMPLUS)
{
auto* param = reinterpret_cast<TryResolveTokenFilterParam*>(theParam);
emitSizeMethod += sz;
#endif
-// printf("Group [%08X]%3u has %2u instructions (%4u bytes at %08X)\n", ig, ig->igNum, emitCurIGinsCnt, sz, id);
+ // printf("Group [%08X]%3u has %2u instructions (%4u bytes at %08X)\n", ig, ig->igNum, emitCurIGinsCnt, sz, id);
/* Record the live GC register set - if and only if it is not an emitter added block */
size_t of = (BYTE*)oj - emitCurIGfreeBase;
instrDescJmp* nj = (instrDescJmp*)(ig->igData + of);
-// printf("Jump moved from %08X to %08X\n", oj, nj);
-// printf("jmp [%08X] at %08X + %03u\n", nj, ig, nj->idjOffs);
+ // printf("Jump moved from %08X to %08X\n", oj, nj);
+ // printf("jmp [%08X] at %08X + %03u\n", nj, ig, nj->idjOffs);
assert(nj->idjIG == ig);
assert(nj->idIns() == oj->idIns());
memset(&emitConsDsc, 0, sizeof(emitConsDsc));
- // for random NOP insertion
-
#ifdef PSEUDORANDOM_NOP_INSERTION
+ // for random NOP insertion
+
emitEnableRandomNops();
emitComp->info.compRNG.Init(emitComp->info.compChecksum);
emitNextNop = emitNextRandomNop();
}
#endif
+#ifdef PSEUDORANDOM_NOP_INSERTION
// TODO-ARM-Bug?: PSEUDORANDOM_NOP_INSERTION is not defined for _TARGET_ARM_
// ARM - This is currently broken on _TARGET_ARM_
// When nopSize is odd we misalign emitCurIGsize
//
-#ifdef PSEUDORANDOM_NOP_INSERTION
if (!(emitComp->opts.eeFlags & CORJIT_FLG_PREJIT)
&& !emitInInstrumentation
&& !emitIGisInProlog(emitCurIG) // don't do this in prolog or epilog
emitInsCount++;
- /* In debug mode we clear/set some additional fields */
-
#if defined(DEBUG) || defined(LATE_DISASM)
+ /* In debug mode we clear/set some additional fields */
instrDescDebugInfo * info = (instrDescDebugInfo *) emitGetMem(sizeof(*info));
if (emitCurIGnonEmpty() || emitCurIG == emitPrologIG)
emitSavIG();
+#if EMIT_TRACK_STACK_DEPTH
/* Reset the stack depth values */
-#if EMIT_TRACK_STACK_DEPTH
emitCurStackLvl = 0;
emitCntStackDepth = sizeof(int);
#endif
#ifdef DEBUGGING_SUPPORT
+#if FEATURE_EH_FUNCLETS
// Add the appropriate IP mapping debugging record for this placeholder
// group.
-#if FEATURE_EH_FUNCLETS
// genExitCode() adds the mapping for main function epilogs
if (emitComp->opts.compDbgInfo)
{
assert(emitCurIGsize <= MAX_PLACEHOLDER_IG_SIZE);
+#if EMIT_TRACK_STACK_DEPTH
/* Reset the stack depth values */
-#if EMIT_TRACK_STACK_DEPTH
emitCurStackLvl = 0;
emitCntStackDepth = sizeof(int);
#endif
reportCandidate = false;
}
+ // Report it!
if (reportCandidate)
{
- // Report it!
-
#ifdef DEBUG
if (EMITVERBOSE && (candidateSize >= maxSplitSize))
printf("emitSplit: split at IG%02u is size %d, larger than requested maximum size of %d\n", igLastCandidate->igNum, candidateSize, maxSplitSize);
{
instrDescCGCA* id = emitAllocInstrCGCA(retSize);
-// printf("Direct call with GC vars / big arg cnt / explicit scope\n");
+ // printf("Direct call with GC vars / big arg cnt / explicit scope\n");
id->idSetIsLargeCall();
{
instrDesc * id = emitNewInstrCns(retSize, argCnt);
-// printf("Direct call w/o GC vars / big arg cnt / explicit scope\n");
+ // printf("Direct call w/o GC vars / big arg cnt / explicit scope\n");
/* Make sure we didn't waste space unexpectedly */
assert(!id->idIsLargeCns());
/* Issue the next instruction */
-// printf("[S=%02u] " , emitCurStackLvl);
+ // printf("[S=%02u] " , emitCurStackLvl);
is = emitOutputInstr(ig, id, dp);
-// printf("[S=%02u]\n", emitCurStackLvl);
+ // printf("[S=%02u]\n", emitCurStackLvl);
#if EMIT_TRACK_STACK_DEPTH
* ARM has a small, medium, and large encoding. The large encoding is a pseudo-op
* to handle greater range than the conditional branch instructions can handle.
* ARM64 has a small and large encoding for both conditional branch and loading label addresses.
- * The large encodings are pseudo-ops that represent a multiple instruction sequence, similar to ARM. (Currently NYI).
+ * The large encodings are pseudo-ops that represent a multiple instruction sequence, similar to ARM. (Currently
+ * NYI).
*/
void emitter::emitJumpDistBind()
do
{
lstIG = lstIG->igNext; assert(lstIG);
-// printf("Adjusted offset of block %02u from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, lstIG->igOffs - adjIG);
+ // printf("Adjusted offset of block %02u from %04X to %04X\n", lstIG->igNum, lstIG->igOffs,
+ // lstIG->igOffs - adjIG);
lstIG->igOffs -= adjIG;
assert(IsCodeAligned(lstIG->igOffs));
}
jmp->idjOffs -= adjLJ;
// If this is a jump via register, the instruction size does not change, so we are done.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_ARM64_)
// JIT code and data will be allocated together for arm64 so the relative offset to JIT data is known.
else
{
/* First time we've seen this label, convert its target */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (EMITVERBOSE)
// We should not be jumping/branching across funclets/functions
emitCheckFuncletBranch(jmp, jmpIG);
+#ifdef _TARGET_XARCH_
/* Done if this is not a variable-sized jump */
-#ifdef _TARGET_XARCH_
if ( (jmp->idIns() == INS_push) ||
(jmp->idIns() == INS_mov) ||
(jmp->idIns() == INS_call) ||
lstIG = lstIG->igNext;
if (!lstIG)
break;
-// printf("Adjusted offset of block %02u from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, lstIG->igOffs - adjIG);
+ // printf("Adjusted offset of block %02u from %04X to %04X\n", lstIG->igNum, lstIG->igOffs,
+ // lstIG->igOffs - adjIG);
lstIG->igOffs -= adjIG;
assert(IsCodeAligned(lstIG->igOffs));
}
#endif
/* Is there a chance of other jumps becoming short? */
-
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
#if defined(_TARGET_ARM_)
if (EMITVERBOSE) printf("Total shrinkage = %3u, min extra short jump size = %3u, min extra medium jump size = %u\n", adjIG, minShortExtra, minMediumExtra);
#endif
-// if (emitConsDsc.dsdOffs) printf("Cons=%08X\n", consBlock);
+ // if (emitConsDsc.dsdOffs)
+ // printf("Cons=%08X\n", consBlock);
/* Give the block addresses to the caller and other functions here */
*consAddr = emitConsBlock = consBlock;
/* Nothing has been pushed on the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if EMIT_TRACK_STACK_DEPTH
emitCurStackLvl = 0;
if (jmp->idjShort)
{
// Patch Forward Short Jump
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_XARCH_)
*(BYTE *)adr -= (BYTE)adj;
#elif defined(_TARGET_ARM_)
else
{
// Patch Forward non-Short Jump
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_XARCH_)
*(int *)adr -= adj;
#elif defined(_TARGET_ARMARCH_)
// See specification comment at the declaration.
void emitter::emitGenGCInfoIfFuncletRetTarget(insGroup* ig, BYTE* cp)
{
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
// We only emit this GC information on targets where finally's are implemented via funclets,
// and the finally is invoked, during non-exceptional execution, via a branch with a predefined
// link register, rather than a "true call" for which we would already generate GC info. Currently,
// this means precisely ARM.
-#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
if (ig->igFlags & IGF_FINALLY_TARGET)
{
// We don't actually have a call instruction in this case, so we don't have
instrDesc * id = (instrDesc *)ig->igData;
UNATIVE_OFFSET of = 0;
- /* Make sure we were passed reasonable arguments */
-
#ifdef DEBUG
+ /* Make sure we were passed reasonable arguments */
assert(ig && ig->igSelf == ig);
assert(ig->igInsCnt >= insNum);
#endif
int offs = val & ~OFFSET_MASK;
-// printf("var #%2u at %3d is now %s\n", num, offs, (vars & 1) ? "live" : "dead");
+ // printf("var #%2u at %3d is now %s\n", num, offs, (vars & 1) ? "live" : "dead");
if (VarSetOps::IsMember(emitComp, vars, num))
{
regPtrDsc * regPtrNext;
assert(!isThis || emitComp->lvaKeepAliveAndReportThis());
-// assert(emitFullyInt || isThis);
+ // assert(emitFullyInt || isThis);
assert(emitFullGCinfo);
assert(((emitThisGCrefRegs|emitThisByrefRegs) & regMask) == 0);
regPtrDsc * regPtrNext;
-// assert(emitFullyInt);
+ // assert(emitFullyInt);
assert(emitFullGCinfo);
assert(((emitThisGCrefRegs|emitThisByrefRegs) & regMask) != 0);
of = emitGetInsOfsFromCodePos(codePos);
-// printf("[IG=%02u;ID=%03u;OF=%04X] <= %08X\n", ig->igNum, emitGetInsNumFromCodePos(codePos), of, codePos);
+ // printf("[IG=%02u;ID=%03u;OF=%04X] <= %08X\n", ig->igNum, emitGetInsNumFromCodePos(codePos), of, codePos);
/* Make sure the offset estimate is accurate */
{
/* Push an entry for this argument on the tracking stack */
-// printf("Pushed [%d] at lvl %2u [max=%u]\n", isGCref, emitArgTrackTop - emitArgTrackTab, emitMaxStackDepth);
+ // printf("Pushed [%d] at lvl %2u [max=%u]\n", isGCref, emitArgTrackTop - emitArgTrackTab, emitMaxStackDepth);
assert(level.IsOverflow() || u2.emitArgTrackTop == u2.emitArgTrackTab + level.Value());
*u2.emitArgTrackTop++ = (BYTE)gcType;
assert(IsValidGCtype(gcType));
-// printf("Popped [%d] at lvl %u\n", GCtypeStr(gcType), emitArgTrackTop - emitArgTrackTab);
+ // printf("Popped [%d] at lvl %u\n", GCtypeStr(gcType), emitArgTrackTop - emitArgTrackTab);
// This is an "interesting" argument
}
#ifdef JIT32_GCENCODER
- // For the general encoder, we always have to record calls, so we don't take this early return. /* Are there any args to pop at this call site? */
+ // For the general encoder, we always have to record calls, so we don't take this early return.
+
+ /* Are there any args to pop at this call site? */
if (argRecCnt.Value() == 0)
{
Or do we have a partially interruptible EBP-less frame, and any
of EDI,ESI,EBX,EBP are live, or is there an outer/pending call?
*/
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if !FPO_INTERRUPTIBLE
if (emitFullyInt ||
if (needsGC(gcType))
{
-// printf("Killed %s at lvl %u\n", GCtypeStr(gcType), argTrackTop - emitArgTrackTab);
+ // printf("Killed %s at lvl %u\n", GCtypeStr(gcType), argTrackTop - emitArgTrackTab);
*argTrackTop = GCT_NONE;
gcCnt += 1;
#endif // FEATURE_EH_FUNCLETS
// Try to do better packing based on how large regMaskSmall is (8, 16, or 64 bits).
-
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if REGMASK_BITS <= 32
union
// On Amd64, this is where the second DWORD begins
// On System V a call could return a struct in 2 registers. The instrDescCGCA struct below has member that
// stores the GC-ness of the second register.
- // It is added to the instrDescCGCA and not here (the base struct) since it is not needed by all the instructions.
- // This struct (instrDesc) is very carefully kept to be no more than 128 bytes. There is no more space to add members
- // for keeping GC-ness of the second return registers. It will also bloat the base struct unnecessarily
- // since the GC-ness of the second register is only needed for call instructions.
+ // It is added to the instrDescCGCA and not here (the base struct) since it is not needed by all the
+ // instructions. This struct (instrDesc) is very carefully kept to be no more than 128 bytes. There is no more
+ // space to add members for keeping GC-ness of the second return register. It will also bloat the base struct
+ // unnecessarily since the GC-ness of the second register is only needed for call instructions.
// The instrDescCGCA struct's member keeping the GC-ness of the second return register is _idcSecondRetRegGCType.
GCtype _idGCref :2; // GCref operand? (value is a "GCtype")
// amd64: 38 bits
// arm: 32 bits
// arm64: 30 bits
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if HAS_TINY_DESC
//
// amd64: 46 bits
// arm: 48 bits
// arm64: 48 bits
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef RELOC_SUPPORT
// amd64: 48 bits
// arm: 50 bits
// arm64: 50 bits
+ CLANG_FORMAT_COMMENT_ANCHOR;
#define ID_EXTRA_BITS (ID_EXTRA_RELOC_BITS + ID_EXTRA_BITFIELD_BITS)
////////////////////////////////////////////////////////////////////////
// Space taken up to here (with RELOC_SUPPORT): 64 bits, all architectures, by design.
////////////////////////////////////////////////////////////////////////
+ CLANG_FORMAT_COMMENT_ANCHOR;
#endif // !HAS_TINY_DESC
// There should no padding or alignment issues on any platform or
// configuration (including DEBUG which has 1 extra pointer).
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if HAS_TINY_DESC
unsigned emitCurStackLvl; // amount of bytes pushed on stack
- /* Functions for stack tracking */
#if EMIT_TRACK_STACK_DEPTH
+ /* Functions for stack tracking */
+
void emitStackPush (BYTE * addr,
GCtype gcType);
assert(emitGetInsOfsFromCodePos(codePos) == emitCurIGsize);
assert(emitGetInsNumFromCodePos(codePos) == emitCurIGinsCnt);
-// printf("[IG=%02u;ID=%03u;OF=%04X] => %08X\n", emitCurIG->igNum, emitCurIGinsCnt, emitCurIGsize, codePos);
+ // printf("[IG=%02u;ID=%03u;OF=%04X] => %08X\n", emitCurIG->igNum, emitCurIGinsCnt, emitCurIGsize, codePos);
return codePos;
}
case IF_T2_E0: case IF_T2_E1: case IF_T2_E2:
case IF_T2_G0: case IF_T2_G1: case IF_T2_H0: case IF_T2_H1:
case IF_T2_K1: case IF_T2_K4:
- // Some formats with "destination" or "target" registers are actually used for store instructions, for the "source" value
- // written to memory.
+ // Some formats with "destination" or "target" registers are actually used for store instructions, for the
+ // "source" value written to memory.
// Similarly, PUSH has a target register, indicating the start of the set of registers to push. POP
// *does* write to at least one register, so we do not make that a special case.
// Various compare/test instructions do not write (except to the flags). Technically "teq" does not need to be
emitter::insFormat emitter::emitInsFormat(instruction ins)
{
+ // clang-format off
const static insFormat insFormats[] =
{
#define INST1(id, nm, fp, ldst, fmt, e1 ) fmt,
#define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) fmt,
#include "instrs.h"
};
+ // clang-format on
assert(ins < ArrLen(insFormats));
assert((insFormats[ins] != IF_NONE));
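The // clang-format off and // clang-format on pairs added around these tables keep clang-format from re-wrapping rows that are produced by the INSTn X-macros and the #include of "instrs.h", where the hand-aligned columns are the whole point. As a minimal, self-contained sketch of the pattern being protected (the enum values, macro body, and instruction names below are made up for illustration and are not the contents of instrs.h):

    // Hypothetical X-macro table; the guards preserve its hand-aligned rows.
    enum insFormat { IF_NONE, IF_RRD, IF_RWR };

    // clang-format off
    #define INST1(id, nm, fmt)   fmt,
    const insFormat insFormats[] =
    {
        INST1(add, "add", IF_RRD)
        INST1(mov, "mov", IF_RWR)
    };
    #undef INST1
    // clang-format on

The real tables follow the same shape, only with many more rows and with INST1 through INST9 arities.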
#define ST 4
#define CMP 8
+// clang-format off
/*static*/ const BYTE CodeGenInterface::instInfo[] =
{
#define INST1(id, nm, fp, ldst, fmt, e1 ) ldst | INST_FP*fp,
#define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) ldst | INST_FP*fp,
#include "instrs.h"
};
+// clang-format on
/*****************************************************************************
*
size_t emitter::emitInsCode(instruction ins, insFormat fmt)
{
+ // clang-format off
const static size_t insCodes1[] =
{
#define INST1(id, nm, fp, ldst, fmt, e1 ) e1,
const static insFormat formatEncode2E[2] = { IF_T1_E, IF_T2_C6 };
const static insFormat formatEncode2F[2] = { IF_T1_E, IF_T2_C5 };
const static insFormat formatEncode2G[2] = { IF_T1_J3, IF_T2_M1 };
+ // clang-format on
size_t code = BAD_CODE;
insFormat insFmt = emitInsFormat(ins);
assert(insOptsNone(opt));
// On ARM, the immediate shift count of LSL and ROR must be between 1 and 31. For LSR and ASR, it is between
- // 1 and 32, though we don't ever use 32. Although x86 allows an immediate shift count of 8-bits in instruction
- // encoding, the CPU looks at only the lower 5 bits. As per ECMA, specifying a shift count to the IL SHR, SHL, or SHL.UN
- // instruction that is greater than or equal to the width of the type will yield an undefined value. We choose that
- // undefined value in this case to match x86 behavior, by only using the lower 5 bits of the constant shift count.
+ // 1 and 32, though we don't ever use 32. Although x86 allows an immediate shift count of 8-bits in
+ // instruction encoding, the CPU looks at only the lower 5 bits. As per ECMA, specifying a shift count to
+ // the IL SHR, SHL, or SHL.UN instruction that is greater than or equal to the width of the type will yield
+ // an undefined value. We choose that undefined value in this case to match x86 behavior, by only using the
+ // lower 5 bits of the constant shift count.
imm &= 0x1f;
if (imm == 0)
}
}
- // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC ref or
- // overwritten one.
+ // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC
+ // ref or overwritten one.
if (emitInsWritesToLclVarStackLoc(id))
{
int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
{
if (EMITVERBOSE)
{
- unsigned idNum = id->idDebugOnlyInfo()->idNum; // Do not remove this! It is needed for VisualStudio conditional breakpoints
+ unsigned idNum = id->idDebugOnlyInfo()->idNum; // Do not remove this! It is needed for VisualStudio
+ // conditional breakpoints
printf("IN%04x: ", idNum);
}
/*****************************************************************************/
#ifdef DEBUG
+// clang-format off
static const char * const xRegNames[] =
{
#define REGDEF(name, rnum, mask, xname, wname) xname,
"b25", "b26", "b27", "b28", "b29",
"b30", "b31"
};
+// clang-format on
/*****************************************************************************
*
emitter::insFormat emitter::emitInsFormat(instruction ins)
{
+ // clang-format off
const static insFormat insFormats[] =
{
#define INST1(id, nm, fp, ldst, fmt, e1 ) fmt,
#define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) fmt,
#include "instrs.h"
};
+ // clang-format on
assert(ins < ArrLen(insFormats));
assert((insFormats[ins] != IF_NONE));
#define ST 4
#define CMP 8
+// clang-format off
/*static*/ const BYTE CodeGenInterface::instInfo[] =
{
#define INST1(id, nm, fp, ldst, fmt, e1 ) ldst | INST_FP*fp,
#define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) ldst | INST_FP*fp,
#include "instrs.h"
};
-
+// clang-format on
/*****************************************************************************
*
emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
{
+ // clang-format off
const static code_t insCodes1[] =
{
#define INST1(id, nm, fp, ldst, fmt, e1 ) e1,
#define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e9,
#include "instrs.h"
};
+ // clang-format on
+
const static insFormat formatEncode9[9] = { IF_DR_2E, IF_DR_2G, IF_DI_1B, IF_DI_1D, IF_DV_3C,
IF_DV_2B, IF_DV_2C, IF_DV_2E, IF_DV_2F };
const static insFormat formatEncode6A[6] = { IF_DR_3A, IF_DR_3B, IF_DR_3C, IF_DI_2A, IF_DV_3A,
// Check that 'value' fits in 'width' bits. Don't consider "sign" bits above width.
UINT64 maxVal = 1ULL << width;
UINT64 lowBitsMask = maxVal - 1;
- UINT64 signBitsMask = ~lowBitsMask | (1ULL << (width - 1)); // The high bits must be set, and the top bit (sign bit) must be set.
+ UINT64 signBitsMask = ~lowBitsMask | (1ULL << (width - 1)); // The high bits must be set, and the top bit
+ // (sign bit) must be set.
assert((value < maxVal) ||
((value & signBitsMask) == signBitsMask));
INT32 maxVal = 1 << immWidth;
INT32 lowBitsMask = maxVal - 1;
INT32 hiBitsMask = ~lowBitsMask;
- INT32 signBitsMask = hiBitsMask | (1 << (immWidth - 1)); // The high bits must be set, and the top bit (sign bit) must be set.
+ INT32 signBitsMask = hiBitsMask | (1 << (immWidth - 1)); // The high bits must be set, and the top bit
+ // (sign bit) must be set.
assert((imm < maxVal) ||
((imm & signBitsMask) == signBitsMask));
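A worked instance of the width/sign-bit check above may help: for width == 8 the masks collapse to small hex constants, and the assert admits either an unsigned value below 2^8 or a sign-extended negative one. This standalone sketch (all names local to the example) just replays that arithmetic:

    #include <cassert>
    #include <cstdint>

    int main()
    {
        const int      width        = 8;
        const uint64_t maxVal       = 1ULL << width;                        // 0x100
        const uint64_t lowBitsMask  = maxVal - 1;                           // 0x0FF
        const uint64_t signBitsMask = ~lowBitsMask | (1ULL << (width - 1)); // 0xFFFFFFFFFFFFFF80

        const uint64_t positive = 0x7F;                  // fits in 8 bits: below maxVal
        const uint64_t negative = (uint64_t)(int64_t)-2; // 0xFFFFFFFFFFFFFFFE, sign-extended

        assert(positive < maxVal);
        assert((negative & signBitsMask) == signBitsMask); // high bits and bit 7 all set
        return 0;
    }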
/*****************************************************************************
*
- * Returns an encoding for the condition code with the lowest bit inverted (marked by invert(<cond>) in the architecture manual).
+ * Returns an encoding for the condition code with the lowest bit inverted (marked by invert(<cond>) in the
+ * architecture manual).
*/
/*static*/ emitter::code_t emitter::insEncodeInvertedCond(insCond cond)
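The function declared above relies on the ARM/ARM64 encoding property the comment describes: toggling the lowest bit of a condition code selects the inverse test (EQ/NE, CS/CC, GE/LT, and so on). A minimal sketch of that idea, using a hypothetical helper name rather than the emitter's actual implementation:

    // Illustrative only: invert(<cond>) amounts to flipping bit 0 of the 4-bit encoding.
    inline unsigned invertCondEncoding(unsigned condEncoding) // hypothetical helper
    {
        return condEncoding ^ 1; // e.g. EQ (0b0000) <-> NE (0b0001), GE (0b1010) <-> LT (0b1011)
    }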
if (dstOffs <= srcOffs)
{
+#if DEBUG_EMIT
/* This is a backward jump - distance is known at this point */
-#if DEBUG_EMIT
if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
{
size_t blkOffs = id->idjIG->igOffs;
}
}
- // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC ref or
- // overwritten one.
+ // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC
+ // ref or overwritten one.
if (emitInsWritesToLclVarStackLoc(id))
{
int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
// See the LICENSE file in the project root for more information.
//////////////////////////////////////////////////////////////////////////////
-
+// clang-format off
#if !defined(_TARGET_ARM_)
#error Unexpected target type
#endif
#endif // !DEFINE_ID_OPS
//////////////////////////////////////////////////////////////////////////////
+// clang-format on
// See the LICENSE file in the project root for more information.
//////////////////////////////////////////////////////////////////////////////
-
+// clang-format off
#if !defined(_TARGET_ARM64_)
#error Unexpected target type
#endif
#endif // !DEFINE_ID_OPS
//////////////////////////////////////////////////////////////////////////////
+// clang-format on
// This file was previously known as emitfmts.h
//
+// clang-format off
#if !defined(_TARGET_XARCH_)
#error Unexpected target type
#endif
#endif // DEFINE_IS_OPS
#endif // DEFINE_ID_OPS
//////////////////////////////////////////////////////////////////////////////
+// clang-format on
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-
+// clang-format off
#ifndef JMP_SMALL
#error Must define JMP_SMALL macro before including this file
#endif
/*****************************************************************************/
#undef JMP_SMALL
/*****************************************************************************/
+
+// clang-format on
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-
/************************************************************************/
/* Overall emitter control (including startup and shutdown) */
/************************************************************************/
unsigned emitGetInstructionSize(emitLocation* emitLoc);
#endif // defined(_TARGET_ARM_)
+
#define INST_DEF_FL 0x20 // does the instruction set flags?
#define INST_USE_FL 0x40 // does the instruction use flags?
+// clang-format off
const BYTE CodeGenInterface::instInfo[] =
{
#define INST0(id, nm, fp, um, rf, wf, mr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
#undef INST4
#undef INST5
};
+// clang-format on
/*****************************************************************************
*
* Initialize the table used by emitInsModeFormat().
*/
+// clang-format off
const BYTE emitter::emitInsModeFmtTab[] =
{
#define INST0(id, nm, fp, um, rf, wf, mr ) um,
#undef INST4
#undef INST5
};
+// clang-format on
#ifdef DEBUG
unsigned const emitter::emitInsModeFmtCnt = sizeof(emitInsModeFmtTab)/
inline
size_t insCode(instruction ins)
{
+ // clang-format off
const static
size_t insCodes[] =
{
#undef INST4
#undef INST5
};
+ // clang-format on
assert((unsigned)ins < sizeof(insCodes)/sizeof(insCodes[0]));
assert((insCodes[ins] != BAD_CODE));
inline
size_t insCodeMI(instruction ins)
{
+ // clang-format off
const static
size_t insCodesMI[] =
{
#undef INST4
#undef INST5
};
+ // clang-format on
assert((unsigned)ins < sizeof(insCodesMI)/sizeof(insCodesMI[0]));
assert((insCodesMI[ins] != BAD_CODE));
inline
size_t insCodeRM(instruction ins)
{
+ // clang-format off
const static
size_t insCodesRM[] =
{
#undef INST4
#undef INST5
};
+ // clang-format on
assert((unsigned)ins < sizeof(insCodesRM)/sizeof(insCodesRM[0]));
assert((insCodesRM[ins] != BAD_CODE));
inline
size_t insCodeACC(instruction ins)
{
+ // clang-format off
const static
size_t insCodesACC[] =
{
#undef INST4
#undef INST5
};
+ // clang-format on
assert((unsigned)ins < sizeof(insCodesACC)/sizeof(insCodesACC[0]));
assert((insCodesACC[ins] != BAD_CODE));
inline
size_t insCodeRR(instruction ins)
{
+ // clang-format off
const static
size_t insCodesRR[] =
{
#undef INST4
#undef INST5
};
+ // clang-format on
assert((unsigned)ins < sizeof(insCodesRR)/sizeof(insCodesRR[0]));
assert((insCodesRR[ins] != BAD_CODE));
return insCodesRR[ins];
}
+// clang-format off
const static
size_t insCodesMR[] =
{
#undef INST4
#undef INST5
};
+// clang-format on
// Returns true iff the give CPU instruction has an MR encoding.
inline
#endif
{
// Dev10 804810 - failing this assert can lead to bad codegen and runtime crashes
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef UNIX_AMD64_ABI
LclVarDsc* varDsc = emitComp->lvaTable + var;
bool isRegPassedArg = varDsc->lvIsParam && varDsc->lvIsRegArg;
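The CLANG_FORMAT_COMMENT_ANCHOR; statements added throughout this change exist only to give clang-format a real statement to attach the preceding comment to, so the comment keeps the indentation of the surrounding code rather than being re-flowed against the #if/#ifdef that follows. Its definition is presumably a trivial empty statement along these lines (a sketch of the idea, not a quote from the JIT headers):

    // An empty statement; comments placed just before it stay anchored to the
    // enclosing code block instead of the conditional-compilation directive below.
    #define CLANG_FORMAT_COMMENT_ANCHOR ;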
{
size = 2;
- // Most 16-bit operands will require a size prefix .
+ // Most 16-bit operands will require a size prefix.
// This refers to 66h size prefix override.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_STACK_FP_X87
if ((attrSize == EA_2BYTE) && (ins != INS_fldcw) && (ins != INS_fnstcw))
//
// Post-conditions:
// For base address of int constant:
-// -- the caller must have added the int constant base to the instrDesc when creating it via emitNewInstrAmdCns().
+// -- the caller must have added the int constant base to the instrDesc when creating it via
+// emitNewInstrAmdCns().
// For simple address modes (base + scale * index + offset):
// -- the base register, index register, and scale factor are set.
-// -- the caller must have added the addressing mode offset int constant to the instrDesc when creating it via emitNewInstrAmdCns().
+// -- the caller must have added the addressing mode offset int constant to the instrDesc when creating it via
+// emitNewInstrAmdCns().
//
// The instruction format is set.
//
// Vex prefix size
sz += emitGetVexPrefixSize(ins, attr);
- // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a 64-bit sized
- // operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target register. So we also
- // need to check if that built-in register is an extended register.
+ // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a
+ // 64-bit sized operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target
+ // register. So we also need to check if that built-in register is an extended register.
if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, size) || instrIsExtendedReg3opImul(ins))
{
sz += emitGetRexPrefixSize(ins);
emitAttr size = EA_SIZE(attr);
/* We don't want to generate any useless mov instructions! */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_AMD64_
// Same-reg 4-byte mov can be useful because it performs a
// zero-extension to 8 bytes.
#endif
#if RELOC_SUPPORT
+#ifndef _TARGET_AMD64_
// Storing the address of a basicBlock will need a reloc
// as the instruction uses the absolute address,
// not a relative address.
//
// On Amd64, Absolute code addresses should always go through a reloc to
// to be encoded as RIP rel32 offset.
-#ifndef _TARGET_AMD64_
if (emitComp->opts.compReloc)
#endif
{
case IF_TWR_MRD:
case IF_TRW_MRD:
-// case IF_MRD_TRD:
+ // case IF_MRD_TRD:
+ // case IF_MRW_TRD:
case IF_MWR_TRD:
-// case IF_MRW_TRD:
#endif // FEATURE_STACK_FP_X87
case IF_MRD_OFF:
}
}
-// printf("[F=%s] " , emitIfName(id->idInsFmt()));
-// printf("INS#%03u: ", id->idDebugOnlyInfo()->idNum);
-// printf("[S=%02u] " , emitCurStackLvl); if (isNew) printf("[M=%02u] ", emitMaxStackDepth);
-// printf("[S=%02u] " , emitCurStackLvl/sizeof(INT32));
-// printf("[A=%08X] " , emitSimpleStkMask);
-// printf("[A=%08X] " , emitSimpleByrefStkMask);
-// printf("[L=%02u] " , id->idCodeSize());
+ // printf("[F=%s] " , emitIfName(id->idInsFmt()));
+ // printf("INS#%03u: ", id->idDebugOnlyInfo()->idNum);
+ // printf("[S=%02u] " , emitCurStackLvl); if (isNew) printf("[M=%02u] ", emitMaxStackDepth);
+ // printf("[S=%02u] " , emitCurStackLvl/sizeof(INT32));
+ // printf("[A=%08X] " , emitSimpleStkMask);
+ // printf("[A=%08X] " , emitSimpleByrefStkMask);
+ // printf("[L=%02u] " , id->idCodeSize());
if (!emitComp->opts.dspEmit && !isNew && !asmfm)
doffs = true;
case IF_TWR_ARD:
case IF_TRW_ARD:
-// case IF_ARD_TRD:
+ // case IF_ARD_TRD:
case IF_AWR_TRD:
-// case IF_ARW_TRD:
+ // case IF_ARW_TRD:
#endif // FEATURE_STACK_FP_X87
if (ins == INS_call && id->idIsCallRegPtr())
case IF_TWR_SRD:
case IF_TRW_SRD:
-// case IF_SRD_TRD:
+ // case IF_SRD_TRD:
+ // case IF_SRW_TRD:
case IF_SWR_TRD:
-// case IF_SRW_TRD:
#endif // FEATURE_STACK_FP_X87
case IF_TWR_MRD:
case IF_TRW_MRD:
-// case IF_MRD_TRD:
+ // case IF_MRD_TRD:
+ // case IF_MRW_TRD:
case IF_MWR_TRD:
-// case IF_MRW_TRD:
#endif // FEATURE_STACK_FP_X87
// If we got here, the GC-ness of the registers doesn't match, so we have to "swap" them in the GC
// register pointer mask.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef LEGACY_BACKEND
GCtype gc1, gc2;
/*****************************************************************************
*
* Output a local jump instruction.
- * This function also handles non-jumps that have jump-like characteristics, like RIP-relative LEA of a label that needs
- * to get bound to an actual address and processed by branch shortening.
+ * This function also handles non-jumps that have jump-like characteristics, like RIP-relative LEA of a label that
+ * needs to get bound to an actual address and processed by branch shortening.
*/
BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i)
if (dstOffs <= srcOffs)
{
// This is a backward jump - distance is known at this point
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if DEBUG_EMIT
if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
{
case IF_TWR_ARD:
case IF_TRW_ARD:
-// case IF_ARD_TRD:
+ // case IF_ARD_TRD:
+ // case IF_ARW_TRD:
case IF_AWR_TRD:
-// case IF_ARW_TRD:
#endif // FEATURE_STACK_FP_X87
case IF_TWR_SRD:
case IF_TRW_SRD:
-// case IF_SRD_TRD:
+ // case IF_SRD_TRD:
+ // case IF_SRW_TRD:
case IF_SWR_TRD:
-// case IF_SRW_TRD:
#endif // FEATURE_STACK_FP_X87
if (ins == INS_pop)
{
// The offset in "pop [ESP+xxx]" is relative to the new ESP value
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if !FEATURE_FIXED_OUT_ARGS
emitCurStackLvl -= sizeof(int);
case IF_TWR_MRD:
case IF_TRW_MRD:
-// case IF_MRD_TRD:
+ // case IF_MRD_TRD:
+ // case IF_MRW_TRD:
case IF_MWR_TRD:
-// case IF_MRW_TRD:
#endif // FEATURE_STACK_FP_X87
insFormat FPld,
insFormat FPst);
- bool emitVerifyEncodable(instruction ins,
+ bool emitVerifyEncodable(instruction ins,
emitAttr size,
regNumber reg1,
regNumber reg2 = REG_NA);
}
else if (!fromStillPresent && !toAlreadyPresent)
{
- // write "to" where "from" was
#ifdef DEBUG
+ // write "to" where "from" was
bool foundFrom = false;
#endif // DEBUG
for (unsigned i = 0; i < numDistinctSuccs; i++)
else
{
assert(!fromStillPresent && toAlreadyPresent);
- // remove "from".
#ifdef DEBUG
+ // remove "from".
bool foundFrom = false;
#endif // DEBUG
for (unsigned i = 0; i < numDistinctSuccs; i++)
// the test. This depends on the value of opts.compGCPollType.
// If we're doing GCPOLL_CALL, just insert a GT_CALL node before the last node in the block.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
switch (block->bbJumpKind)
addrTrap = info.compCompHnd->getAddrOfCaptureThreadGlobal(&pAddrOfCaptureThreadGlobal);
+#ifdef ENABLE_FAST_GCPOLL_HELPER
// I never want to split blocks if we've got two indirections here.
// This is a size trade-off assuming the VM has ENABLE_FAST_GCPOLL_HELPER.
// So don't do it when that is off
-#ifdef ENABLE_FAST_GCPOLL_HELPER
if (pAddrOfCaptureThreadGlobal != NULL)
{
pollType = GCPOLL_CALL;
}
fgRemoveStmt(top, stmt);
fgInsertStmtAtEnd(bottom, stmt);
+
}
+
// for BBJ_ALWAYS blocks, bottom is an empty block.
// 4) Create a GT_EQ node that checks against g_TrapReturningThreads. True jumps to Bottom,
// false falls through to poll. Add this to the end of Top. Top is now BBJ_COND. Bottom is
// now a jump target
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef ENABLE_FAST_GCPOLL_HELPER
// Prefer the fast gc poll helper over the double indirection
__fallthrough;
- // CEE_CALLI should not be inlined because the JIT cannot generate an inlined call frame. If the call target
- // is a no-marshal CALLI P/Invoke we end up calling the IL stub. We don't NGEN these stubs, so we'll have to
- // JIT an IL stub for a trivial func. It's almost certainly a better choice to leave out the inline candidate
- // so we can generate an inlined call frame. It might be nice to call getCallInfo to figure out what kind of
- // call we have here.
case CEE_CALLI:
case CEE_LOCALLOC:
case CEE_MKREFANY:
case CEE_RETHROW:
+ // CEE_CALLI should not be inlined because the JIT cannot generate an inlined call frame. If the call target
+ // is a no-marshal CALLI P/Invoke we end up calling the IL stub. We don't NGEN these stubs, so we'll have to
+ // JIT an IL stub for a trivial func. It's almost certainly a better choice to leave out the inline
+ // candidate so we can generate an inlined call frame. It might be nice to call getCallInfo to figure out
+ // what kind of call we have here.
+
//Consider making this only for not force inline.
if (makeInlineObservations)
{
#ifndef LEGACY_BACKEND
if (opts.compProcedureSplitting)
{
- // TODO-CQ: We might need to create a switch table; we won't know for sure until much later. However, switch tables
- // don't work with hot/cold splitting, currently. The switch table data needs a relocation such that if the base
- // (the first block after the prolog) and target of the switch branch are put in different sections, the difference
- // stored in the table is updated. However, our relocation implementation doesn't support three different pointers
- // (relocation address, base, and target). So, we need to change our switch table implementation to be more like
- // JIT64: put the table in the code section, in the same hot/cold section as the switch jump itself (maybe
- // immediately after the switch jump), and make the "base" address be also in that section, probably the
- // address after the switch jump.
+ // TODO-CQ: We might need to create a switch table; we won't know for sure until much later.
+ // However, switch tables don't work with hot/cold splitting, currently. The switch table data needs
+ // a relocation such that if the base (the first block after the prolog) and target of the switch
+ // branch are put in different sections, the difference stored in the table is updated. However, our
+ // relocation implementation doesn't support three different pointers (relocation address, base, and
+ // target). So, we need to change our switch table implementation to be more like
+ // JIT64: put the table in the code section, in the same hot/cold section as the switch jump itself
+ // (maybe immediately after the switch jump), and make the "base" address be also in that section,
+ // probably the address after the switch jump.
opts.compProcedureSplitting = false;
JITDUMP("Turning off procedure splitting for this method, as it might need switch tables; implementation limitation.\n");
}
bool isCallPopAndRet = false;
// impIsTailCallILPattern uses isRecursive flag to determine whether ret in a fallthrough block is
- // allowed. We don't know at this point whether the call is recursive so we conservatively pass false.
- // This will only affect explicit tail calls when IL verification is not needed for the method.
+ // allowed. We don't know at this point whether the call is recursive so we conservatively pass
+ // false. This will only affect explicit tail calls when IL verification is not needed for the
+ // method.
bool isRecursive = false;
if (!impIsTailCallILPattern(tailCall, opcode, codeAddr+sz, codeEndp, isRecursive, &isCallPopAndRet))
{
verCheckNestingLevel(initRoot);
}
+#ifndef DEBUG
// fgNormalizeEH assumes that this test has been passed. And Ssa assumes that fgNormalizeEHTable
// has been run. So do this unless we're in minOpts mode (and always in debug).
-#ifndef DEBUG
if (tiVerificationNeeded || !opts.MinOpts())
#endif
{
// So don't fix their prev next links.
if (stmt->gtStmtIsEmbedded() && stack.Height() == 2)
{
- //
+ // clang-format off
// Two cases:
// Case 1 (Initial case -- we are discovering the first embedded stmt):
// Before:
// Currently, "node" is emb3List and "lastNestEmbedNode" is emb2Expr.
// After:
// ... -> emb2List -> emb2Expr -> -> emb3List -> emb3Expr -> stmtNode -> ... -> stmtExpr
- //
+ // clang-format on
// Drop stmtNodes that occur between emb2Expr and emb3List.
if (lastNestEmbedNode)
}
#endif // DEBUG
+#ifdef DEBUG
/* Some extra checks for the empty case */
-#ifdef DEBUG
switch (block->bbJumpKind)
{
case BBJ_NONE:
void Compiler::fgMoveBlocksAfter(BasicBlock* bStart, BasicBlock* bEnd, BasicBlock* insertAfterBlk)
{
/* We have decided to insert the block(s) after 'insertAfterBlk' */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
{
INDEBUG(const char* reason = "None";)
+ // Figure out the range of blocks we're going to move
+
unsigned XTnum;
EHblkDsc* HBtab;
BasicBlock* bStart = NULL;
BasicBlock* bLast = NULL;
BasicBlock* bPrev = NULL;
- // Figure out the range of blocks we're going to move
-
#if FEATURE_EH_FUNCLETS
// We don't support moving try regions... yet?
noway_assert(relocateType == FG_RELOCATE_HANDLER);
// 1. Verify that all the blocks in the range are either all rarely run or not rarely run.
// When creating funclets, we ignore the run rarely flag, as we need to be able to move any blocks
// in the range.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if !FEATURE_EH_FUNCLETS
bool isRare;
// Currently it is not good to move the rarely run handler regions to the end of the method
// because fgDetermineFirstColdBlock() must put the start of any handler region in the hot section.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if 0
// Now try to move the entire handler region if it can be moved.
// Don't try to move a finally handler unless we already moved the try region.
// replace it with a COMMA node. In such a case we will end up with GT_JTRUE node pointing to
// a COMMA node which results in noway asserts in fgMorphSmpOp(), optAssertionGen() and rpPredictTreeRegUse().
// For the same reason fgMorphSmpOp() marks GT_JTRUE nodes with RELOP children as GTF_DONT_CSE.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (verbose)
{
zeroConstNode);
switchTree->gtOp.gtOp1 = condNode;
switchTree->gtOp.gtOp1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
+
// Re-link the nodes for this statement.
// We know that this is safe for the Lowered form, because we will have eliminated any embedded trees
// when we cloned the switch condition (it is also asserted above).
+
fgSetStmtSeq(switchStmt);
block->bbJumpDest = block->bbJumpSwt->bbsDstTab[0];
block->bbJumpKind = BBJ_COND;
for (blk = startBlk; blk != endBlk; blk = blk->bbNext)
{
// The only way (blk == nullptr) could be true is if the caller passed an endBlk that preceded startBlk in the
- // block list, or if endBlk isn't in the block list at all. In DEBUG, we'll instead hit the similar well-formedness
- // assert earlier in this function.
+ // block list, or if endBlk isn't in the block list at all. In DEBUG, we'll instead hit the similar
+ // well-formedness assert earlier in this function.
noway_assert(blk != nullptr);
if (blk == nearBlk)
// If 'putInFilter' is true, then the block is inserted in the filter region given by 'hndIndex'. In this case, tryIndex
// must be a less nested EH region (that is, tryIndex > hndIndex).
//
-// Otherwise, the block is inserted in either the try region or the handler region, depending on which one is the inner region.
-// In other words, if the try region indicated by tryIndex is nested in the handler region indicated by hndIndex,
+// Otherwise, the block is inserted in either the try region or the handler region, depending on which one is the inner
+// region. In other words, if the try region indicated by tryIndex is nested in the handler region indicated by hndIndex,
// then the new BB will be created in the try region. Vice versa.
//
-// Note that tryIndex and hndIndex are numbered the same as BasicBlock::bbTryIndex and BasicBlock::bbHndIndex, that is, "0" is
-// "main method" and otherwise is +1 from normal, so we can call, e.g., ehGetDsc(tryIndex - 1).
+// Note that tryIndex and hndIndex are numbered the same as BasicBlock::bbTryIndex and BasicBlock::bbHndIndex, that is,
+// "0" is "main method" and otherwise is +1 from normal, so we can call, e.g., ehGetDsc(tryIndex - 1).
//
// To be more specific, this function will create a new BB in one of the following 5 regions (if putInFilter is false):
// 1. When tryIndex = 0 and hndIndex = 0:
//
// Arguments:
// jumpKind - the jump kind of the new block to create.
-// tryIndex - the try region to insert the new block in, described above. This must be a number in the range [0..compHndBBtabCount].
-// hndIndex - the handler region to insert the new block in, described above. This must be a number in the range [0..compHndBBtabCount].
-// nearBlk - insert the new block closely after this block, if possible. If nullptr, put the new block anywhere in the requested region.
+// tryIndex - the try region to insert the new block in, described above. This must be a number in the range
+// [0..compHndBBtabCount].
+// hndIndex - the handler region to insert the new block in, described above. This must be a number in the range
+// [0..compHndBBtabCount].
+// nearBlk - insert the new block closely after this block, if possible. If nullptr, put the new block anywhere
+// in the requested region.
// putInFilter - put the new block in the filter region given by hndIndex, as described above.
// runRarely - 'true' if the new block is run rarely.
-// insertAtEnd - 'true' if the block should be inserted at the end of the region. Note: this is currently only implemented when
-// inserting into the main function (not into any EH region).
+// insertAtEnd - 'true' if the block should be inserted at the end of the region. Note: this is currently only
+// implemented when inserting into the main function (not into any EH region).
//
// Return Value:
// The new block.
break;
#endif // COR_JIT_EE_VERSION
-// case SCK_PAUSE_EXEC:
-// noway_assert(!"add code to pause exec");
+ // case SCK_PAUSE_EXEC:
+ // noway_assert(!"add code to pause exec");
default:
noway_assert(!"unexpected code addition kind");
}
#endif // DEBUG
- /* Walk the basic blocks to assign sequence numbers */
-
#ifdef DEBUG
BasicBlock::s_nMaxTrees = 0;
#endif
+ /* Walk the basic blocks to assign sequence numbers */
+
/* If we don't compute the doms, then we never mark blocks as loops. */
if (fgDomsComputed)
{
noway_assert(list.gtNext->gtPrev == &list);
list.gtNext->gtPrev = NULL;
+#ifdef DEBUG
/* Keep track of the highest # of tree nodes */
-#ifdef DEBUG
if (BasicBlock::s_nMaxTrees < fgTreeSeqNum)
{
BasicBlock::s_nMaxTrees = fgTreeSeqNum;
const char* sub;
};
+// clang-format off
static escapeMapping_t s_EscapeFileMapping[] =
{
{':', "="},
{'"', """},
{0, 0}
};
+// clang-format on
const char* Compiler::fgProcessEscapes(const char* nameIn, escapeMapping_t* map)
{
// - The command "C:\Program Files (x86)\Graphviz2.38\bin\dot.exe" -Tsvg -oFoo.svg -Kdot Foo.dot
// will produce a Foo.svg file that can be opened with any svg-capable browser (e.g. IE).
// - http://rise4fun.com/Agl/
-// - Cut and paste the graph from your .dot file, replacing the digraph on the page, and then click the play button.
+// - Cut and paste the graph from your .dot file, replacing the digraph on the page, and then click the play
+// button.
// - It will show a rotating '/' and then render the graph in the browser.
// MSAGL has also been open-sourced to https://github.com/Microsoft/automatic-graph-layout.git.
//
// Here are the config values that control it:
-// COMPlus_JitDumpFg A string (ala the COMPlus_JitDump string) indicating what methods to dump flowgraphs for.
+// COMPlus_JitDumpFg A string (ala the COMPlus_JitDump string) indicating what methods to dump flowgraphs
+// for.
// COMPlus_JitDumpFgDir A path to a directory into which the flowgraphs will be dumped.
// COMPlus_JitDumpFgFile The filename to use. The default is "default.[xml|dot]".
// Note that the new graphs will be appended to this file if it already exists.
// COMPlus_JitDumpFgPhase Phase(s) after which to dump the flowgraph.
// Set to the short name of a phase to see the flowgraph after that phase.
-// Leave unset to dump after COLD-BLK (determine first cold block) or set to * for all phases.
+// Leave unset to dump after COLD-BLK (determine first cold block) or set to * for all
+// phases.
// COMPlus_JitDumpFgDot Set to non-zero to emit Dot instead of Xml Flowgraph dump. (Default is xml format.)
bool Compiler::fgDumpFlowGraph(Phases phase)
padWidth = 8;
#endif // _TARGET_AMD64_
- // If any block has IBC data, we add an "IBC weight" column just before the 'IL range' column. This column is as wide as necessary to accommodate
- // all the various IBC weights. It's at least 4 characters wide, to accommodate the "IBC" title and leading space.
+ // If any block has IBC data, we add an "IBC weight" column just before the 'IL range' column. This column is as
+ // wide as necessary to accommodate all the various IBC weights. It's at least 4 characters wide, to accommodate
+ // the "IBC" title and leading space.
int ibcColWidth = 0;
for (block = firstBlock; block != nullptr; block = block->bbNext)
{
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-
#ifndef _JIT_FP
#define _JIT_FP
enum dummyFPenum
{
- #define REGDEF(name, rnum, mask, sname) dummmy_##name = rnum,
- #include "registerfp.h"
+#define REGDEF(name, rnum, mask, sname) dummmy_##name = rnum,
+#include "registerfp.h"
FP_VIRTUALREGISTERS,
};
-
// FlatFPStateX87 holds the state of the virtual register file. For each
// virtual register we keep track to which physical register we're
// mapping. We also keep track of the physical stack.
unsigned Pop ();
void Push (unsigned uEntry);
bool IsEmpty ();
-
+
// Debug/test methods
static bool AreEqual (FlatFPStateX87* pSrc, FlatFPStateX87* pDst);
- #ifdef DEBUG
+#ifdef DEBUG
bool IsValidEntry (unsigned uEntry);
bool IsConsistent ();
void UpdateMappingFromStack ();
{
m_bIgnoreConsistencyChecks = bIgnore;
}
- #else
+#else
inline void IgnoreConsistencyChecks(bool bIgnore)
- {
- }
- #endif
+ {
+ }
+#endif
unsigned m_uVirtualMap[FP_VIRTUALREGISTERS];
unsigned m_uStack[FP_PHYSICREGISTERS];
unsigned m_uStackSize;
-};
-
+};
+
#endif // FEATURE_STACK_FP_X87
#endif
#endif
/* Write the method size first (using between 1 and 5 bytes) */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (compiler->verbose)
// pointers" section of the GC info even if lvTracked==true
// Has this argument been fully enregistered?
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef LEGACY_BACKEND
if (!varDsc->lvOnFrame)
#else // LEGACY_BACKEND
}
// If we haven't continued to the next variable, we should report this as an untracked local.
-
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if DOUBLE_ALIGN
// For genDoubleAlign(), locals are addressed relative to ESP and
// arguments are addressed relative to EBP.
// (2) a regular one for after the filter
// and then adjust the original lifetime to end before
// the filter.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (compiler->verbose)
// somewhere inside it, so we only create 1 new lifetime,
// and then adjust the original lifetime to end before
// the filter.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (compiler->verbose)
// lifetime for the part inside the filter and adjust
// the start of the original lifetime to be the end
// of the filter
-
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (compiler->verbose)
{
{
// The variable lifetime is completely within the filter,
// so just add the pinned flag.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (compiler->verbose)
{
gcDumpVarPtrDsc(varTmp);
}
#endif // DEBUG
+
varTmp->vpdVarNum |= pinned_OFFSET_FLAG;
#ifdef DEBUG
if (compiler->verbose)
*/
/* Has this argument been fully enregistered? */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef LEGACY_BACKEND
if (!varDsc->lvOnFrame)
#else // LEGACY_BACKEND
// by the caller of the Push() method.
enum IndentChars {ICVertical, ICBottom, ICTop, ICMiddle, ICDash, ICEmbedded, ICTerminal, ICError, IndentCharCount };
+
+// clang-format off
// Sets of strings for different dumping options vert bot top mid dash embedded terminal error
static const char* emptyIndents[IndentCharCount] = { " ", " ", " ", " ", " ", "{", "", "?" };
static const char* asciiIndents[IndentCharCount] = { "|", "\\", "/", "+", "-", "{", "*", "?" };
static const char* unicodeIndents[IndentCharCount] = { "\xe2\x94\x82", "\xe2\x94\x94", "\xe2\x94\x8c", "\xe2\x94\x9c", "\xe2\x94\x80", "{", "\xe2\x96\x8c", "?" };
+// clang-format on
+
typedef ArrayStack<Compiler::IndentInfo> IndentInfoStack;
struct IndentStack
{
}
// Now set all of the appropriate entries to 'large'
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// On ARM32, ARM64 and System V for struct returning
// there is code that does GT_ASG-tree.CopyObj call.
// CopyObj is a large node and the GT_ASG is small, which triggers an exception.
-#if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
GenTree::s_gtNodeSizes[GT_ASG ] = TREE_NODE_SZ_LARGE;
GenTree::s_gtNodeSizes[GT_RETURN ] = TREE_NODE_SZ_LARGE;
#endif // defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
case GT_ADDR:
+#if FEATURE_STACK_FP_X87
/* If the operand was floating point, pop the value from the stack */
-#if FEATURE_STACK_FP_X87
if (varTypeIsFloating(op1->TypeGet()))
{
codeGen->genDecrementFPstkLevel();
// [base + idx * mul + cns] // mul can be 0, 2, 4, or 8
// Note that mul == 0 is semantically equivalent to mul == 1.
// Note that cns can be zero.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if SCALED_ADDR_MODES
assert((base != NULL) || (idx != NULL && mul >= 2));
#else
costSz += (op1->gtCostSz + op2->gtCostSz);
DONE_OP1_AFTER_COST:
+#if FEATURE_STACK_FP_X87
/*
Binary FP operators pop 2 operands and produce 1 result;
FP comparisons pop 2 operands and produces 0 results.
assignments consume 1 value and don't produce anything.
*/
-#if FEATURE_STACK_FP_X87
if (isflt && !tree->IsPhiDefn())
{
assert(oper != GT_COMMA);
#endif
#endif
+#if GTF_CALL_REG_SAVE
// Normally function calls don't preserve caller save registers
// and thus are much more expensive.
// However a few function calls do preserve these registers
// such as the GC WriteBarrier helper calls.
-#if GTF_CALL_REG_SAVE
if (!(tree->gtFlags & GTF_CALL_REG_SAVE))
#endif
{
DONE:
#if FEATURE_STACK_FP_X87
-// printf("[FPlvl=%2u] ", genGetFPstkLevel()); gtDispTree(tree, 0, true);
+ // printf("[FPlvl=%2u] ", genGetFPstkLevel()); gtDispTree(tree, 0, true);
noway_assert((unsigned char)codeGen->genFPstkLevel == codeGen->genFPstkLevel);
tree->gtFPlvl = (unsigned char)codeGen->genFPstkLevel;
#define GTSTRUCT_4(nm, tag, tag2, tag3, tag4) /*handle explicitly*/
#define GTSTRUCT_N(nm, ...) /*handle explicitly*/
#include "gtstructs.h"
- // If FEATURE_EH_FUNCLETS is set, then GT_JMP becomes the only member of Val, and will be handled above.
+
#if !FEATURE_EH_FUNCLETS
+ // If FEATURE_EH_FUNCLETS is set, then GT_JMP becomes the only member of Val, and will be handled above.
case GT_END_LFIN:
case GT_JMP:
{ GenTreeVal gt(GT_JMP, TYP_INT, 0); res = *reinterpret_cast<VtablePtr*>(>); break; }
#if SMALL_TREE_NODES
/* This local variable node may later get transformed into a large node */
-// assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_LCL_VAR]);
+ // assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_LCL_VAR]);
GenTreePtr node = new(this, GT_CALL) GenTreeLclVar(type, lnum, ILoffs
DEBUGARG(/*largeNode*/true));
if (kind & GTK_SMPOP)
{
/* If necessary, make sure we allocate a "fat" tree node */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if SMALL_TREE_NODES
switch (oper)
{
bool result = false;
+#if FEATURE_SET_FLAGS
// This method is a Nop unless FEATURE_SET_FLAGS is defined
-#if FEATURE_SET_FLAGS
// In order to set GTF_SET_FLAGS
// we must have a GTK_SMPOP
// and we have a integer or machine size type (not floating point or TYP_LONG on 32-bit)
// get child msg
if (tree->IsCall())
{
- // If this is a call and the arg (listElem) is a GT_LIST (Unix LCL_FLD for passing a var in multiple registers)
- // print the nodes of the nested list and continue to the next argument.
+ // If this is a call and the arg (listElem) is a GT_LIST (Unix LCL_FLD for passing a var in
+ // multiple registers) print the nodes of the nested list and continue to the next argument.
if (listElem->gtOper == GT_LIST)
{
int listCount = 0;
// Cross-compilation is an issue here; if that becomes an important scenario, we should
// capture the target-specific values of overflow casts to the various integral types as
// constants in a target-specific function.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_XARCH_
// Don't fold conversions of +inf/-inf to integral value as the value returned by JIT helper
// expect long constants to show up as an operand of overflow cast operation.
//
// TODO-CQ: Once fgMorphArgs() is fixed this restriction could be removed.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef LEGACY_BACKEND
if (!fgGlobalMorph)
{
goto OVF;
INT_OVF:
+#ifndef LEGACY_BACKEND
// Don't fold overflow operations if not global morph phase.
// The reason for this is that this optimization is replacing a gentree node
// with another new gentree node. Say a GT_CALL(arglist) has one 'arg'
// expect long constants to show up as an operand of overflow cast operation.
//
// TODO-CQ: Once fgMorphArgs() is fixed this restriction could be removed.
-#ifndef LEGACY_BACKEND
+
if (!fgGlobalMorph)
{
assert(tree->gtOverflow());
// TODO-Amd64-Unix: Remove the code that disables optimizations for this method when the clang
// optimizer is fixed and/or the method implementation is refactored in a simpler code.
- // There is a bug in the clang-3.5 optimizer. The issue is that in release build the optimizer is mistyping
- // (or just wrongly decides to use 32 bit operation for a corner case of MIN_LONG) the args of the (ltemp / lval2)
- // to int (it does a 32 bit div operation instead of 64 bit.)
- // For the case of lval1 and lval2 equal to MIN_LONG (0x8000000000000000) this results in raising a SIGFPE.
+ // There is a bug in the clang-3.5 optimizer. The issue is that in release build the
+ // optimizer is mistyping (or just wrongly decides to use 32 bit operation for a corner
+ // case of MIN_LONG) the args of the (ltemp / lval2) to int (it does a 32 bit div
+ // operation instead of 64 bit). For the case of lval1 and lval2 equal to MIN_LONG
+ // (0x8000000000000000) this results in raising a SIGFPE.
// Optimizations disabled for now. See compiler.h.
if ((ltemp/lval2) != lval1) goto LNG_OVF;
}
//
// Example:
// float a = float.MaxValue;
- // float b = a*a; This will produce +inf in single precision and 1.1579207543382391e+077 in double precision.
+ // float b = a*a; This will produce +inf in single precision and 1.1579207543382391e+077 in double
+ // precision.
// float c = b/b; This will produce NaN in single precision and 1 in double precision.
case GT_ADD:
if (op1->TypeGet() == TYP_FLOAT)
//------------------------------------------------------------------------
void Compiler::gtCheckQuirkAddrExposedLclVar(GenTreePtr tree, GenTreeStack* parentStack)
{
- // We only need to Quirk for _TARGET_64BIT_
#ifdef _TARGET_64BIT_
+ // We only need to Quirk for _TARGET_64BIT_
// Do we have a parent node that is a Call?
if (!Compiler::gtHasCallOnStack(parentStack))
// that we don't miss 'use' of any local. The below logic is making the assumption
// that in case of LEA(base, index, offset) - only base can be a GT_LCL_VAR_ADDR
// and index is not.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
GenTreePtr index = gtOp.gtOp2;
if (index != nullptr)
#include "nodeinfo.h"
#include "simd.h"
-// Debugging GenTree is much easier if we add a magic virtual function to make the debugger able to figure out what type it's got.
-// This is enabled by default in DEBUG. To enable it in RET builds (temporarily!), you need to change the build to define DEBUGGABLE_GENTREE=1,
-// as well as pass /OPT:NOICF to the linker (or else all the vtables get merged, making the debugging value supplied by them useless).
-// See protojit.nativeproj for a commented example of setting the build flags correctly.
+// Debugging GenTree is much easier if we add a magic virtual function to make the debugger able to figure out what type
+// it's got. This is enabled by default in DEBUG. To enable it in RET builds (temporarily!), you need to change the
+// build to define DEBUGGABLE_GENTREE=1, as well as pass /OPT:NOICF to the linker (or else all the vtables get merged,
+// making the debugging value supplied by them useless). See protojit.nativeproj for a commented example of setting the
+// build flags correctly.
#ifndef DEBUGGABLE_GENTREE
#ifdef DEBUG
#define DEBUGGABLE_GENTREE 1
GT_COUNT,
+#ifdef _TARGET_64BIT_
// GT_CNS_NATIVELONG is the gtOper symbol for GT_CNS_LNG or GT_CNS_INT, depending on the target.
// For the 64-bit targets we will only use GT_CNS_INT as it used to represent all the possible sizes
- // For the 32-bit targets we use a GT_CNS_LNG to hold a 64-bit integer constant and GT_CNS_INT for all others.
- // In the future when we retarget the JIT for x86 we should consider eliminating GT_CNS_LNG
- //
-#ifdef _TARGET_64BIT_
GT_CNS_NATIVELONG = GT_CNS_INT,
#else
+ // For the 32-bit targets we use a GT_CNS_LNG to hold a 64-bit integer constant and GT_CNS_INT for all others.
+ // In the future when we retarget the JIT for x86 we should consider eliminating GT_CNS_LNG
GT_CNS_NATIVELONG = GT_CNS_LNG,
#endif
}
//
// Register or register pair number of the node.
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
public:
#define GTF_CALL_UNMANAGED 0x80000000 // GT_CALL -- direct call to unmanaged code
#define GTF_CALL_INLINE_CANDIDATE 0x40000000 // GT_CALL -- this call has been marked as an inline candidate
-//
+
#define GTF_CALL_VIRT_KIND_MASK 0x30000000
#define GTF_CALL_NONVIRT 0x00000000 // GT_CALL -- a non virtual call
#define GTF_CALL_VIRT_STUB 0x10000000 // GT_CALL -- a stub-dispatch virtual call
#define GTF_INX_REFARR_LAYOUT 0x20000000 // GT_INDEX -- same as GTF_IND_REFARR_LAYOUT
#define GTF_INX_STRING_LAYOUT 0x40000000 // GT_INDEX -- this uses the special string array layout
- #define GTF_IND_VOLATILE 0x40000000 // GT_IND -- the load or store must use volatile sematics (this is a nop on X86)
+ #define GTF_IND_VOLATILE 0x40000000 // GT_IND -- the load or store must use volatile semantics (this is a nop
+ // on X86)
#define GTF_IND_REFARR_LAYOUT 0x20000000 // GT_IND -- the array holds object refs (only effects layout of Arrays)
#define GTF_IND_TGTANYWHERE 0x10000000 // GT_IND -- the target could be anywhere
#define GTF_IND_TLS_REF 0x08000000 // GT_IND -- the target is accessed via TLS
- #define GTF_IND_ASG_LHS 0x04000000 // GT_IND -- this GT_IND node is (the effective val) of the LHS of an assignment; don't evaluate it independently.
- #define GTF_IND_UNALIGNED 0x02000000 // GT_IND -- the load or store is unaligned (we assume worst case alignment of 1 byte)
+ #define GTF_IND_ASG_LHS 0x04000000 // GT_IND -- this GT_IND node is (the effective val) of the LHS of an
+ // assignment; don't evaluate it independently.
+ #define GTF_IND_UNALIGNED 0x02000000 // GT_IND -- the load or store is unaligned (we assume worst case
+ // alignment of 1 byte)
#define GTF_IND_INVARIANT 0x01000000 // GT_IND -- the target is invariant (a prejit indirection)
- #define GTF_IND_ARR_LEN 0x80000000 // GT_IND -- the indirection represents an array length (of the REF contribution to its argument).
+ #define GTF_IND_ARR_LEN 0x80000000 // GT_IND -- the indirection represents an array length (of the REF
+ // contribution to its argument).
#define GTF_IND_ARR_INDEX 0x00800000 // GT_IND -- the indirection represents an (SZ) array index
#define GTF_IND_FLAGS (GTF_IND_VOLATILE|GTF_IND_REFARR_LAYOUT|GTF_IND_TGTANYWHERE|GTF_IND_NONFAULTING|\
GTF_IND_TLS_REF|GTF_IND_UNALIGNED|GTF_IND_INVARIANT|GTF_IND_ARR_INDEX)
- #define GTF_CLS_VAR_ASG_LHS 0x04000000 // GT_CLS_VAR -- this GT_CLS_VAR node is (the effective val) of the LHS of an assignment; don't evaluate it independently.
+ #define GTF_CLS_VAR_ASG_LHS 0x04000000 // GT_CLS_VAR -- this GT_CLS_VAR node is (the effective val) of the LHS
+ // of an assignment; don't evaluate it independently.
#define GTF_ADDR_ONSTACK 0x80000000 // GT_ADDR -- this expression is guaranteed to be on the stack
- #define GTF_ADDRMODE_NO_CSE 0x80000000 // GT_ADD/GT_MUL/GT_LSH -- Do not CSE this node only, forms complex addressing mode
+ #define GTF_ADDRMODE_NO_CSE 0x80000000 // GT_ADD/GT_MUL/GT_LSH -- Do not CSE this node only, forms complex
+ // addressing mode
#define GTF_MUL_64RSLT 0x40000000 // GT_MUL -- produce 64-bit result
#define GTF_RELOP_NAN_UN 0x80000000 // GT_<relop> -- Is branch taken if ops are NaN?
#define GTF_RELOP_JMP_USED 0x40000000 // GT_<relop> -- result of compare used for jump or ?:
#define GTF_RELOP_QMARK 0x20000000 // GT_<relop> -- the node is the condition for ?:
- #define GTF_RELOP_SMALL 0x10000000 // GT_<relop> -- We should use a byte or short sized compare (op1->gtType is the small type)
- #define GTF_RELOP_ZTT 0x08000000 // GT_<relop> -- Loop test cloned for converting while-loops into do-while with explicit "loop test" in the header block.
+ #define GTF_RELOP_SMALL 0x10000000 // GT_<relop> -- We should use a byte or short sized compare (op1->gtType
+ // is the small type)
+ #define GTF_RELOP_ZTT 0x08000000 // GT_<relop> -- Loop test cloned for converting while-loops into do-while
+ // with explicit "loop test" in the header block.
- #define GTF_QMARK_CAST_INSTOF 0x80000000 // GT_QMARK -- Is this a top (not nested) level qmark created for castclass or instanceof?
+ #define GTF_QMARK_CAST_INSTOF 0x80000000 // GT_QMARK -- Is this a top (not nested) level qmark created for
+ // castclass or instanceof?
- #define GTF_BOX_VALUE 0x80000000 // GT_BOX -- "box" is on a value type
+ #define GTF_BOX_VALUE 0x80000000 // GT_BOX -- "box" is on a value type
#define GTF_ICON_HDL_MASK 0xF0000000 // Bits used by handle types below
#define GTF_STMT_CMPADD 0x80000000 // GT_STMT -- added by compiler
#define GTF_STMT_HAS_CSE 0x40000000 // GT_STMT -- CSE def or use was substituted
- #define GTF_STMT_TOP_LEVEL 0x20000000 // GT_STMT -- Top-level statement - true iff gtStmtList->gtPrev == nullptr
+ #define GTF_STMT_TOP_LEVEL 0x20000000 // GT_STMT -- Top-level statement -
+ // true iff gtStmtList->gtPrev == nullptr
// True for all stmts when in FGOrderTree
#define GTF_STMT_SKIP_LOWER 0x10000000 // GT_STMT -- Skip lowering if we already lowered an embedded stmt.
// Requires "this" to be a GT_IND. Requires the outermost caller to set "*pFldSeq" to nullptr.
// Returns true if it is an array index expression, or access to a (sequence of) struct field(s)
- // within a struct array element. If it returns true, sets *arrayInfo to the array information, and sets *pFldSeq to the sequence
- // of struct field accesses.
+ // within a struct array element. If it returns true, sets *arrayInfo to the array information, and sets *pFldSeq
+ // to the sequence of struct field accesses.
bool ParseArrayElemForm(Compiler* comp, ArrayInfo* arrayInfo, FieldSeqNode** pFldSeq);
// Requires "this" to be the address of a (possible) array element (or struct field within that).
// returns true and sets "*pFldSeq" to the sequence of fields with which those constants are annotated.
bool ParseOffsetForm(Compiler* comp, FieldSeqNode** pFldSeq);
- // Labels "*this" as an array index expression: label all constants and variables that could contribute, as part of an affine expression, to the value of the
- // of the index.
+ // Labels "*this" as an array index expression: label all constants and variables that could contribute, as part of
+ // an affine expression, to the value of the index.
void LabelIndex(Compiler* comp, bool isConst = true);
// Assumes that "this" occurs in a context where it is being dereferenced as the LHS of an assignment-like
regMaskTP gtCallRegUsedMask; // mask of registers used to pass parameters
#endif // LEGACY_BACKEND
+#if FEATURE_MULTIREG_RET
// State required to support multi-reg returning call nodes.
// For now it is enabled only for x64 unix.
//
// TODO-AllArch: enable for all call nodes to unify single-reg and multi-reg returns.
-#if FEATURE_MULTIREG_RET
ReturnTypeDesc gtReturnTypeDesc;
// gtRegNum would always be the first return reg.
// So, for example:
// 1. Base + Index is legal with Scale==1
// 2. If Index is null, Scale should be zero (or uninitialized / unused)
- // 3. If Scale==1, then we should have "Base" instead of "Index*Scale", and "Base + Offset" instead of "Index*Scale + Offset".
+ // 3. If Scale==1, then we should have "Base" instead of "Index*Scale", and "Base + Offset" instead of
+ // "Index*Scale + Offset".
// First operand is base address/pointer
bool HasBase() const { return gtOp1 != nullptr; }
// Represents GT_COPY or GT_RELOAD node
struct GenTreeCopyOrReload : public GenTreeUnOp
{
+#if FEATURE_MULTIREG_RET
// State required to support copy/reload of a multi-reg call node.
// First register is always given by gtRegNum.
//
-#if FEATURE_MULTIREG_RET
regNumber gtOtherRegs[MAX_RET_REG_COUNT - 1];
#endif
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+// clang-format off
/*****************************************************************************/
#ifndef GTNODE
#error Define GTNODE before including this file.
/*****************************************************************************/
#undef GTNODE
/*****************************************************************************/
+// clang-format on
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+// clang-format off
/*****************************************************************************/
#undef GTSTRUCT_4
#undef GTSTRUCT_N
/*****************************************************************************/
+
+// clang-format on
}
delete[] nodesThis;
- delete[] nodesOther;;
+ delete[] nodesOther;
}
indexType HbvNext(hashBv *bv, Compiler *comp);
+// clang-format off
#define FOREACH_HBV_BIT_SET(index, bv) \
{ \
for (int hashNum=0; hashNum<(bv)->hashtable_size(); hashNum++) {\
}\
}\
} \
-
+//clang-format on
#ifdef DEBUG
void SimpleDumpNode(hashBvNode *n);
BADCODE("stack overflow");
}
- // If we are pushing a struct, make certain we know the precise type!
#ifdef DEBUG
+ // If we are pushing a struct, make certain we know the precise type!
if (tree->TypeGet() == TYP_STRUCT)
{
assert(ti.IsType(TI_STRUCT));
// The rationale behind this workaround is to avoid modifying the VM and maintain compatibility between JIT64 and
// RyuJIT for the time being until we completely replace JIT64.
// TODO-ARM64-Cleanup: We probably want to actually modify the VM in the future to avoid the unnecessary two passes.
-#ifdef _TARGET_64BIT_
-#ifdef DEBUG
// In AMD64 we must make sure we're behaving the same way as JIT64, meaning we should only raise the verification
// exception if we are only importing and verifying. The method verNeedsVerification() can also modify the
// tiVerificationNeeded flag in the case it determines it can 'skip verification' during importation and defer it
// to a runtime check. That's why we must assert one or the other (since the flag tiVerificationNeeded can
// be turned off during importation).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_64BIT_
+
+#ifdef DEBUG
bool canSkipVerificationResult = info.compCompHnd->canSkipMethodVerification(info.compMethodHnd) != CORINFO_VERIFICATION_CANNOT_SKIP;
assert(tiVerificationNeeded || canSkipVerificationResult);
#endif // DEBUG
else
{
// We allow direct calls to value type constructors
- // NB: we have to check that the contents of tiThis is a value type, otherwise we could use a constrained
- // callvirt to illegally re-enter a .ctor on a value of reference type.
+ // NB: we have to check that the contents of tiThis is a value type, otherwise we could use a
+ // constrained callvirt to illegally re-enter a .ctor on a value of reference type.
VerifyOrReturn(tiThis.IsByRef() && DereferenceByRef(tiThis).IsValueClass(), "Bad call to a constructor");
}
}
// The non-varargs helper is enabled for CoreRT only for now. Enabling this
// unconditionally would require ReadyToRun version bump.
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if COR_JIT_EE_VERSION > 460
if (!opts.IsReadyToRun() || (eeGetEEInfo()->targetAbi == CORINFO_CORERT_ABI))
if (isRecursive)
#endif
{
- // we can actually handle if the ret is in a fallthrough block, as long as that is the only part of the sequence.
- // Make sure we don't go past the end of the IL however.
+ // we can actually handle if the ret is in a fallthrough block, as long as that is the only part of the
+ // sequence. Make sure we don't go past the end of the IL however.
codeEnd = min(codeEnd + 1, info.compCode + info.compILCodeSize);
}
call->gtType = callRetTyp;
}
- /* Check for varargs */
#if !FEATURE_VARARG
+ /* Check for varargs */
if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG ||
(sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG)
{
tailcall to a function with a different number of arguments, we
are hosed. There are ways around this (caller remembers esp value,
varargs is not caller-pop, etc), but not worth it. */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_X86_
if (canTailCall)
extraArg = gtNewArgList(instParam);
}
- // Inlining may need the exact type context (exactContextHnd) if we're inlining shared generic code, in particular to inline
- // 'polytypic' operations such as static field accesses, type tests and method calls which
+ // Inlining may need the exact type context (exactContextHnd) if we're inlining shared generic code, in particular
+ // to inline 'polytypic' operations such as static field accesses, type tests and method calls which
// rely on the exact context. The exactContextHnd is passed back to the JitInterface at appropriate points.
// exactContextHnd is not currently required when inlining shared generic code into shared
// generic code, since the inliner aborts whenever shared code polytypic operations are encountered
BADCODE("Stack should be empty after tailcall");
#endif //!_TARGET_64BIT_
}
-
-// assert(compCurBB is not a catch, finally or filter block);
-// assert(compCurBB is not a try block protected by a finally block);
+
+ // assert(compCurBB is not a catch, finally or filter block);
+ // assert(compCurBB is not a try block protected by a finally block);
// Check for permission to tailcall
bool explicitTailCall = (tailCall & PREFIX_TAILCALL_EXPLICIT) != 0;
0,
step);
assert(step->bbJumpKind == BBJ_ALWAYS);
- step->bbJumpDest = callBlock; // the previous call to a finally returns to this call (to the next finally in the chain)
+ step->bbJumpDest = callBlock; // the previous call to a finally returns to this call (to the next
+ // finally in the chain)
step->bbJumpDest->bbRefs++;
/* The new block will inherit this block's weight */
}
else
{
- // Calling the finally block. We already have a step block that is either the call-to-finally from a more nested
- // try/finally (thus we are jumping out of multiple nested 'try' blocks, each protected by a 'finally'), or the step
- // block is the return from a catch.
+ // Calling the finally block. We already have a step block that is either the call-to-finally from a
+ // more nested try/finally (thus we are jumping out of multiple nested 'try' blocks, each protected by
+ // a 'finally'), or the step block is the return from a catch.
//
- // Due to ThreadAbortException, we can't have the catch return target the call-to-finally block directly. Note that if a
- // 'catch' ends without resetting the ThreadAbortException, the VM will automatically re-raise the exception, using the
- // return address of the catch (that is, the target block of the BBJ_EHCATCHRET) as the re-raise address. If this address
- // is in a finally, the VM will refuse to do the re-raise, and the ThreadAbortException will get eaten (and lost). On
- // AMD64/ARM64, we put the call-to-finally thunk in a special "cloned finally" EH region that does look like a finally clause
- // to the VM. Thus, on these platforms, we can't have BBJ_EHCATCHRET target a BBJ_CALLFINALLY directly. (Note that on ARM32,
- // we don't mark the thunk specially -- it lives directly within the 'try' region protected by the finally, since we generate
- // code in such a way that execution never returns to the call-to-finally call, and the finally-protected 'try' region doesn't
- // appear on stack walks.)
+ // Due to ThreadAbortException, we can't have the catch return target the call-to-finally block
+ // directly. Note that if a 'catch' ends without resetting the ThreadAbortException, the VM will
+ // automatically re-raise the exception, using the return address of the catch (that is, the target
+ // block of the BBJ_EHCATCHRET) as the re-raise address. If this address is in a finally, the VM will
+ // refuse to do the re-raise, and the ThreadAbortException will get eaten (and lost). On AMD64/ARM64,
+ // we put the call-to-finally thunk in a special "cloned finally" EH region that does look like a
+ // finally clause to the VM. Thus, on these platforms, we can't have BBJ_EHCATCHRET target a
+ // BBJ_CALLFINALLY directly. (Note that on ARM32, we don't mark the thunk specially -- it lives directly
+ // within the 'try' region protected by the finally, since we generate code in such a way that execution
+ // never returns to the call-to-finally call, and the finally-protected 'try' region doesn't appear on
+ // stack walks.)
assert(step->bbJumpKind == BBJ_ALWAYS || step->bbJumpKind == BBJ_EHCATCHRET);
#endif // !FEATURE_EH_CALLFINALLY_THUNKS
callBlock = fgNewBBinRegion(BBJ_CALLFINALLY, callFinallyTryIndex, callFinallyHndIndex, step);
- step->bbJumpDest = callBlock; // the previous call to a finally returns to this call (to the next finally in the chain)
+ step->bbJumpDest = callBlock; // the previous call to a finally returns to this call (to the next
+ // finally in the chain)
step->bbJumpDest->bbRefs++;
#if defined(_TARGET_ARM_)
// We are jumping out of a catch-protected try.
//
// If we are returning from a call to a finally, then we must have a step block within a try
- // that is protected by a catch. This is so when unwinding from that finally (e.g., if code within the finally
- // raises an exception), the VM will find this step block, notice that it is in a protected region, and invoke
- // the appropriate catch.
+ // that is protected by a catch. This is so when unwinding from that finally (e.g., if code within the
+ // finally raises an exception), the VM will find this step block, notice that it is in a protected region,
+ // and invoke the appropriate catch.
//
// We also need to handle a special case with the handling of ThreadAbortException. If a try/catch
// catches a ThreadAbortException (which might be because it catches a parent, e.g. System.Exception),
// and the catch doesn't call System.Threading.Thread::ResetAbort(), then when the catch returns to the VM,
- // the VM will automatically re-raise the ThreadAbortException. When it does this, it uses the target address
- // of the catch return as the new exception address. That is, the re-raised exception appears to occur at
- // the catch return address. If this exception return address skips an enclosing try/catch that catches
- // ThreadAbortException, then the enclosing try/catch will not catch the exception, as it should. For example:
+ // the VM will automatically re-raise the ThreadAbortException. When it does this, it uses the target
+ // address of the catch return as the new exception address. That is, the re-raised exception appears to
+ // occur at the catch return address. If this exception return address skips an enclosing try/catch that
+ // catches ThreadAbortException, then the enclosing try/catch will not catch the exception, as it should.
+ // For example:
//
// try {
// try {
// } finally { }
// OUTSIDE:
//
- // In the above nested try-finally example, we create a step block (call it Bstep) which in branches to a block where
- // a finally would branch to (and such block is marked as finally target). Block B1 branches to step block. Because
- // of re-import of B0, Bstep is also orphaned. Since Bstep is a finally target it cannot be removed. To work around
- // this we will duplicate B0 (call it B0Dup) before reseting. B0Dup is marked as BBJ_CALLFINALLY and only serves to pair
- // up with B1 (BBJ_ALWAYS) that got orphaned. Now during orphan block deletion B0Dup and B1 will be treated as pair
- // and handled correctly.
+ // In the above nested try-finally example, we create a step block (call it Bstep) which in turn branches to the
+ // block that a finally would branch to (and such a block is marked as a finally target). Block B1 branches to the
+ // step block. Because of the re-import of B0, Bstep is also orphaned. Since Bstep is a finally target it cannot be
+ // removed. To work around this we will duplicate B0 (call it B0Dup) before resetting. B0Dup is marked as
+ // BBJ_CALLFINALLY and only serves to pair up with B1 (BBJ_ALWAYS) that got orphaned. Now during orphan block
+ // deletion B0Dup and B1 will be treated as a pair and handled correctly.
if (block->bbJumpKind == BBJ_CALLFINALLY)
{
BasicBlock *dupBlock = bbNewBasicBlock(block->bbJumpKind);
// <BUGNUM> VSW 318822 </BUGNUM>
//
// So here we decide to make the resulting type to be a native int.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_64BIT_
if (genActualType(op1->TypeGet()) != TYP_I_IMPL)
DONE_APPEND:
- // Remember at which BC offset the tree was finished
#ifdef DEBUG
+ // Remember at which BC offset the tree was finished
impNoteLastILoffs();
#endif
break;
{
// This is a sequence of (ldloc, dup, stloc). Can simplify
// to (ldloc, stloc). Goto LDVAR to reconstruct the ldloc node.
-
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (tiVerificationNeeded)
{
// From SPILL_APPEND
impAppendTree(op1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
- // From DONE_APPEND
#ifdef DEBUG
+ // From DONE_APPEND
impNoteLastILoffs();
#endif
op1 = NULL;
op1 = impPopStack().val;
assertImp(genActualTypeIsIntOrI(op1->TypeGet()));
- // Widen 'op1' on 64-bit targets
#ifdef _TARGET_64BIT_
+ // Widen 'op1' on 64-bit targets
if (op1->TypeGet() != TYP_I_IMPL)
{
if (op1->OperGet() == GT_CNS_INT)
op1->gtFlags |= GTF_IND_UNALIGNED;
}
- /* V4.0 allows assignment of i4 constant values to i8 type vars when IL verifier is bypassed (full trust apps). The reason this works is
- that JIT stores an i4 constant in Gentree union during importation and reads from the union as if it were a long during code generation.
- Though this can potentially read garbage, one can get lucky to have this working correctly.
+ /* V4.0 allows assignment of i4 constant values to i8 type vars when IL verifier is bypassed (full trust
+ apps). The reason this works is that JIT stores an i4 constant in Gentree union during importation
+ and reads from the union as if it were a long during code generation. Though this can potentially
+ read garbage, one can get lucky to have this working correctly.
- This code pattern is generated by Dev10 MC++ compiler while storing to fields when compiled with /O2 switch (default when compiling
- retail configs in Dev10) and a customer app has taken a dependency on it. To be backward compatible, we will explicitly add an upward
- cast here so that it works correctly always.
+ This code pattern is generated by Dev10 MC++ compiler while storing to fields when compiled with /O2
+ switch (default when compiling retail configs in Dev10) and a customer app has taken a dependency on
+ it. To be backward compatible, we will explicitly add an upward cast here so that it works correctly
+ always.
- Note that this is limited to x86 alone as thereis no back compat to be addressed for Arm JIT for V4.0.
+ Note that this is limited to x86 alone as there is no back compat to be addressed for Arm JIT for
+ V4.0.
*/
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_X86_
if (op1->TypeGet() != op2->TypeGet() &&
op2->OperIsConst() &&
//Observation: the following code introduces a boxed value class on the stack, but,
//according to the ECMA spec, one would simply expect: tiRetVal = typeInfo(TI_REF,impGetObjectClass());
- /* Push the result back on the stack, */
- /* even if clsHnd is a value class we want the TI_REF */
- /* we call back to the EE to get find out what hte type we should push (for nullable<T> we push T) */
+ // Push the result back on the stack,
+ // even if clsHnd is a value class we want the TI_REF
+ // we call back to the EE to find out what type we should push (for nullable<T> we push T)
tiRetVal = typeInfo(TI_REF, info.compCompHnd->getTypeForBox(resolvedToken.hClass));
}
(unsigned)CHECK_SPILL_ALL);
}
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if defined(_TARGET_ARM_)
// TODO-ARM64-NYI: HFA
// TODO-AMD64-Unix and TODO-ARM once the ARM64 functionality is implemented the
// next ifdefs could be refactored in a single method with the ifdef inside.
-#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-#if defined(_TARGET_ARM_)
if (IsHfa(retClsHnd))
{
// Same as !IsHfa but just don't bother with impAssignStructPtr.
// This code will be called only if the struct return has not been normalized (i.e. 2 eightbytes - max allowed.)
assert(retRegCount == MAX_RET_REG_COUNT);
// Same as !structDesc.passedInRegisters but just don't bother with impAssignStructPtr.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
impAppendTree(op2, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
// There are cases where the address of the implicit RetBuf should be returned explicitly (in RAX).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if defined(_TARGET_AMD64_)
+
// x64 (System V and Win64) calling convention requires to
// return the implicit return buffer explicitly (in RAX).
// Change the return type to be BYREF.
}
impAppendTree(op1, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
- // Remember at which BC offset the tree was finished
#ifdef DEBUG
+ // Remember at which BC offset the tree was finished
impNoteLastILoffs();
#endif
return true;
}
#endif // _TARGET_64BIT_
+#if FEATURE_X87_DOUBLES
// X87 stack doesn't differentiate between float/double
// so promoting is no big deal.
// For everybody else keep it as float until we have a collision and then promote
// Just like for x64's TYP_INT<->TYP_I_IMPL
-#if FEATURE_X87_DOUBLES
if (multRef > 1 && tree->gtType == TYP_FLOAT)
{
// compiler -- compiler instance doing the inlining (root compiler)
// isPrejitRoot -- true if this compiler is prejitting the root method
+// clang-format off
DiscretionaryPolicy::DiscretionaryPolicy(Compiler* compiler, bool isPrejitRoot)
: LegacyPolicy(compiler, isPrejitRoot)
, m_Depth(0)
{
// Empty
}
+// clang-format on
//------------------------------------------------------------------------
// NoteBool: handle an observed boolean value
// R=0.55, MSE=177, MAE=6.59
//
// Suspect it doesn't handle factors properly...
+ // clang-format off
double sizeEstimate =
-13.532 +
0.359 * (int) m_CallsiteFrequency +
-5.357 * m_IsFromPromotableValueClass +
-7.901 * (m_ConstantArgFeedsConstantTest > 0 ? 1 : 0) +
0.065 * m_CalleeNativeSizeEstimate;
+ // clang-format on
// Scaled up and reported as an integer value.
m_ModelCodeSizeEstimate = (int) (SIZE_SCALE * sizeEstimate);
{
// Performance estimate based on GLMNET model.
// R=0.24, RMSE=16.1, MAE=8.9.
+ // clang-format off
double perCallSavingsEstimate =
-7.35
+ (m_CallsiteFrequency == InlineCallsiteFrequency::BORING ? 0.76 : 0)
+ (m_ArgType[3] == CORINFO_TYPE_BOOL ? 20.7 : 0)
+ (m_ArgType[4] == CORINFO_TYPE_CLASS ? 0.38 : 0)
+ (m_ReturnType == CORINFO_TYPE_CLASS ? 2.32 : 0);
+ // clang-format on
// Scaled up and reported as an integer value.
m_PerCallInstructionEstimate = (int) (SIZE_SCALE * perCallSavingsEstimate);
const char * CodeGen::genInsName(instruction ins)
{
+// clang-format off
static
const char * const insNames[] =
{
#error "Unknown _TARGET_"
#endif
};
+// clang-format on
+
assert((unsigned)ins < sizeof(insNames)/sizeof(insNames[0]));
assert(insNames[ins] != NULL);
const char * CodeGen::genSizeStr(emitAttr attr)
{
+// clang-format off
static
const char * const sizes[] =
{
0, 0, 0, 0, 0, 0, 0,
"ymmword ptr"
};
+// clang-format on
unsigned size = EA_SIZE(attr);
else
{
// Force the address into a register
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef LEGACY_BACKEND
genCodeForTree(addr, RBM_NONE);
#endif // LEGACY_BACKEND
}
#elif defined(_TARGET_ARMARCH_)
// Load imm into a register
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef LEGACY_BACKEND
regNumber immReg = regToUse;
assert(regToUse != REG_NA);
/*****************************************************************************/
+// clang-format off
DECLARE_TYPED_ENUM(instruction,unsigned)
{
#if defined(_TARGET_XARCH_)
#endif
InstructionSet_NONE
};
+// clang-format on
/*****************************************************************************/
#endif//_INSTR_H_
// * If the instruction writes to more than one destination register, update the function
// emitInsMayWriteMultipleRegs in emitArm.cpp.
+// clang-format off
INST9(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE)
// enum name FP LD/ST Rdn,Rm Rd,Rn,Rm Rdn,i8 Rd,Rn,i3 Rd,Rn,+i8<<i4 Rd,Rn,Rm{,sh} SP,i9 Rd,SP,i10 Rd,PC,i10
INST1(vmov_d2i, "vmov.d2i", 1, 0, IF_T2_VMOVD, 0xEC500B10) // A8.6.332 VMOV from a double to 2 int regs
INST1(vmov_i2f, "vmov.i2f", 1, 0, IF_T2_VMOVS, 0xEE000A10) // A8.6.330 VMOV (between ARM core register and single-precision register)
INST1(vmov_f2i, "vmov.f2i", 1, 0, IF_T2_VMOVS, 0xEE100A10) // A8.6.330 VMOV (between ARM core register and single-precision register)
+// clang-format on
/*****************************************************************************/
#undef INST1
// * If the instruction writes to more than one destination register, update the function
// emitInsMayWriteMultipleRegs in emitArm64.cpp.
-
+// clang-format off
INST9(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE)
// enum name FP LD/ST DR_2E DR_2G DI_1B DI_1D DV_3C DV_2B DV_2C DV_2E DV_2F
INST1(uxtl2, "uxtl2", 0, 0, IF_DV_2O, 0x6F00A400)
// uxtl2 Vd,Vn DV_2O 011011110iiiiiii 101001nnnnnddddd 6F00 A400 Vd,Vn (shift - vector)
-
+// clang-format on
/*****************************************************************************/
#undef INST1
*
******************************************************************************/
+// clang-format off
#if !defined(_TARGET_XARCH_)
#error Unexpected target type
#endif
#undef INST4
#undef INST5
/*****************************************************************************/
+
+// clang-format on
#endif
#endif
+// Clang-format messes with the indentation of comments if they directly precede an
+// ifdef. This macro allows us to anchor the comments to the regular flow of code.
+#define CLANG_FORMAT_COMMENT_ANCHOR ;
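+//
+// Illustrative sketch (not part of the formatted sources): without the anchor, a comment placed
+// directly above a conditional compilation block, e.g.
+//
+//     // explain why SOME_FLAG is needed here   (SOME_FLAG is a made-up example macro)
+//     #ifdef SOME_FLAG
+//     ...
+//     #endif
+//
+// may be re-indented by clang-format. Ending the comment with CLANG_FORMAT_COMMENT_ANCHOR;
+// keeps it anchored to the surrounding statements, as done throughout this change.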
+
#ifdef _MSC_VER
// These don't seem useful, so turning them off is no big deal
#pragma warning(disable:4510) // can't generate default constructor
#endif
#ifdef _MSC_VER
-#define CHECK_STRUCT_PADDING 0 // Set this to '1' to enable warning C4820 "'bytes' bytes padding added after construct 'member_name'" on interesting structs/classes
+#define CHECK_STRUCT_PADDING 0 // Set this to '1' to enable warning C4820 "'bytes' bytes padding added after
+ // construct 'member_name'" on interesting structs/classes
#else
#define CHECK_STRUCT_PADDING 0 // Never enable it for non-MSFT compilers
#endif
#define VERIFY_GC_TABLES 0
#define REARRANGE_ADDS 1
-#define FUNC_INFO_LOGGING 1 // Support dumping function info to a file. In retail, only NYIs, with no function name, are dumped.
+#define FUNC_INFO_LOGGING 1 // Support dumping function info to a file. In retail, only NYIs, with no function name,
+ // are dumped.
/*****************************************************************************/
/*****************************************************************************/
/* Set these to 1 to collect and output various statistics about the JIT */
#define CALL_ARG_STATS 0 // Collect stats about calls and call arguments.
-#define COUNT_BASIC_BLOCKS 0 // Create a histogram of basic block sizes, and a histogram of IL sizes in the simple case of single block methods.
-#define COUNT_LOOPS 0 // Collect stats about loops, such as the total number of natural loops, a histogram of the number of loop exits, etc.
+#define COUNT_BASIC_BLOCKS 0 // Create a histogram of basic block sizes, and a histogram of IL sizes in the simple
+ // case of single block methods.
+#define COUNT_LOOPS 0 // Collect stats about loops, such as the total number of natural loops, a histogram of
+ // the number of loop exits, etc.
#define COUNT_RANGECHECKS 0 // Count range checks removed (in lexical CSE?).
#define DATAFLOW_ITER 0 // Count iterations in lexical CSE and constant folding dataflow.
#define DISPLAY_SIZES 0 // Display generated code, data, and GC information sizes.
CONFIG_METHODSET(JitInclude, W("JitInclude"))
CONFIG_METHODSET(JitLateDisasm, W("JitLateDisasm"))
CONFIG_METHODSET(JitMinOptsName, W("JITMinOptsName")) // Forces MinOpts for a named function
-CONFIG_METHODSET(JitNoProcedureSplitting, W("JitNoProcedureSplitting")) // Disallow procedure splitting for specified methods
+CONFIG_METHODSET(JitNoProcedureSplitting, W("JitNoProcedureSplitting")) // Disallow procedure splitting for specified
+ // methods
CONFIG_METHODSET(JitNoProcedureSplittingEH, W("JitNoProcedureSplittingEH")) // Disallow procedure splitting for specified methods if they contain exception handling
CONFIG_METHODSET(JitStressOnly, W("JitStressOnly")) // Internal Jit stress mode: stress only the specified method(s)
CONFIG_METHODSET(JitUnwindDump, W("JitUnwindDump")) // Dump the unwind codes for the method
}
else
{
- assert(block->bbTryIndex != block->bbHndIndex); // A block can't be both in the 'try' and 'handler' region of the same EH region
+ assert(block->bbTryIndex != block->bbHndIndex); // A block can't be both in the 'try' and 'handler' region
+ // of the same EH region
mostNestedRegion = block->bbHndIndex;
*inTryRegion = false;
}
{
// Use the offset of the beginning of the NOP padding, not the main block.
// This might include loop head padding, too, if this is a loop head.
- assert(block->bbUnwindNopEmitCookie); // probably not null-initialized, though, so this might not tell us anything
+ assert(block->bbUnwindNopEmitCookie); // probably not null-initialized, though, so this might not tell us
+ // anything
cookie = block->bbUnwindNopEmitCookie;
}
else
// twice the number of EH clauses in the IL, which should be good in practice.
// In extreme cases, we might need to abandon this and reallocate. See
// fgAddEHTableEntry() for more details.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
compHndBBtabAllocCount = info.compXcptnsCount; // force the resizing code to hit more frequently in DEBUG
#else // DEBUG
// In Dev11 (Visual Studio 2012), x86 did not sort the EH table (it never had before)
// but ARM did. It turns out not sorting the table can cause the EH table to incorrectly
// set the bbHndIndex value in some nested cases, and that can lead to a security exploit
- // that allows the execution of arbitrary code.
+ // that allows the execution of arbitrary code.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
}
#endif // DEBUG
+
EHblkDsc * xtab1;
EHblkDsc * xtab2;
unsigned xtabnum1, xtabnum2;
}
}
-
// fgNormalizeEH: Enforce the following invariants:
//
// 1. No block is both the first block of a handler and the first block of a try. In IL (and on entry
//
// The benefit of this is, once again, to remove the need to consider every EH region when adding new blocks.
//
-// Overall, a block can appear in the EH table exactly once: as the begin or last block of a single try, filter, or handler.
-// There is one exception: for a single-block EH region, the block can appear as both the "begin" and "last" block of the try,
-// or the "begin" and "last" block of the handler (note that filters don't have a "last" block stored, so this case doesn't apply.)
-// (Note: we could remove this special case if we wanted, and if it helps anything, but it doesn't appear that it will help.)
+// Overall, a block can appear in the EH table exactly once: as the begin or last block of a single try, filter, or
+// handler. There is one exception: for a single-block EH region, the block can appear as both the "begin" and "last"
+// block of the try, or the "begin" and "last" block of the handler (note that filters don't have a "last" block stored,
+// so this case doesn't apply.)
+// (Note: we could remove this special case if we wanted, and if it helps anything, but it doesn't appear that it will
+// help.)
//
-// These invariants simplify a number of things. When inserting a new block into a region, it is not necessary to traverse
-// the entire EH table looking to see if any EH region needs to be updated. You only ever need to update a single region (except
-// for mutually-protect "try" regions).
+// These invariants simplify a number of things. When inserting a new block into a region, it is not necessary to
+// traverse the entire EH table looking to see if any EH region needs to be updated. You only ever need to update a
+// single region (except for mutually-protect "try" regions).
//
-// Also, for example, when we're trying to determine the successors of a block B1 that leads into a try T1, if a block B2
-// violates invariant #3 by being the first block of both the handler of T1, and an enclosed try T2, inserting a block to
-// enforce this invariant prevents us from having to consider the first block of T2's handler as a possible successor of B1.
-// This is somewhat akin to breaking of "critical edges" in a flowgraph.
+// Also, for example, when we're trying to determine the successors of a block B1 that leads into a try T1, if a block
+// B2 violates invariant #3 by being the first block of both the handler of T1, and an enclosed try T2, inserting a
+// block to enforce this invariant prevents us from having to consider the first block of T2's handler as a possible
+// successor of B1. This is somewhat akin to breaking of "critical edges" in a flowgraph.
void Compiler::fgNormalizeEH()
{
}
#if 0
- // Case 3 normalization is disabled. The JIT really doesn't like having extra empty blocks around, especially blocks that are unreachable.
- // There are lots of asserts when such things occur. We will re-evaluate whether we can do this normalization.
+ // Case 3 normalization is disabled. The JIT really doesn't like having extra empty blocks around, especially
+ // blocks that are unreachable. There are lots of asserts when such things occur. We will re-evaluate whether we
+ // can do this normalization.
// Note: there are cases in fgVerifyHandlerTab() that are also disabled to match this.
// Case #3: Prevent any two EH regions from ending with the same block.
//
// Case #1: Is the first block of a handler also the first block of any try?
//
- // Do this as a separate loop from case #2 to simplify the logic for cases where we have both multiple identical 'try' begin
- // blocks as well as this case, e.g.:
+ // Do this as a separate loop from case #2 to simplify the logic for cases where we have both multiple identical
+ // 'try' begin blocks as well as this case, e.g.:
// try {
// } finally { try { try {
// } catch {}
// } catch {}
// }
// where the finally/try/try are all the same block.
- // We also do this before case #2, so when we get to case #2, we only need to worry about updating 'try' begin blocks (and
- // only those within the 'try' region's parents), not handler begin blocks, when we are inserting new header blocks.
+ // We also do this before case #2, so when we get to case #2, we only need to worry about updating 'try' begin
+ // blocks (and only those within the 'try' region's parents), not handler begin blocks, when we are inserting new
+ // header blocks.
//
for (unsigned XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
BasicBlock* tryStart = eh->ebdTryBeg;
BasicBlock* insertBeforeBlk = tryStart; // If we need to insert new blocks, we insert before this block.
- // We need to keep track of the last "mutually protect" region so we can properly not add additional header blocks
- // to the second and subsequent mutually protect try blocks. We can't just keep track of the EH region
- // pointer, because we're updating the 'try' begin blocks as we go. So, we need to keep track of the
+ // We need to keep track of the last "mutually protect" region so we can properly not add additional header
+ // blocks to the second and subsequent mutually protect try blocks. We can't just keep track of the EH
+ // region pointer, because we're updating the 'try' begin blocks as we go. So, we need to keep track of the
// pre-update 'try' begin/last blocks themselves.
BasicBlock* mutualTryBeg = eh->ebdTryBeg;
BasicBlock* mutualTryLast = eh->ebdTryLast;
if (ehOuter->ebdIsSameTry(mutualTryBeg, mutualTryLast))
{
+ // clang-format off
// Don't touch mutually-protect regions: their 'try' regions must remain identical!
// We want to continue the looping outwards, in case we have something like this:
//
//
// In this case, all the 'try' start at the same block! Note that there are two sets of mutually-protect regions,
// separated by some nesting.
+ // clang-format on
#ifdef DEBUG
if (verbose)
// | |----------- BB04
// |------------------ BB05
//
- // We'll loop twice, to create two header blocks, one for try2, and the second time for try3 (in that order).
+ // We'll loop twice, to create two header blocks, one for try2, and the second time for try3
+ // (in that order).
// After the first loop, we have:
//
// try3 try2 try1
// | |----------- BB04
// |------------------ BB05
//
- // And all the external edges have been changed to point at try2. On the next loop, we'll create a unique
- // header block for try3, and split the edges between try2 and try3, leaving us with:
+ // And all the external edges have been changed to point at try2. On the next loop, we'll create
+ // a unique header block for try3, and split the edges between try2 and try3, leaving us with:
//
// try3 try2 try1
// |---- BB07
// | |----------- BB04
// |------------------ BB05
- BasicBlockList* nextPred; // we're going to update the pred list as we go, so we need to keep track of the next pred in case it gets deleted.
+ BasicBlockList* nextPred; // we're going to update the pred list as we go, so we need to keep
+ // track of the next pred in case it gets deleted.
for (BasicBlockList* pred = insertBeforeBlk->bbCheapPreds; pred != nullptr; pred = nextPred)
{
nextPred = pred->next;
fgAddCheapPred(newTryStart, predBlock);
fgRemoveCheapPred(insertBeforeBlk, predBlock);
- // Now change the branch. If it was a BBJ_NONE fall-through to the top block, this will do nothing.
- // Since cheap preds contains dups (for switch duplicates), we will call this once per dup.
+ // Now change the branch. If it was a BBJ_NONE fall-through to the top block, this will
+ // do nothing. Since cheap preds contains dups (for switch duplicates), we will call
+ // this once per dup.
fgReplaceJumpTarget(predBlock, newTryStart, insertBeforeBlk);
#ifdef DEBUG
// |-------------------------- BB08
//
// (Thus, try1 & try2 start at BB03, and are nested inside try3 & try4, which both start at BB01.)
- // In this case, we'll process try1 and try2, then break out. Later, we'll get to try3 and process it
- // and try4.
+ // In this case, we'll process try1 and try2, then break out. Later, we'll get to try3 and process
+ // it and try4.
break;
}
bool modified = false;
//
- // Case #3: Make sure no two 'try' or handler regions have the same 'last' block (except for mutually protect 'try' regions).
- // As above, there has to be EH region nesting for this to occur. However, since we need to consider handlers, there are more
- // cases.
+ // Case #3: Make sure no two 'try' or handler regions have the same 'last' block (except for mutually protect 'try'
+ // regions). As above, there has to be EH region nesting for this to occur. However, since we need to consider
+ // handlers, there are more cases.
//
// There are four cases to consider:
// (1) try nested in try
// of an EH region (either 'try' or handler region), since that implies that its corresponding handler precedes it.
// That will never happen in C#, but is legal in IL.
//
- // Only one of these cases can happen. For example, if we have case (2), where a try/catch is nested in a 'try' and the
- // nested handler has the same 'last' block as the outer handler, then, due to nesting rules, the nested 'try' must also
- // be within the outer handler, and obviously cannot share the same 'last' block.
+ // Only one of these cases can happen. For example, if we have case (2), where a try/catch is nested in a 'try' and
+ // the nested handler has the same 'last' block as the outer handler, then, due to nesting rules, the nested 'try'
+ // must also be within the outer handler, and obviously cannot share the same 'last' block.
//
for (unsigned XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
INDEBUG(const char* outerType = ""; const char* innerType = "";)
// 'insertAfterBlk' is the place we will insert new "normalization" blocks. We don't know yet if we will
- // insert them after the innermost 'try' or handler's "last" block, so we set it to nullptr. Once we determine
- // the innermost region that is equivalent, we set this, and then update it incrementally as we loop outwards.
+ // insert them after the innermost 'try' or handler's "last" block, so we set it to nullptr. Once we
+ // determine the innermost region that is equivalent, we set this, and then update it incrementally as we
+ // loop outwards.
BasicBlock* insertAfterBlk = nullptr;
bool foundMatchingLastBlock = false;
// This is set to 'false' for mutual protect regions for which we will not insert a normalization block.
bool insertNormalizationBlock = true;
- // Keep track of what the 'try' index and handler index should be for any new normalization block that we insert.
- // If we have a sequence of alternating nested 'try' and handlers with the same 'last' block, we'll need to update
- // these as we go. For example:
+ // Keep track of what the 'try' index and handler index should be for any new normalization block that we
+ // insert. If we have a sequence of alternating nested 'try' and handlers with the same 'last' block, we'll
+ // need to update these as we go. For example:
// try { // EH#5
// ...
// catch { // EH#4
// BB05 // try=5, hnd=0 (no enclosing hnd)
// }
//
- unsigned nextTryIndex = EHblkDsc::NO_ENCLOSING_INDEX; // Initialization only needed to quell compiler warnings.
+ unsigned nextTryIndex = EHblkDsc::NO_ENCLOSING_INDEX; // Initialization only needed to quell compiler
+ // warnings.
unsigned nextHndIndex = EHblkDsc::NO_ENCLOSING_INDEX;
- // We compare the outer region against the inner region's 'try' or handler, determined by the 'outerIsTryRegion'
- // variable. Once we decide that, we know exactly the 'last' pointer that we will use to compare against
- // all enclosing EH regions.
+ // We compare the outer region against the inner region's 'try' or handler, determined by the
+ // 'outerIsTryRegion' variable. Once we decide that, we know exactly the 'last' pointer that we will use to
+ // compare against all enclosing EH regions.
//
- // For example, if we have these nested EH regions (omitting some corresponding try/catch clauses for each nesting level):
+ // For example, if we have these nested EH regions (omitting some corresponding try/catch clauses for each
+ // nesting level):
//
// try {
// ...
if (EHblkDsc::ebdIsSameTry(ehOuter, ehInner))
{
// We can't touch this 'try', since it's mutual protect.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
{
{
nextHndIndex = EHblkDsc::NO_ENCLOSING_INDEX; // unused, since the outer block is a handler region.
- // The outer (enclosing) region is a handler (note that it can't be a filter; there is no nesting within a filter).
+ // The outer (enclosing) region is a handler (note that it can't be a filter; there is no nesting
+ // within a filter).
if (ehOuter->ebdHndLast == ehInner->ebdTryLast)
{
// Case (3) try nested in handler.
if (innerIsTryRegion && ehOuter->ebdIsSameTry(mutualTryBeg, mutualTryLast))
{
// We can't touch this 'try', since it's mutual protect.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (verbose)
{
insertNormalizationBlock = false;
- // We still need to update the 'last' pointer, in case someone inserted a normalization block before
- // the start of the mutual protect 'try' region.
+ // We still need to update the 'last' pointer, in case someone inserted a normalization
+ // block before the start of the mutual protect 'try' region.
ehOuter->ebdTryLast = insertAfterBlk;
}
else
}
}
- // If we get to here and foundMatchingLastBlock is false, then the inner and outer region don't share any
- // 'last' blocks, so we're done. Note that we could have a situation like this:
+ // If we get to here and foundMatchingLastBlock is false, then the inner and outer region don't share
+ // any 'last' blocks, so we're done. Note that we could have a situation like this:
//
// try4 try3 try2 try1
// |---- | | | BB01
if (!EHblkDsc::ebdIsSameTry(HBtab, HBtabOuter))
{
- // If it's not a mutually protect region, then the outer 'try' must completely lexically contain all the blocks
- // in the nested EH region. However, if funclets have been created, this is no longer true, since this 'try' might
- // be in a handler that is pulled out to the funclet region, while the outer 'try' remains in the main function
- // region.
+ // If it's not a mutually protect region, then the outer 'try' must completely lexically contain all the
+ // blocks in the nested EH region. However, if funclets have been created, this is no longer true, since
+ // this 'try' might be in a handler that is pulled out to the funclet region, while the outer 'try'
+ // remains in the main function region.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_EH_FUNCLETS
if (fgFuncletsCreated)
{
- // If both the 'try' region and the outer 'try' region are in the main function area, then we can do the normal
- // nesting check. Otherwise, it's harder to find a useful assert to make about their relationship.
+ // If both the 'try' region and the outer 'try' region are in the main function area, then we can
+ // do the normal nesting check. Otherwise, it's harder to find a useful assert to make about their
+ // relationship.
if ((bbNumTryLast < bbNumFirstFunclet) &&
(bbNumOuterTryLast < bbNumFirstFunclet))
{
if (multipleBegBlockNormalizationDone)
{
- assert(bbNumOuterTryBeg < bbNumTryBeg); // Two 'try' regions can't start at the same block (by EH normalization).
+ assert(bbNumOuterTryBeg < bbNumTryBeg); // Two 'try' regions can't start at the same
+ // block (by EH normalization).
}
else
{
}
if (multipleLastBlockNormalizationDone)
{
- assert(bbNumTryLast < bbNumOuterTryLast); // Two 'try' regions can't end at the same block (by EH normalization).
+ assert(bbNumTryLast < bbNumOuterTryLast); // Two 'try' regions can't end at the same block
+ // (by EH normalization).
}
else
{
}
}
- // With funclets, all we can say about the handler blocks is that they are disjoint from the enclosing try.
+ // With funclets, all we can say about the handler blocks is that they are disjoint from the
+ // enclosing try.
assert((bbNumHndLast < bbNumOuterTryBeg) || (bbNumOuterTryLast < bbNumHndBeg));
}
else
{
if (multipleBegBlockNormalizationDone)
{
- assert(bbNumOuterTryBeg < bbNumTryBeg); // Two 'try' regions can't start at the same block (by EH normalization).
+ assert(bbNumOuterTryBeg < bbNumTryBeg); // Two 'try' regions can't start at the same block
+ // (by EH normalization).
}
else
{
assert(bbNumOuterTryBeg <= bbNumTryBeg);
}
- assert(bbNumOuterTryBeg < bbNumHndBeg); // An inner handler can never start at the same block as an outer 'try' (by IL rules).
+ assert(bbNumOuterTryBeg < bbNumHndBeg); // An inner handler can never start at the same
+ // block as an outer 'try' (by IL rules).
if (multipleLastBlockNormalizationDone)
{
// An inner EH region can't share a 'last' block with the outer 'try' (by EH normalization).
}
// Check the handler region nesting, using ebdEnclosingHndIndex.
- // Only check one level of nesting, since we'll check the outer EH region (and its nesting) when we get to it later.
+ // Only check one level of nesting, since we'll check the outer EH region (and its nesting) when we get to it
+ // later.
if (HBtab->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX)
{
assert(bbNumOuterHndLast != 0);
assert(bbNumOuterHndBeg <= bbNumOuterHndLast);
- // The outer handler must completely contain all the blocks in the EH region nested within it. However, if funclets have been created,
- // it's harder to make any relationship asserts about the order of nested handlers, which also have been made into funclets.
+ // The outer handler must completely contain all the blocks in the EH region nested within it. However, if
+ // funclets have been created, it's harder to make any relationship asserts about the order of nested
+ // handlers, which also have been made into funclets.
#if FEATURE_EH_FUNCLETS
if (fgFuncletsCreated)
{
if (handlerBegIsTryBegNormalizationDone)
{
- assert(bbNumOuterHndBeg < bbNumTryBeg); // An inner 'try' can't start at the same block as an outer handler (by EH normalization).
+ assert(bbNumOuterHndBeg < bbNumTryBeg); // An inner 'try' can't start at the same block as an
+ // outer handler (by EH normalization).
}
else
{
}
if (multipleLastBlockNormalizationDone)
{
- assert(bbNumTryLast < bbNumOuterHndLast); // An inner 'try' can't end at the same block as an outer handler (by EH normalization).
+ assert(bbNumTryLast < bbNumOuterHndLast); // An inner 'try' can't end at the same block as an
+ // outer handler (by EH normalization).
}
else
{
assert(bbNumTryLast <= bbNumOuterHndLast);
}
- // With funclets, all we can say about the handler blocks is that they are disjoint from the enclosing handler.
+ // With funclets, all we can say about the handler blocks is that they are disjoint from the enclosing
+ // handler.
assert((bbNumHndLast < bbNumOuterHndBeg) || (bbNumOuterHndLast < bbNumHndBeg));
}
else
{
if (handlerBegIsTryBegNormalizationDone)
{
- assert(bbNumOuterHndBeg < bbNumTryBeg); // An inner 'try' can't start at the same block as an outer handler (by EH normalization).
+ assert(bbNumOuterHndBeg < bbNumTryBeg); // An inner 'try' can't start at the same block as an
+ // outer handler (by EH normalization).
}
else
{
assert(bbNumOuterHndBeg <= bbNumTryBeg);
}
- assert(bbNumOuterHndBeg < bbNumHndBeg); // An inner handler can never start at the same block as an outer handler (by IL rules).
+ assert(bbNumOuterHndBeg < bbNumHndBeg); // An inner handler can never start at the same block
+ // as an outer handler (by IL rules).
if (multipleLastBlockNormalizationDone)
{
// An inner EH region can't share a 'last' block with the outer handler (by EH normalization).
// Set up blockTryBegSet and blockHndBegSet.
// We might want to have this assert:
// if (fgNormalizeEHDone) assert(!blockTryBegSet[HBtab->ebdTryBeg->bbNum]);
- // But we can't, because if we have mutually-protect 'try' regions, we'll see exactly the same tryBeg twice (or more).
+ // But we can't, because if we have mutually-protect 'try' regions, we'll see exactly the same tryBeg twice
+ // (or more).
blockTryBegSet[HBtab->ebdTryBeg->bbNum] = true;
assert(!blockHndBegSet[HBtab->ebdHndBeg->bbNum]);
blockHndBegSet[HBtab->ebdHndBeg->bbNum] = true;
XTnum < compHndBBtabCount;
XTnum++, HBtab++)
{
- unsigned enclosingTryIndex = ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index, ignoring 'mutual protect' trys
+ unsigned enclosingTryIndex = ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
+ // ignoring 'mutual protect' trys
if (enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX)
{
- // The handler funclet for 'XTnum' has a try index of 'enclosingTryIndex' (at least, the parts of the funclet that don't already
- // have a more nested 'try' index because a 'try' is nested within the handler).
+ // The handler funclet for 'XTnum' has a try index of 'enclosingTryIndex' (at least, the parts of the
+ // funclet that don't already have a more nested 'try' index because a 'try' is nested within the
+ // handler).
BasicBlock* blockEnd;
for (block = (HBtab->HasFilter() ? HBtab->ebdFilter : HBtab->ebdHndBeg), blockEnd = HBtab->ebdHndLast->bbNext; block != blockEnd; block = block->bbNext)
void gcMarkFilterVarsPinned();
- // At instruction offset "instrOffset," the set of registers indicated by "regMask" is becoming live or dead, depending
- // on whether "newState" is "GC_SLOT_DEAD" or "GC_SLOT_LIVE". The subset of registers whose corresponding
+ // At instruction offset "instrOffset," the set of registers indicated by "regMask" is becoming live or dead,
+ // depending on whether "newState" is "GC_SLOT_DEAD" or "GC_SLOT_LIVE". The subset of registers whose corresponding
// bits are set in "byRefMask" contain by-refs rather than regular GC pointers. "*pPtrRegs" is the set of
// registers currently known to contain pointers. If "mode" is "ASSIGN_SLOTS", computes and records slot
// ids for the registers. If "mode" is "DO_WORK", informs "gcInfoEncoder" about the state transition,
//
// These record the info about the procedure in the info-block
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef JIT32_GCENCODER
private:
#endif // !LEGACY_BACKEND
};
-
inline
unsigned char encodeUnsigned(BYTE *dest, unsigned value)
{
// (0xb3864c38, 0x4273, 0x58c5, 0x54, 0x5b, 0x8b, 0x36, 0x08, 0x34, 0x34, 0x71)); // Provider GUID
// int main(int argc, char* argv[]) // or DriverEntry for kernel-mode.
// {
-// TraceLoggingRegister(g_hProvider, NULL, NULL, NULL); // NULLs only needed for C. Please do not include the NULLs in C++ code.
+// TraceLoggingRegister(g_hProvider, NULL, NULL, NULL); // NULLs only needed for C. Please do not include the
+// // NULLs in C++ code.
// TraceLoggingWrite(g_hProvider,
// "MyEvent1",
// TraceLoggingString(argv[0], "arg0"),
// Threshold to detect if we are hitting too many bad (noway) methods
// over good methods per process to prevent logging too much data.
-static const double NOWAY_NOISE_RATIO = 0.6; // Threshold of (bad / total) beyond which we'd stop logging. We'd restart if the pass rate improves.
-static const unsigned NOWAY_SUFFICIENCY_THRESHOLD = 25; // Count of methods beyond which we'd apply percent threshold
+static const double NOWAY_NOISE_RATIO = 0.6; // Threshold of (bad / total) beyond which we'd stop
+ // logging. We'd restart if the pass rate improves.
+static const unsigned NOWAY_SUFFICIENCY_THRESHOLD = 25; // Count of methods beyond which we'd apply percent
+ // threshold
// Initialize Telemetry State
volatile bool JitTelemetry::s_fProviderRegistered = false;
}
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- // The arg size is returning the number of bytes of the argument. For a struct it could return a size not a multiple of
- // TARGET_POINTER_SIZE. The stack allocated space should always be multiple of TARGET_POINTER_SIZE, so round it up.
+ // The arg size is returning the number of bytes of the argument. For a struct it could return a size not a
+ // multiple of TARGET_POINTER_SIZE. The stack allocated space should always be multiple of TARGET_POINTER_SIZE,
+ // so round it up.
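+ // (Illustrative example, not in the original sources: with an 8-byte TARGET_POINTER_SIZE, a 12-byte struct
+ // argument reserves roundUp(12, 8) == 16 bytes of stack.)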
compArgSize += (unsigned)roundUp(argSize, TARGET_POINTER_SIZE);
#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
compArgSize += argSize;
bool isParam = varDsc->lvIsParam;
for (unsigned index=0; index < 2; ++index)
- {
+ {
// Grab the temp for the field local.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
char buf[200];
default: // This must be a primitive var. Fall out of switch statement
break;
}
- // We only need this Quirk for _TARGET_64BIT_
#ifdef _TARGET_64BIT_
+ // We only need this Quirk for _TARGET_64BIT_
if (lvaTable[varNum].lvQuirkToLong)
{
noway_assert(lvaTable[varNum].lvAddrExposed);
#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+#ifndef DEBUG
// Assign slot numbers to all variables.
// If compiler generated local variables, slot numbers will be
// invalid (out of range of info.compVarScopes).
// We don't need to do this for IL, but this keeps lvSlotNum consistent.
-#ifndef DEBUG
if (opts.compScopeInfo && (info.compVarScopesCount > 0))
#endif
{
return result;
}
+// clang-format off
/*****************************************************************************
*
* Compute stack frame offsets for arguments, locals and optionally temps.
* relative or stack pointer relative.
*
*/
+// clang-format on
void Compiler::lvaAssignFrameOffsets(FrameLayoutState curState)
{
}
else
{
- // For Windows AMD64 there are 4 slots for the register passed arguments on the top of the caller's stack. This is where they are always homed.
- // So, they can be accessed with positive offset.
- // On System V platforms, if the RA decides to home a register passed arg on the stack,
- // it creates a stack location on the callee stack (like any other local var.) In such a case, the register passed, stack homed arguments
- // are accessed using negative offsets and the stack passed arguments are accessed using positive offset (from the caller's stack.)
- // For System V platforms if there is no frame pointer the caller stack parameter offset should include the callee allocated space.
- // If frame register is used, the callee allocated space should not be included for accessing the caller stack parameters.
- // The last two requirements are met in lvaFixVirtualFrameOffsets method, which fixes the offsets, based on frame pointer existence,
- // existence of alloca instructions, ret address pushed, ets.
+ // For Windows AMD64 there are 4 slots for the register passed arguments on the top of the caller's stack.
+ // This is where they are always homed. So, they can be accessed with positive offset.
+ // On System V platforms, if the RA decides to home a register passed arg on the stack, it creates a stack
+ // location on the callee stack (like any other local var.) In such a case, the register passed, stack homed
+ // arguments are accessed using negative offsets and the stack passed arguments are accessed using positive
+ // offset (from the caller's stack.)
+ // For System V platforms if there is no frame pointer the caller stack parameter offset should include the
+ // callee allocated space. If frame register is used, the callee allocated space should not be included for
+ // accessing the caller stack parameters. The last two requirements are met in lvaFixVirtualFrameOffsets
+ // method, which fixes the offsets, based on frame pointer existence, existence of alloca instructions, ret
+ // address pushed, etc.
varDsc->lvStkOffs = *callerArgOffset;
// Structs passed on stack could be of size less than TARGET_POINTER_SIZE.
if (varDsc->lvIsRegArg)
{
/* Argument is passed in a register, don't count it
- * when updating the current offset on the stack */
+ * when updating the current offset on the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if !defined(_TARGET_ARMARCH_)
+#if DEBUG
// TODO: Remove this noway_assert and replace occurrences of sizeof(void *) with argSize
// Also investigate why we are incrementing argOffs for X86 as this seems incorrect
//
-#if DEBUG
noway_assert(argSize == sizeof(void *));
#endif // DEBUG
#endif
// r1 VACookie -- argOffs = 0
// -------------------------
//
- // Consider argOffs as if it accounts for number of prespilled registers before the current register.
- // In the above example, for r2, it is r1 that is prespilled, but since r1 is accounted for by argOffs
- // being 4, there should have been no skipping. Instead, if we didn't assign r1 to any variable, then
- // argOffs would still be 0 which implies it is not accounting for r1, equivalently r1 is skipped.
+ // Consider argOffs as if it accounts for number of prespilled registers before the current
+ // register. In the above example, for r2, it is r1 that is prespilled, but since r1 is
+ // accounted for by argOffs being 4, there should have been no skipping. Instead, if we didn't
+ // assign r1 to any variable, then argOffs would still be 0 which implies it is not accounting
+ // for r1, equivalently r1 is skipped.
//
// If prevRegsSize is unaccounted for by a corresponding argOffs, we must have skipped a register.
int prevRegsSize = genCountBits(codeGen->regSet.rsMaskPreSpillRegArg & (regMask - 1)) * TARGET_POINTER_SIZE;
// r3 int a2 --> pushed (not pre-spilled) for alignment of a0 by lvaInitUserArgs.
// r2 struct { int } a1
// r0-r1 struct { long } a0
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef PROFILING_SUPPORTED
// On Arm under profiler, r0-r3 are always prespilled on stack.
- // It is possible to have methods that accept only HFAs as parameters e.g. Signature(struct hfa1, struct hfa2)
- // In which case hfa1 and hfa2 will be en-registered in co-processor registers and will have an argument offset
- // less than size of preSpill.
+ // It is possible to have methods that accept only HFAs as parameters e.g. Signature(struct hfa1, struct
+ // hfa2), in which case hfa1 and hfa2 will be en-registered in co-processor registers and will have an
+ // argument offset less than size of preSpill.
//
// For this reason the following conditions are asserted when not under profiler.
if (!compIsProfilerHookNeeded())
// For struct promoted parameters we need to set the offsets for both LclVars.
//
// For a dependent promoted struct we also assign the struct fields stack offset
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if !defined(_TARGET_64BIT_)
if ((varDsc->TypeGet() == TYP_LONG) && varDsc->lvPromoted)
{
if (opts.compNeedSecurityCheck)
{
- /* This can't work without an explicit frame, so make sure */
#ifdef JIT32_GCENCODER
+ /* This can't work without an explicit frame, so make sure */
noway_assert(codeGen->isFramePointerUsed());
#endif
stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaSecurityObject, TARGET_POINTER_SIZE, stkOffs);
// If this isn't the final frame layout, assume we have to push an extra QWORD
// Just so the offsets are true upper limits.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef UNIX_AMD64_ABI
// The compNeedToAlignFrame flag is indicating if there is a need to align the frame.
// On AMD64-Windows, if there are calls, 4 slots for the outgoing ars are allocated, except for
/* Figure out and record the stack offset of the temp */
/* Need to align the offset? */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_64BIT_
if (varTypeIsGC(tempType) && ((stkOffs % TARGET_POINTER_SIZE) != 0))
{
// Calculate 'pad' as the number of bytes to align up 'stkOffs' to be a multiple of TARGET_POINTER_SIZE
- // In practice this is really just a fancy way of writing 4. (as all stack locations are at least 4-byte aligned)
- // Note stkOffs is always negative, so (stkOffs % TARGET_POINTER_SIZE) yields a negative value.
+ // In practice this is really just a fancy way of writing 4. (as all stack locations are at least 4-byte
+ // aligned). Note stkOffs is always negative, so (stkOffs % TARGET_POINTER_SIZE) yields a negative
+ // value.
//
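+ // (Illustrative example, not in the original sources: with stkOffs == -12 and an 8-byte
+ // TARGET_POINTER_SIZE, alignPad works out to 4, padding the offset to the next 8-byte boundary.)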
int alignPad = (int)AlignmentPad((unsigned)-stkOffs, TARGET_POINTER_SIZE);
}
else
{
- // For RyuJIT backend, it might be in a register part of the time, but it will definitely have a stack home location.
- // Otherwise, it's always on the stack.
+ // For RyuJIT backend, it might be in a register part of the time, but it will definitely have a stack home
+ // location. Otherwise, it's always on the stack.
if (lvaDoneFrameLayout != NO_FRAME_LAYOUT)
lvaDumpFrameLocation(lclNum);
}
if ((tree->gtFlags & GTF_VAR_DEF) != 0 &&
(tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0)
{
-// if (!(fgCurUseSet & bitMask)) printf("V%02u,T%02u def at %08p\n", lclNum, varDsc->lvVarIndex, tree);
+ // if (!(fgCurUseSet & bitMask)) printf("V%02u,T%02u def at %08p\n", lclNum, varDsc->lvVarIndex, tree);
VarSetOps::AddElemD(this, fgCurDefSet, varDsc->lvVarIndex);
}
else
{
-// if (!(fgCurDefSet & bitMask))
-// {
-// printf("V%02u,T%02u use at ", lclNum, varDsc->lvVarIndex);
-// printTreeID(tree);
-// printf("\n");
-// }
+ // if (!(fgCurDefSet & bitMask))
+ // {
+ // printf("V%02u,T%02u use at ", lclNum, varDsc->lvVarIndex);
+ // printTreeID(tree);
+ // printf("\n");
+ // }
/* We have the following scenarios:
* 1. "x += something" - in this case x is flagged GTF_VAR_USEASG
}
// Is this a use/def of a local variable?
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef LEGACY_BACKEND
// Generally, the last use information is associated with the lclVar node.
// However, for LEGACY_BACKEND, the information must be associated
// and we start computing life again from the op_ovf node (we go backwards). Note that we
// don't need to update ref counts because we don't change them, we're only changing the
// operation.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
}
#endif // DEBUG
+
switch (asgNode->gtOper)
{
case GT_ASG_ADD:
else
{
NO_SIDE_EFFECTS:
- /* No side effects - Remove the interior statement */
#ifdef DEBUG
if (verbose)
{
printf("\n");
}
#endif // DEBUG
+ /* No side effects - Remove the interior statement */
fgUpdateRefCntForExtract(asgNode, NULL);
/* Change the assignment to a GT_NOP node */
* a data section where this array will live and will emit code that based on the switch index, will indirect and
* jump to the destination specified in the jump table.
*
- * For this transformation we introduce a new GT node called GT_SWITCH_TABLE that is a specialization of the switch node
- * for jump table based switches.
+ * For this transformation we introduce a new GT node called GT_SWITCH_TABLE that is a specialization of the switch
+ * node for jump table based switches.
* The overall structure of a GT_SWITCH_TABLE is:
*
* GT_SWITCH_TABLE
* else if (case == firstCase){ goto jumpTable[1]; }
* else if (case == secondCase) { goto jumptable[2]; } and so on.
*
- * This transformation is of course made in JIT-IR, not downstream to CodeGen level, so this way we no longer require
- * internal temporaries to maintain the index we're evaluating plus we're using existing code from LinearCodeGen
- * to implement this instead of implement all the control flow constructs using InstrDscs and InstrGroups downstream.
+ * This transformation is of course made in JIT-IR, not downstream to CodeGen level, so this way we no longer
+ * require internal temporaries to maintain the index we're evaluating plus we're using existing code from
+ * LinearCodeGen to implement this instead of implement all the control flow constructs using InstrDscs and
+ * InstrGroups downstream.
*/
void Lowering::LowerSwitch(GenTreePtr* pTree)
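To make the two lowering strategies described in the comment above concrete, here is a minimal standalone sketch in plain C++ rather than JIT IR; the jump-table contents and the three-case threshold are invented purely for illustration:

#include <cstddef>
#include <cstdio>

// Stand-ins for jump targets; in the JIT these would be basic-block addresses emitted into a data section.
static const char* jumpTable[] = { "case0", "case1", "case2", "default" };

// Few cases: expand the switch into a compare/branch chain, as in the if/else sequence above.
static const char* LowerAsCompareChain(size_t value)
{
    if (value == 0)
        return jumpTable[0];
    else if (value == 1)
        return jumpTable[1];
    else if (value == 2)
        return jumpTable[2];
    else
        return jumpTable[3];
}

// Many cases: bounds-check once, then indirect through the table (the GT_SWITCH_TABLE form).
static const char* LowerAsJumpTable(size_t value)
{
    if (value >= 3)
    {
        return jumpTable[3]; // out of range -> default case
    }
    return jumpTable[value];
}

int main()
{
    printf("%s %s\n", LowerAsCompareChain(1), LowerAsJumpTable(5)); // prints: case1 default
    return 0;
}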
if (info->structDesc.eightByteCount == 1)
{
+ // clang-format off
// Case 1 above: Create a GT_PUTARG_REG node with op1 of the original tree.
//
// Here the IR for this operation:
// (3, 4)[000071] ------------arg0 in rdi + --* putarg_reg int RV
// N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1
//
+ // clang-format on
putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
}
else if (info->structDesc.eightByteCount == 2)
{
+ // clang-format off
// Case 2 above: Convert the LCL_FLDs to PUTARG_REG
//
// lowering call :
// (3, 4)[000073] ------------arg0 in rsi + --* putarg_reg long
// N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2
//
+ // clang-format on
assert(arg->OperGet() == GT_LIST);
GenTreeArgList* argListPtr = arg->AsArgList();
// control expr | +--* const(h) long 0x7ffe8e910e98 ftn REG NA
// \--* call void System.Runtime.Remoting.Identity.RemoveAppNameOrAppGuidIfNecessary $VN.Void
//
-// In this case, the GT_PUTARG_REG src is a nested call. We need to put the embedded statement after that call (as shown).
-// We assume that of all the GT_PUTARG_*, only the first one can have a nested call.
+// In this case, the GT_PUTARG_REG src is a nested call. We need to put the embedded statement after that call
+// (as shown). We assume that of all the GT_PUTARG_*, only the first one can have a nested call.
//
// Params:
// callNode - tail call node
// The below condition cannot be asserted in lower because fgSimpleLowering()
// can add a new basic block for range check failure which becomes
// fgLastBB with block number > loop header block number.
- //assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) || !comp->optReachWithoutCall(comp->fgFirstBB, comp->compCurBB) || comp->genInterruptible);
+ // assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
+ // !comp->optReachWithoutCall(comp->fgFirstBB, comp->compCurBB) || comp->genInterruptible);
// If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
// a method returns. This is a case of caller method has both PInvokes and tail calls.
GenTree* Lowering::LowerIndirectNonvirtCall(GenTreeCall* call)
{
- // Indirect cookie calls gets transformed by fgMorphArgs as indirect call with non-standard args.
- // Hence we should never see this type of call in lower.
-
#ifdef _TARGET_X86_
if (call->gtCallCookie != nullptr)
{
}
#endif
+ // Indirect cookie calls get transformed by fgMorphArgs as an indirect call with non-standard args.
+ // Hence we should never see this type of call in lower.
+
noway_assert(call->gtCallCookie == nullptr);
return nullptr;
// +10h +08h m_Next offsetOfFrameLink method prolog
// +18h +0Ch m_Datum offsetOfCallTarget call site
// +20h n/a m_StubSecretArg not set by JIT
-// +28h +10h m_pCallSiteSP offsetOfCallSiteSP x86: call site, and zeroed in method prolog;
-// non-x86: method prolog (SP remains constant in function,
-// after prolog: no localloc and PInvoke in same function)
+// +28h +10h m_pCallSiteSP offsetOfCallSiteSP x86: call site, and zeroed in method
+// prolog;
+// non-x86: method prolog (SP remains
+// constant in function, after prolog: no
+// localloc and PInvoke in same function)
// +30h +14h m_pCallerReturnAddress offsetOfReturnAddress call site
// +38h +18h m_pCalleeSavedFP offsetOfCalleeSavedFP not set by JIT
// +1Ch JIT retval spill area (int) before call_gc ???
// Call runtime helper to fill in our InlinedCallFrame and push it on the Frame list:
// TCB = CORINFO_HELP_INIT_PINVOKE_FRAME(&symFrameStart, secretArg);
// for x86, don't pass the secretArg.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_X86_
GenTreeArgList* argList = comp->gtNewArgList(frameAddr);
// Gentree of the last top level stmnt should match.
assert(lastTopLevelStmtExpr == lastExpr);
- // Note: PInvoke Method Epilog (PME) needs to be inserted just before GT_RETURN, GT_JMP or GT_CALL node in execution order
- // so that it is guaranteed that there will be no further PInvokes after that point in the method.
+ // Note: PInvoke Method Epilog (PME) needs to be inserted just before GT_RETURN, GT_JMP or GT_CALL node in execution
+ // order so that it is guaranteed that there will be no further PInvokes after that point in the method.
//
// Example1: GT_RETURN(op1) - say execution order is: Op1, GT_RETURN. After inserting PME, execution order would be
// Op1, PME, GT_RETURN
// Example3: GT_JMP. After inserting PME execution order would be: PME, GT_JMP
// That is after PME, args for GT_JMP call will be setup.
- // TODO-Cleanup: setting GCState to 1 seems to be redundant as InsertPInvokeCallProlog will set it to zero before a PInvoke
- // call and InsertPInvokeCallEpilog() will set it back to 1 after the PInvoke. Though this is redundant, it is harmeless.
+ // TODO-Cleanup: setting GCState to 1 seems to be redundant as InsertPInvokeCallProlog will set it to zero before a
+ // PInvoke call and InsertPInvokeCallEpilog() will set it back to 1 after the PInvoke. Though this is redundant,
+ // it is harmless.
// Note that liveness is artificially extending the life of compLvFrameListRoot var if the method being compiled has
// PInvokes. Deleting the below stmnt would cause an assert in lsra.cpp::SetLastUses() since compLvFrameListRoot
- // will be live-in to a BBJ_RETURN block without any uses. Long term we need to fix liveness for x64 case to properly
- // extend the life of compLvFrameListRoot var.
- //
+ // will be live-in to a BBJ_RETURN block without any uses. Long term we need to fix liveness for x64 case to
+ // properly extend the life of compLvFrameListRoot var.
+ //
// Thread.offsetOfGcState = 0/1
// That is [tcb + offsetOfGcState] = 1
GenTree* storeGCState = SetGCState(1);
// // Call the JIT_PINVOKE_END helper
// JIT_PINVOKE_END(&opaqueFrame);
//
- // Note that the JIT_PINVOKE_{BEGIN.END} helpers currently use the default calling convention for the target platform.
- // They may be changed in the future such that they preserve all register values.
+ // Note that the JIT_PINVOKE_{BEGIN,END} helpers currently use the default calling convention for the target
+ // platform. They may be changed in the future such that they preserve all register values.
GenTree* result = nullptr;
void* addr = nullptr;
{
assert((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_VIRT_STUB);
- GenTree* result = nullptr;
-
// An x86 JIT which uses full stub dispatch must generate only
// the following stub dispatch calls:
//
//
// THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
// vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
-
+
+ GenTree* result = nullptr;
+
#ifdef _TARGET_64BIT_
// Non-tail calls: Jump Stubs are not taken into account by VM for mapping an AV into a NullRef
// exception. Therefore, JIT needs to emit an explicit null check. Note that Jit64 too generates
}
}
- // If we have any PInvoke calls, insert the one-time prolog code. We've already inserted the epilog code in the appropriate spots.
- // NOTE: there is a minor optimization opportunity here, as we still create p/invoke data structures and setup/teardown
- // even if we've eliminated all p/invoke calls due to dead code elimination.
+ // If we have any PInvoke calls, insert the one-time prolog code. We've already inserted the epilog code in the
+ // appropriate spots. NOTE: there is a minor optimization opportunity here, as we still create p/invoke data
+ // structures and setup/teardown even if we've eliminated all p/invoke calls due to dead code elimination.
if (comp->info.compCallUnmanaged)
{
InsertPInvokeMethodProlog();
case GT_CAST:
{
- // TODO-ARM64-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned register.
+ // TODO-ARM64-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned
+ // register.
// see CodeGen::genIntToIntCast()
info->srcCount = 1;
GenTreePtr blockSize = initBlkNode->Size();
GenTreePtr initVal = initBlkNode->InitVal();
+#if 0
// TODO-ARM64-CQ: Currently we generate a helper call for every
// initblk we encounter. Later on we should implement loop unrolling
// code sequences to improve CQ.
// For reference see the code in LowerXArch.cpp.
-#if 0
// If we have an InitBlk with constant block size we can speed this up by unrolling the loop.
if (blockSize->IsCnsIntOrI() &&
blockSize->gtIntCon.gtIconVal <= INITBLK_UNROLL_LIMIT &&
GenTreePtr blockSize = cpBlkNode->Size();
GenTreePtr srcAddr = cpBlkNode->Source();
+#if 0
// In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
// we should unroll the loop to improve CQ.
// TODO-ARM64-CQ: cpblk loop unrolling is currently not implemented.
-#if 0
+
if (blockSize->IsCnsIntOrI() && blockSize->gtIntCon.gtIconVal <= CPBLK_UNROLL_LIMIT)
{
assert(!blockSize->IsIconHandle());
}
else
{
- GenTreePtr tmp = comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode);
+ GenTreePtr tmp = comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType),
+ rotateLeftIndexNode);
rotateLeftIndexNode->InsertAfterSelf(tmp);
tree->gtOp.gtOp2 = tmp;
}
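A minimal standalone sketch of the rewrite above, assuming a 32-bit rotate for concreteness: negating the rotate index (what the inserted GT_NEG node expresses in the IR) turns a rotate-left into a rotate-right.

#include <cstdint>
#include <cstdio>

static uint32_t RotateRight32(uint32_t x, uint32_t y)
{
    y &= 31; // keep the shift amount in range, as hardware rotates do
    return (x >> y) | (x << ((32 - y) & 31));
}

static uint32_t RotateLeft32(uint32_t x, uint32_t y)
{
    // rol(x, y) == ror(x, -y) modulo the bit size
    return RotateRight32(x, 0u - y);
}

int main()
{
    printf("%08x\n", RotateLeft32(0x12345678u, 8)); // prints 34567812
    return 0;
}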
RegisterType registerType = call->TypeGet();
// Set destination candidates for return value of the call.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_X86_
if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
{
}
}
- // First, count reg args
#if FEATURE_VARARG
bool callHasFloatRegArgs = false;
#endif // !FEATURE_VARARG
+ // First, count reg args
for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
{
assert(list->IsList());
short internalIntCount = 0;
if (remainingSlots > 0)
{
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
// This TYP_STRUCT argument is also passed in the outgoing argument area
// We need a register to address the TYP_STRUCT
- // And we may need 2
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
internalIntCount = 1;
#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // And we may need 2
internalIntCount = 2;
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
bool rev;
bool modifiedSources = false;
+#ifdef FEATURE_SIMD
// If indirTree is of TYP_SIMD12, don't mark addr as contained
// so that it always get computed to a register. This would
// mean codegen side logic doesn't need to handle all possible
//
// TODO-XArch-CQ: handle other addr mode expressions that could be marked
// as contained.
-#ifdef FEATURE_SIMD
if (indirTree->TypeGet() == TYP_SIMD12)
{
// Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
GenTreePtr andOp1 = op1->gtOp.gtOp1;
if (andOp1->isMemoryOp())
{
- // If the type of value memoryOp (andOp1) is not the same as the type of constant (andOp2)
- // check to see whether it is safe to mark AndOp1 as contained. For e.g. in the following
- // case it is not safe to mark andOp1 as contained
+ // If the type of value memoryOp (andOp1) is not the same as the type of constant
+ // (andOp2) check to see whether it is safe to mark AndOp1 as contained. For e.g. in
+ // the following case it is not safe to mark andOp1 as contained
// AndOp1 = signed byte and andOp2 is an int constant of value 512.
//
// If it is safe, we update the type and value of andOp2 to match with andOp1.
// If it is a GT_LCL_VAR, it still needs the reg to hold the address.
// We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base.
- // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because field address value is known at jit time.
- // Also, we don't need a reg for GT_CLS_VAR_ADDR.
+ // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because field address value is known at jit
+ // time. Also, we don't need a reg for GT_CLS_VAR_ADDR.
if (indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR)
{
m_lsra->clearOperandCounts(indirDst);
"Internal registers" are registers used during the code sequence generated for the node.
The register lifetimes must obey the following lifetime model:
- First, any internal registers are defined.
- - Next, any source registers are used (and are then freed if they are last use and are not identified as "delayRegFree").
+ - Next, any source registers are used (and are then freed if they are last use and are not identified as
+ "delayRegFree").
- Next, the internal registers are used (and are then freed).
- Next, any registers in the kill set for the instruction are killed.
- Next, the destination register(s) are defined (multiple destination registers are only supported on ARM)
- If a lclVar node currently lives in some register, it may not be desirable to move it
(i.e. its current location may be desirable for future uses, e.g. if it's a callee save register,
but needs to be in a specific arg register for a call).
- - In other cases there may be conflicts on the restrictions placed by the defining node and the node which consumes it
+ - In other cases there may be conflicts between the restrictions placed by the defining node and the node
+ which consumes it
- If such a node is constrained to a single fixed register (e.g. an arg register, or a return from a call),
- then LSRA is free to annotate the node with a different register. The code generator must issue the appropriate move.
- - However, if such a node is constrained to a set of registers, and its current location does not satisfy that requirement,
- LSRA must insert a GT_COPY node between the node and its parent. The gtRegNum on the GT_COPY node must satisfy the
- register requirement of the parent.
+ then LSRA is free to annotate the node with a different register. The code generator must issue the appropriate
+ move.
+ - However, if such a node is constrained to a set of registers, and its current location does not satisfy that
+ requirement, LSRA must insert a GT_COPY node between the node and its parent. The gtRegNum on the GT_COPY node
+ must satisfy the register requirement of the parent.
- GenTree::gtRsvdRegs has a set of registers used for internal temps.
- - A tree node is marked GTF_SPILL if the tree node must be spilled by the code generator after it has been evaluated.
+ - A tree node is marked GTF_SPILL if the tree node must be spilled by the code generator after it has been
+ evaluated.
- LSRA currently does not set GTF_SPILLED on such nodes, because it caused problems in the old code generator.
In the new backend perhaps this should change (see also the note below under CodeGen).
- A tree node is marked GTF_SPILLED if it is a lclVar that must be reloaded prior to use.
- The register (gtRegNum) on the node indicates the register to which it must be reloaded.
- For lclVar nodes, since the uses and defs are distinct tree nodes, it is always possible to annotate the node
with the register to which the variable must be reloaded.
- - For other nodes, since they represent both the def and use, if the value must be reloaded to a different register,
- LSRA must insert a GT_RELOAD node in order to specify the register to which it should be reloaded.
+ - For other nodes, since they represent both the def and use, if the value must be reloaded to a different
+ register, LSRA must insert a GT_RELOAD node in order to specify the register to which it should be reloaded.
Local variable table (LclVarDsc):
- LclVarDsc::lvRegister is set to true if a local variable has the
// Spill info
newRP->isFixedRegRef = isFixedRegister;
+#ifndef _TARGET_AMD64_
// We don't need this for AMD because the PInvoke method epilog code is explicit
// at register allocation time.
-#ifndef _TARGET_AMD64_
if (theInterval != nullptr &&
theInterval->isLocalVar &&
compiler->info.compCallUnmanaged &&
// We maintain two sets of FP vars - those that meet the first threshold of weighted ref Count,
// and those that meet the second (see the definitions of thresholdFPRefCntWtd and maybeFPRefCntWtd
// above).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
// Additionally, when we are generating AVX on non-UNIX amd64, we keep a separate set of the LargeVectorType vars.
if (varDsc->lvType == LargeVectorType)
// registers current include the number of fp vars, whether there are loops, and whether there are
// multiple exits. These have been selected somewhat empirically, but there is probably room for
// more tuning.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (VERBOSE)
}
}
#endif
+
JITDUMP("floatVarCount = %d; hasLoops = %d, singleExit = %d\n",
floatVarCount,
compiler->fgHasLoops,
void
LinearScan::initVarRegMaps()
{
- assert(compiler->lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked variables.
+ assert(compiler->lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked
+ // variables.
// The compiler memory allocator requires that the allocation be an
// even multiple of int-sized objects
int internalIntCount = tree->gtLsraInfo.internalIntCount;
regMaskTP internalCands = tree->gtLsraInfo.getInternalCandidates(this);
- // If the number of internal integer registers required is the same as the number of candidate integer registers in the candidate set,
- // then they must be handled as fixed registers.
+ // If the number of internal integer registers required is the same as the number of candidate integer registers in
+ // the candidate set, then they must be handled as fixed registers.
// (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.)
bool fixedRegs = false;
regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT));
tempInterval->isInternal = true;
RefPosition *pos = newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED);
// We are going to save the existing relatedInterval of varInterval on tempInterval, so that we can set
- // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding RefTypeUpperVectorSaveUse
- // RefPosition. We will then restore the relatedInterval onto varInterval, and set varInterval as the relatedInterval
- // of tempInterval.
+ // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding
+ // RefTypeUpperVectorSaveUse RefPosition. We will then restore the relatedInterval onto varInterval,
+ // and set varInterval as the relatedInterval of tempInterval.
tempInterval->relatedInterval = varInterval->relatedInterval;
varInterval->relatedInterval = tempInterval;
}
// returned is in fact a predecessor.
//
// Notes:
-// This will select a predecessor based on the heuristics obtained by getLsraBlockBoundaryLocations(), which can be one of:
+// This will select a predecessor based on the heuristics obtained by getLsraBlockBoundaryLocations(), which can be
+// one of:
// LSRA_BLOCK_BOUNDARY_PRED - Use the register locations of a predecessor block (default)
// LSRA_BLOCK_BOUNDARY_LAYOUT - Use the register locations of the previous block in layout order.
// This is the only case where this actually returns a different block.
// LSRA_BLOCK_BOUNDARY_ROTATE - Rotate the register locations from a predecessor.
// For this case, the block returned is the same as for LSRA_BLOCK_BOUNDARY_PRED, but
-// the register locations will be "rotated" to stress the resolution and allocation code.
+// the register locations will be "rotated" to stress the resolution and allocation
+// code.
BasicBlock*
LinearScan::findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated))
foundBetterCandidate = true;
}
}
- // If both cover the range, prefer a register that is killed sooner (leaving the longer range register available).
- // If both cover the range and also getting killed at the same location, prefer the one which is same as previous
- // assignment.
+ // If both cover the range, prefer a register that is killed sooner (leaving the longer range register
+ // available). If both cover the range and also getting killed at the same location, prefer the one which
+ // is same as previous assignment.
else if (nextPhysRefLocation > lastLocation)
{
if (nextPhysRefLocation < bestLocation)
isBetterLocation = (nextLocation <= farthestLocation);
}
else
- // the below if-stmt is associated with this else
#endif
+ // This if-stmt is associated with the above else
if (recentAssignedRefWeight < farthestRefPosWeight)
{
isBetterLocation = true;
// In this case, we will normally change it to REG_STK. We will update its "spilled" status when we
// encounter it in resolveLocalRef().
// 2a. If the next RefPosition is marked as a copyReg, we need to retain the allocated register. This is
- // because the copyReg RefPosition will not have recorded the "home" register, yet downstream RefPositions
- // rely on the correct "home" register.
+ // because the copyReg RefPosition will not have recorded the "home" register, yet downstream
+ // RefPositions rely on the correct "home" register.
// 3. This variable was spilled before we reached the end of predBB. In this case, both targetReg and
// predVarToRegMap[varIndex] will be REG_STK, and the next RefPosition will have been marked
// as reload during allocation time if necessary (note that by the time we actually reach the next
currentInterval->hasConflictingDefUse));
// It's already in a register, but not one we need.
- // If it is a fixed use that is not marked "delayRegFree", there is already a FixedReg to ensure that the
- // needed reg is not otherwise in use, so we can simply ignore it and codegen will do the copy.
+ // If it is a fixed use that is not marked "delayRegFree", there is already a FixedReg to ensure that
+ // the needed reg is not otherwise in use, so we can simply ignore it and codegen will do the copy.
// The reason we need special handling for the "delayRegFree" case is that we need to mark the
// fixed-reg as in-use and delayed (the FixedReg RefPosition doesn't handle the delay requirement).
// Otherwise, if this is a pure use localVar or tree temp, we assign a copyReg, but must free both regs
}
// Free registers to clear associated intervals for resolution phase
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (getLsraExtendLifeTimes())
{
break;
}
+#ifndef _TARGET_XARCH_
// We record tempregs for beginning and end of each block.
// For amd64/x86 we only need a tempReg for float - we'll use xchg for int.
// TODO-Throughput: It would be better to determine the tempRegs on demand, but the code below
// modifies the varToRegMaps so we don't have all the correct registers at the time
// we need to get the tempReg.
-#ifndef _TARGET_XARCH_
regNumber tempRegInt = (resolveType == ResolveSharedCritical) ? REG_NA : getTempRegForResolution(fromBlock, toBlock, TYP_INT);
#endif // !_TARGET_XARCH_
regNumber tempRegFlt = REG_NA;
//
// Currently, the maximum number of masks allowed is a constant defined by 'numMasks'. The register mask
// table is never resized. It is also limited by the size of the index, currently an unsigned char.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_ARM64_)
static const int numMasks = 128;
LSRA_LIMIT_CALLER = 0x2,
LSRA_LIMIT_SMALL_SET = 0x3,
LSRA_LIMIT_MASK = 0x3 };
+
// When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save
// registers, so as to get different coverage than limiting to callee or caller.
// At least for x86 and AMD64, and potentially other architecture that will support SIMD,
// we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4.
// Hence the "SmallFPSet" has 5 elements.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_AMD64_)
#ifdef UNIX_AMD64_ABI
LsraExtendLifetimes getLsraExtendLifeTimes() { return (LsraExtendLifetimes) (lsraStressMask & LSRA_EXTEND_LIFETIMES_MASK); }
bool extendLifetimes() { return getLsraExtendLifeTimes() == LSRA_EXTEND_LIFETIMES; }
- // This controls whether variables locations should be set to the previous block in layout order (LSRA_BLOCK_BOUNDARY_LAYOUT),
- // or to that of the highest-weight predecessor (LSRA_BLOCK_BOUNDARY_PRED - the default),
- // or rotated (LSRA_BLOCK_BOUNDARY_ROTATE).
+ // This controls whether variable locations should be set to the previous block in layout order
+ // (LSRA_BLOCK_BOUNDARY_LAYOUT), or to that of the highest-weight predecessor (LSRA_BLOCK_BOUNDARY_PRED -
+ // the default), or rotated (LSRA_BLOCK_BOUNDARY_ROTATE).
enum LsraBlockBoundaryLocations { LSRA_BLOCK_BOUNDARY_PRED = 0,
LSRA_BLOCK_BOUNDARY_LAYOUT = 0x100,
LSRA_BLOCK_BOUNDARY_ROTATE = 0x200,
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+// clang-format off
// memberName - enum member name
// memberValue - enum member value
// shortName - short name string
DEF_REFTYPE(RefTypeUpperVectorSaveDef, (0x40 | RefTypeDef), "UVSv" )
DEF_REFTYPE(RefTypeUpperVectorSaveUse, (0x40 | RefTypeUse), "UVRs" )
DEF_REFTYPE(RefTypeKillGCRefs , 0x80 , "KlGC" )
+// clang-format on
}
// do we need to do it in two steps R -> I, '-> smallType
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
if (dstSize < genTypeSize(TYP_INT))
{
}
#ifndef LEGACY_BACKEND
+#if FEATURE_MULTIREG_ARGS
// For RyuJIT backend we will expand a Multireg arg into a GT_LIST
// with multiple indirections, so here we consider spilling it into a tmp LclVar.
//
// Note that Arm32 is a LEGACY_BACKEND and it defines FEATURE_MULTIREG_ARGS
// so we skip this for ARM32 until it is ported to use RyuJIT backend
//
-#if FEATURE_MULTIREG_ARGS
+
bool isMultiRegArg = (curArgTabEntry->numRegs > 1);
if ((argx->TypeGet() == TYP_STRUCT) && (curArgTabEntry->needTmp == false))
{
assert(argsComplete == true);
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nSorting the arguments:\n");
+ }
+#endif
+
/* Shuffle the arguments around before we build the gtCallLateArgs list.
The idea is to move all "simple" arguments like constants and local vars
to the end of the table, and move the complex arguments towards the beginning
+------------------------------------+ <--- argTable[0]
*/
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\nSorting the arguments:\n");
- }
-#endif
-
/* Set the beginning and end for the new argument table */
unsigned curInx;
int regCount = 0;
else
{
// Create a temp assignment for the argument
- // Put the temp in the gtCallLateArgs list
+ // Put the temp in the gtCallLateArgs list
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (compiler->verbose)
{
// The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
// in the implementation of fast tail call.
// *********** END NOTE *********
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
// The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper has a custom calling convention. Set the argument registers
#endif // _TARGET_X86_
/* Morph the user arguments */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_ARM_)
// HFA a3, // passed in f4/f5/f6
// double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
// HFA a5, // passed in f10/f11/f12
- // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill slots
+ // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill
+ // // slots
// float a7, // passed in f1 (back-filled)
// float a8, // passed in f7 (back-filled)
// float a9, // passed in f13 (back-filled)
else // !lateArgsComputed
{
//
- // Figure out the size of the argument. This is either in number of registers, or number of TARGET_POINTER_SIZE
- // stack slots, or the sum of these if the argument is split between the registers and the stack.
+ // Figure out the size of the argument. This is either in number of registers, or number of
+ // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and
+ // the stack.
//
if (argx->IsArgPlaceHolderNode() || (!isStructArg))
{
// The following if-then-else needs to be carefully refactored.
// Basically the else portion wants to turn a struct load (a GT_OBJ)
- // into a GT_IND of the appropriate size.
+ // into a GT_IND of the appropriate size.
// It can do this with structs sizes that are 1, 2, 4, or 8 bytes.
// It can't do this when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined (Why?)
// TODO-Cleanup: Remove the #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING below.
// It also can't do this if we have a HFA arg,
// unless we have a 1-elem HFA in which case we want to do the optimization.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef _TARGET_X86_
#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
// constituent fields, and so we have to re-assemble it
copyBlkClass = objClass;
#ifdef _TARGET_ARM_
- // Alignment constraints may cause us not to use (to "skip") some argument registers.
- // Add those, if any, to the skipped (int) arg reg mask.
+ // Alignment constraints may cause us not to use (to "skip") some argument
+ // registers. Add those, if any, to the skipped (int) arg reg mask.
fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
#endif // _TARGET_ARM_
}
}
else
{
- // if the valuetype size is not a multiple of sizeof(void*),
+ // If the valuetype size is not a multiple of sizeof(void*),
// we must copyblk to a temp before doing the obj to avoid
// the obj reading memory past the end of the valuetype
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
- // TODO-X86-CQ: [1091733] Revisit for small structs, we should use push instruction
+ // TODO-X86-CQ: [1091733] Revisit for small structs, we should use push instruction
copyBlkClass = objClass;
size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
#else // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND)
// Now we know if the argument goes in registers or not and how big it is,
// whether we had to just compute it or this is a re-morph call and we looked it up.
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_ARM_
-
// If we ever allocate a floating point argument to the stack, then all
// subsequent HFA/float/double arguments go on the stack.
if (!isRegArg && passUsingFloatRegs)
#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+#ifndef LEGACY_BACKEND
// In the future we can migrate UNIX_AMD64 to use this
// method instead of fgMorphSystemVStructArgs
-#ifndef LEGACY_BACKEND
+
// We only build GT_LISTs for MultiReg structs for the RyuJIT backend
if (hasMultiregStructArgs)
{
// Currently only ARM64 is using this method to morph the MultiReg struct args
// in the future AMD64_UNIX and for HFAs ARM32, will also use this method
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_ARM_
NYI_ARM("fgMorphMultiregStructArgs");
#endif
if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
{
- // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer registers
- // So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
+ // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer
+ // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
//
JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n", varNum);
//
GenTreePtr addr;
- // Widen 'index' on 64-bit targets
#ifdef _TARGET_64BIT_
+ // Widen 'index' on 64-bit targets
if (index->TypeGet() != TYP_I_IMPL)
{
if (index->OperGet() == GT_CNS_INT)
// contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
// large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
// checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
- /// This is left here to point out how to implement it.
+ // This is left here to point out how to implement it.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1
// If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression
return;
}
- // Success!
-
#ifdef DEBUG
if (verbose)
{
// a call node might be marked as an in-line candidate and could fail to be in-lined. In which case
// fgInline() will replace return value place holder with call node using gtCloneExpr() which is
// currently not copying/setting callSig.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (callee->IsTailPrefixedCall())
{
// If this is a 'regular' call, mark the basic block as
// having a call (for computing full interruptibility).
- //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_AMD64_
// Amd64 note: If this is a fast tail call then don't count it as a call
// since we don't insert GC-polls but instead make the method fully GC
// interruptible.
-#ifdef _TARGET_AMD64_
if (!call->IsFastTailCall())
#endif
{
{
noway_assert(call->TypeGet() == TYP_INT);
- // Check for GetClassFromHandle(handle) and obj.GetType() both of which will only return RuntimeType objects.
- // Then if either operand is one of these two calls we can simplify op_Equality/op_Inequality to GT_NE/GT_NE:
- // One important invariance that should never change is that type equivalency is always equivalent to object
- // identity equality for runtime type objects in reflection. This is also reflected in RuntimeTypeHandle::TypeEquals.
- // If this invariance would ever be broken, we need to remove the optimization below.
+ // Check for GetClassFromHandle(handle) and obj.GetType(), both of which will only return RuntimeType
+ // objects. Then if either operand is one of these two calls we can simplify op_Equality/op_Inequality to
+ // GT_EQ/GT_NE: One important invariant that should never change is that type equivalency is always
+ // equivalent to object identity equality for runtime type objects in reflection. This is also reflected
+ // in RuntimeTypeHandle::TypeEquals. If this invariant is ever broken, we need to remove the
+ // optimization below.
GenTreePtr op1 = call->gtCallArgs->gtOp.gtOp1;
GenTreePtr op2 = call->gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
}
// Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack.
- GenTreePtr origDest = nullptr; // Will only become non-null if we do the transformation (and thus require copy-back).
+ GenTreePtr origDest = nullptr; // Will only become non-null if we do the transformation (and thus require
+ // copy-back).
unsigned retValTmpNum = BAD_VAR_NUM;
CORINFO_CLASS_HANDLE structHnd = nullptr;
if (call->HasRetBufArg() &&
// "dependently" promoted, so it will be in the right memory location. One possible
// further reason for avoiding field-wise stores is that the struct might have alignment-induced
// holes, whose contents could be meaningful in unsafe code. If we decide that's a valid
- // concern, then we could compromise, and say that address-exposed + fields do not completely cover the memory
- // of the struct prevent field-wise assignments. Same situation exists for the "src" decision.
+ // concern, then we could compromise, and say that being address-exposed, plus having fields that do not
+ // completely cover the memory of the struct, prevents field-wise assignments. Same situation exists for
+ // the "src" decision.
if (varTypeIsStruct(lclVarTree) &&
(lvaTable[lclNum].lvPromoted || lclVarIsSIMDType(lclNum)))
{
#if CPU_USES_BLOCK_MOVE
compBlkOpUsed = true;
#endif
+#ifdef CPBLK_UNROLL_LIMIT
// Note that the unrolling of CopyBlk is only implemented on some platforms
// Currently that includes x64 and Arm64 but not x86 or Arm32
-#ifdef CPBLK_UNROLL_LIMIT
+
// If we have a CopyObj with a dest on the stack
// we will convert it into an GC Unsafe CopyBlk that is non-interruptible
// when its size is small enough to be completely unrolled (i.e. between [16..64] bytes)
// be a definition.
addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
assert(lvaTable[addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum].lvLclBlockOpAddr == 1);
- addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our local stack frame
+ addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our
+ // local stack frame
}
}
}
// IMPORTANT NOTE: this optimization relies on a one-to-one mapping between
// type handles and instances of System.Type
// If this invariant is ever broken, the optimization will need updating
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef LEGACY_BACKEND
if ( op1->gtOper == GT_CALL &&
if (info.compCompHnd->canInlineTypeCheckWithObjectVTable(clsHnd))
{
// Method Table tree
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef LEGACY_BACKEND
GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp);
#else
//
REMOVE:
// The data at optAssertionTabPrivate[i] is to be removed
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
{
//
if (!optValnumCSE_phase)
{
- //Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this is
- //all we need.
+ // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this
+ // is all we need.
GenTreePtr op1SideEffects = NULL;
// The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
bool dstIsSafeLclVar;
case GT_ASG:
-
/* We'll convert "a = a <op> x" into "a <op>= x" */
/* and also "a = x <op> a" into "a <op>= x" for commutative ops */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if !LONG_ASG_OPS
if (typ == TYP_LONG)
// lost when assigning the op result to a small type var,
// but it may not be ok for the right shift operation where the higher bits
// could be shifted into the lower bits and preserved.
- // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) == (sbyte)x >>signed y))
- // as do unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) == (ubyte)x >>unsigned y), but
- // signed right shift of an unigned small type may give the wrong result:
+ // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) ==
+ // (sbyte)x >>signed y)) as do unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) ==
+ // (ubyte)x >>unsigned y), but signed right shift of an unsigned small type may give the wrong
+ // result:
// e.g. (ubyte)((int)(ubyte)0xf0 >>signed 4) == 0x0f,
// but (ubyte)0xf0 >>signed 4 == 0xff which is incorrect.
// The result becomes correct if we use >>unsigned instead of >>signed.
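The hazard described above can be reproduced with plain C++ types; this is a minimal standalone sketch (not JIT IR) using the same 0xf0 >> 4 example, relying on the arithmetic right shift of negative values that mainstream compilers implement:

#include <cstdio>

int main()
{
    unsigned char x = 0xf0;

    // Original, widened computation: shift the zero-extended int value.
    unsigned char wide = (unsigned char)((int)x >> 4);                    // 0x0f

    // Narrowed computation with a signed shift: the byte is reinterpreted as -16,
    // so sign bits are shifted in and the result is wrong.
    unsigned char narrowedSigned = (unsigned char)((signed char)x >> 4);  // 0xff

    // Narrowed computation with an unsigned shift stays correct.
    unsigned char narrowedUnsigned = (unsigned char)(x >> 4);             // 0x0f

    printf("wide=%02x signed=%02x unsigned=%02x\n", (unsigned)wide, (unsigned)narrowedSigned, (unsigned)narrowedUnsigned);
    return 0;
}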
ssize_t ishf = op2->gtIntConCommon.IconValue();
ssize_t iadd = cns->gtIntConCommon.IconValue();
-// printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
+ // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
/* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
// (x >>> y) | (x << (-y + N))
// where N == bitsize(x), M is const, and
// M & (N - 1) == N - 1
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef _TARGET_64BIT_
if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
case GT_SUB: helper = CPX_R4_SUB; break;
case GT_MUL: helper = CPX_R4_MUL; break;
case GT_DIV: helper = CPX_R4_DIV; break;
-// case GT_MOD: helper = CPX_R4_REM; break;
+ // case GT_MOD: helper = CPX_R4_REM; break;
case GT_EQ : helper = CPX_R4_EQ ; break;
case GT_NE : helper = CPX_R4_NE ; break;
return tree;
}
+
/* This is a (real) return value -- check its type */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
GenTreeCall* call = morph->AsCall();
// Could either be
// - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
- // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing a jmp.
+ // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
+ // a jmp.
noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
(call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && (compCurBB->bbFlags & BBF_HAS_JMP)));
}
/* This must be a tail call that caused a GCPoll to get
injected. We haven't actually morphed the call yet
but the flag still got set, clear it here... */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
#endif
+
noway_assert(compTailCallUsed);
noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
noway_assert(stmt->gtNext == NULL);
// Could either be
// - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
- // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing a jmp.
+ // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
+ // a jmp.
noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
(call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && (compCurBB->bbFlags & BBF_HAS_JMP)));
}
(genReturnBB != block) &&
((block->bbFlags & BBF_HAS_JMP) == 0))
{
- /* We'll jump to the genReturnBB */
+ /* We'll jump to the genReturnBB */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if !defined(_TARGET_X86_)
if (info.compFlags & CORINFO_FLG_SYNCH)
void Compiler::fgSetOptions()
{
-
- /* Should we force fully interruptible code ? */
-
#ifdef DEBUG
+ /* Should we force fully interruptible code ? */
if (JitConfig.JitFullyInt() ||
compStressCompile(STRESS_GENERIC_VARN, 30))
{
codeGen->setFramePointerRequiredGCInfo(true);
}
-// printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not");
+ // printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not");
}
}
//
- // If the lvRefCnt is zero and we have a struct promoted parameter we can end up with an extra store of the the
- // incoming register into the stack frame slot.
+ // If the lvRefCnt is zero and we have a struct promoted parameter we can end up with an extra store of
+ // the incoming register into the stack frame slot.
// In that case, we would like to avoid promotion.
// However we haven't yet computed the lvRefCnt values so we can't do that.
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if 0
// Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
{
// Normed struct
// A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
- // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8 bytes).
- // Normally, the type of the local var and the type of GT_FIELD are equivalent. However, there
- // is one extremely rare case where that won't be true. An enum type is a special value type
- // that contains exactly one element of a primitive integer type (that, for CLS programs is named "value__").
- // The VM tells us that a local var of that enum type is the primitive type of the enum's single field.
- // It turns out that it is legal for IL to access this field using ldflda or ldfld. For example:
+ // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
+ // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
+ // there is one extremely rare case where that won't be true. An enum type is a special value type
+ // that contains exactly one element of a primitive integer type (that, for CLS programs is named
+ // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
+ // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
+ // ldfld. For example:
//
// .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
// {
tree->gtLclFld.SetLclNum(fieldLclIndex);
// We need to keep the types 'compatible'. If we can switch back to a GT_LCL_VAR
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef ARM_SOFTFP
assert(varTypeIsIntegralOrI(tree->TypeGet()) || varTypeIsFloating(tree->TypeGet()));
#else
// In the first case, tree may no longer be a leaf, but we're done with it; it is a leaf in the second case.
return WALK_SKIP_SUBTREES;
}
- // Otherwise...
#ifdef FEATURE_SIMD
if (tree->gtOp.gtOp1->OperGet() == GT_SIMD)
{
#endif
cse_def_cost = 2;
cse_use_cost = 2;
- extra_yes_cost = BB_UNITY_WEIGHT * 2; // Extra cost in case we have to spill/restore a caller saved register
+ extra_yes_cost = BB_UNITY_WEIGHT * 2; // Extra cost in case we have to spill/restore a caller
+ // saved register
}
}
else // Conservative CSE promotion
#endif
cse_def_cost = 3;
cse_use_cost = 3;
- extra_yes_cost = BB_UNITY_WEIGHT * 4; // Extra cost in case we have to spill/restore a caller saved register
+ extra_yes_cost = BB_UNITY_WEIGHT * 4; // Extra cost in case we have to spill/restore a caller
+ // saved register
}
// If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable
// estimate the cost from lost codesize reduction if we do not perform the CSE
if (candidate->Size() > cse_use_cost)
{
- Compiler::CSEdsc* dsc = candidate->CseDsc(); // We need to retrieve the actual use count, not the weighted count
+ Compiler::CSEdsc* dsc = candidate->CseDsc(); // We need to retrieve the actual use count, not the
+ // weighted count
extra_no_cost = candidate->Size() - cse_use_cost;
extra_no_cost = extra_no_cost * dsc->csdUseCount * 2;
}
// Record iterator.
optLoopTable[loopInd].lpIterTree = incr;
+#if COUNT_LOOPS
// Save the initial value of the iterator - can be lclVar or constant
// Flag the loop accordingly.
-#if COUNT_LOOPS
iterLoopCount++;
#endif
/* We will use the following terminology:
* HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
Not part of the looping of the loop.
- * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop, but not the outer loop. ???)
+ * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop,
+ * but not the outer loop. ???)
* TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
* BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
* EXIT - the loop exit or the block right after the bottom
* The example above is not a loop since we bail after the first iteration
*
* The condition we have to check for is
- * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is reachable,
- * it can only be reached through ENTRY, therefore we have a way back to ENTRY
+ * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is
+ * reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY
*
* 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
* loop bottom then we cannot iterate
BasicBlock* topPredBlock = topPred->flBlock;
// Skip if topPredBlock is in the loop.
- // Note that this uses block number to detect membership in the loop. We are adding blocks during canonicalization,
- // and those block numbers will be new, and larger than previous blocks. However, we work outside-in, so we
- // shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
+ // Note that this uses block number to detect membership in the loop. We are adding blocks during
+ // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
+ // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
{
JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not redirecting its bottom edge\n",
}
/* Looks like a good idea to unroll this loop, let's do it! */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
// We've previously made a decision whether to have separate return epilogs, or branch to one.
// There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
- // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a heuristic tradeoff;
- // perhaps we're just choosing to live with 4 as the limit.)
+ // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a
+ // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
if (fgReturnCount + loopRetCount > 4)
{
JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, would exceed the limit of 4.\n", loopRetCount, fgReturnCount);
/*extendRegion*/true);
BasicBlock::CloneBlockState(this, newBlk, blk);
- // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert the
- // cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding loop, if one
- // exists -- the parent of the loop we're cloning.
+ // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
+ // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
+ // loop, if one exists -- the parent of the loop we're cloning.
newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
if (newFirst == nullptr) newFirst = newBlk;
switch (oper)
{
/* Constants can usually be narrowed by changing their value */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef _TARGET_64BIT_
__int64 lval;
}
#endif
- // Test Data stuff..
#ifdef DEBUG
+ // Test Data stuff..
// If we have no test data, early out.
if (m_nodeTestData == NULL) return;
NodeToTestDataMap* testData = GetNodeTestData();
void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
{
// Do this loop, then recursively do all nested loops.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if LOOP_HOIST_STATS
// Record stats
/* NOTE: Caller ensures that this variable has only one def */
-// printf("limit [%d]:\n", add1); gtDispTree(op1);
-// printf("size [%d]:\n", add2); gtDispTree(op2);
-// printf("\n");
+ // printf("limit [%d]:\n", add1); gtDispTree(op1);
+ // printf("size [%d]:\n", add2); gtDispTree(op2);
+ // printf("\n");
}
{
// If the array address has been taken, don't do the optimization
// (this restriction can be lowered a bit, but I don't think it's worth it)
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
{
gtDispTree(pArray);
}
#endif
-
return false;
}
}
}
}
-// Merge assertions from the pred edges of the block, i.e., check for any assertions about "op's" value numbers for phi arguments.
-// If not a phi argument, check if we assertions about local variables.
+// Merge assertions from the pred edges of the block, i.e., check for any assertions about "op's" value numbers for phi
+ // arguments. If not a phi argument, check if we have assertions about local variables.
void RangeCheck::MergeAssertion(BasicBlock* block, GenTreePtr stmt, GenTreePtr op, SearchPath* path, Range* pRange DEBUGARG(int indent))
{
JITDUMP("Merging assertions from pred edges of BB%02d for op(%p) $%03x\n", block->bbNum, dspPtr(op), op->gtVNPair.GetConservative());
Compiler* comp = data->compiler;
GenTreeObj* obj = (*ppTree)->AsObj();
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
// For UNIX struct passing, we can have Obj nodes for arguments.
// For other cases, we should never see a non-SIMD type here.
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
if (!varTypeIsSIMD(obj))
{
return;
{
// We are changing the child from GT_LCL_VAR TO GT_LCL_VAR_ADDR.
// Therefore gtType of the child needs to be changed to a TYP_BYREF
-
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (child->gtOper == GT_LCL_VAR)
{
static void DuplicateCommaProcessOneTree (Compiler* comp, Rationalizer* irt, BasicBlock* block, GenTree* tree);
static void FixupIfCallArg (GenTreeStack* parentStack,
- GenTree* oldChild,
+ GenTree* oldChild,
GenTree* newChild);
static void FixupIfSIMDLocal (Compiler* comp, GenTreeLclVarCommon* tree);
GenTreePtr rhs);
Location RewriteTopLevelComma(Location loc);
-
+
// SIMD related transformations
static void RewriteObj(GenTreePtr* ppTree, Compiler::fgWalkData* data);
static void RewriteCopyBlk(GenTreePtr* ppTree, Compiler::fgWalkData* data);
static void RewriteInitBlk(GenTreePtr* ppTree, Compiler::fgWalkData* data);
- // Intrinsic related
+ // Intrinsic related
static void RewriteNodeAsCall(GenTreePtr* ppTree, Compiler::fgWalkData* data,
CORINFO_METHOD_HANDLE callHnd,
#ifdef FEATURE_READYTORUN_COMPILER
CORINFO_CONST_LOOKUP entryPoint,
#endif
GenTreeArgList* args);
- static void RewriteIntrinsicAsUserCall(GenTreePtr* ppTree, Compiler::fgWalkData* data);
+ static void RewriteIntrinsicAsUserCall(GenTreePtr* ppTree, Compiler::fgWalkData* data);
};
inline Rationalizer::Rationalizer(Compiler* _comp)
BitScanForward(&reg, (DWORD)regmask);
return rpGetPredictForReg((regNumber)reg);
}
- /* It has multiple bits set */
+
#if defined(_TARGET_ARM_)
+ /* It has multiple bits set */
else if (regmask == (RBM_R0 | RBM_R1)) { result = PREDICT_PAIR_R0R1; }
else if (regmask == (RBM_R2 | RBM_R3)) { result = PREDICT_PAIR_R2R3; }
#elif defined(_TARGET_X86_)
+ /* It has multiple bits set */
else if (regmask == (RBM_EAX | RBM_EDX)) { result = PREDICT_PAIR_EAXEDX; }
else if (regmask == (RBM_ECX | RBM_EBX)) { result = PREDICT_PAIR_ECXEBX; }
#endif
#if defined(DEBUG) || !NOGC_WRITE_BARRIERS
{
#ifdef _TARGET_ARM_
- //
- // For the ARM target we have an optimized JIT Helper
- // that only trashes a subset of the callee saved registers
- //
#ifdef DEBUG
if (verbose)
printf("Adding interference with RBM_CALLEE_TRASH_NOGC for NoGC WriteBarrierAsg\n");
#endif
+ //
+ // For the ARM target we have an optimized JIT Helper
+ // that only trashes a subset of the callee saved registers
+ //
+
// NOTE: Adding it to the gtUsedRegs will cause the interference to
// be added appropriately
op1->gtUsedRegs |= RBM_R0;
op2->gtUsedRegs |= RBM_R1;
#else // _TARGET_ARM_
- // We have to call a normal JIT helper to perform the Write Barrier Assignment
- // It will trash the callee saved registers
#ifdef DEBUG
if (verbose)
printf("Adding interference with RBM_CALLEE_TRASH for NoGC WriteBarrierAsg\n");
#endif
+ // We have to call a normal JIT helper to perform the Write Barrier Assignment
+ // It will trash the callee saved registers
+
tree->gtUsedRegs |= RBM_CALLEE_TRASH;
#endif // _TARGET_ARM_
}
{
predictReg = PREDICT_SCRATCH_REG;
}
- //
+#ifdef _TARGET_ARM_
// If we are widening an int into a long using a targeted register pair we
// should retarget so that the low part get loaded into the appropriate register
- //
-#ifdef _TARGET_ARM_
else if (predictReg == PREDICT_PAIR_R0R1)
{
predictReg = PREDICT_REG_R0;
}
#endif
#ifdef _TARGET_X86_
+ // If we are widening an int into a long using a targeted register pair we
+ // should retarget so that the low part get loaded into the appropriate register
else if (predictReg == PREDICT_PAIR_EAXEDX)
{
predictReg = PREDICT_REG_EAX;
case GT_NOP:
// these unary operators do not write new values
// and thus won't need a scratch register
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if OPT_BOOL_OPS
if (!op1)
// We will compute a new regMask that holds the register(s)
// that we will load the indirection into.
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef _TARGET_64BIT_
if (type == TYP_LONG)
{
/* Evaluate the <else> subtree */
// First record the post-then liveness, and reset the current liveness to the else
// branch liveness.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
VARSET_TP VARSET_INIT(this, postThenLive, compCurLife);
#endif
+
VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtElseLiveSet);
rpPredictTreeRegUse(elseTree, predictReg, lockedRegs, rsvdRegs | RBM_LASTUSE);
if (promotedStructLocal != NULL)
{
- // All or a portion of this struct will be placed in the argument registers indicated by "curArgMask".
- // We build in knowledge of the order in which the code is generated here, so that the second arg to be evaluated
- // interferes with the reg for the first, the third with the regs for the first and second, etc.
- // But since we always place the stack slots before placing the register slots we do not add inteferences
- // for any part of the struct that gets passed on the stack.
+ // All or a portion of this struct will be placed in the argument registers indicated by
+ // "curArgMask". We build in knowledge of the order in which the code is generated here, so
+ // that the second arg to be evaluated interferes with the reg for the first, the third with
+ // the regs for the first and second, etc. But since we always place the stack slots before
+ // placing the register slots we do not add interferences for any part of the struct that gets
+ // passed on the stack.
argPredictReg = PREDICT_NONE; // We will target the individual fields into registers but not the whole struct
regMaskTP prevArgMask = RBM_NONE;
}
}
-
- // Mark required registers for emitting tailcall profiler callback as used
#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ // Mark required registers for emitting tailcall profiler callback as used
if (compIsProfilerHookNeeded() &&
tree->gtCall.IsTailCall() &&
(tree->gtCall.gtCallType == CT_USER_FUNC))
noway_assert(refCntEBP == 0);
#endif
- /* Determine how the EBP register should be used */
-
#ifdef DEBUG
if (verbose)
{
}
#endif
+ /* Determine how the EBP register should be used */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if DOUBLE_ALIGN
+
if (!codeGen->isFramePointerRequired())
{
noway_assert(getCanDoubleAlign() < COUNT_DOUBLE_ALIGN);
if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
{
/* It's probably better to use EBP as a frame pointer */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (verbose)
printf("; Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
if (refCntWtdEBP > refCntWtdStkDbl * 2)
{
/* It's probably better to use EBP to enregister integer variables */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (verbose)
printf("; Predicting not to double-align ESP to allow EBP to be used to enregister variables\n");
goto NO_DOUBLE_ALIGN;
}
- /*
- OK we passed all of the benefit tests
- so we'll predict a double aligned frame
- */
#ifdef DEBUG
if (verbose)
printf("; Predicting to create a double-aligned frame\n");
#endif
+ /*
+ OK we passed all of the benefit tests
+ so we'll predict a double aligned frame
+ */
+
rpFrameType = FT_DOUBLE_ALIGN_FRAME;
goto REVERSE_EBP_ENREG;
}
if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired())
{
#ifdef _TARGET_XARCH_
+ // clang-format off
/* If we are using EBP to enregister variables then
will we actually save bytes by setting up an EBP frame?
// We also pay 5 extra bytes for the MOV EBP,ESP and LEA ESP,[EBP-0x10]
// to set up an EBP frame in the prolog and epilog
#define EBP_FRAME_SETUP_SIZE 5
+ // clang-format on
if (refCntStk > (refCntEBP + EBP_FRAME_SETUP_SIZE))
{
if (bytesSaved > ((refCntWtdEBP * mem_access_weight) / BB_UNITY_WEIGHT))
{
/* It's probably not a good idea to use EBP in our predictions */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose && (refCntEBP > 0))
printf("; Predicting that it's not worth using EBP to enregister variables\n");
lclNum++ , varDsc++)
{
// For RyuJIT, lvOnFrame is set by LSRA, except in the case of zero-ref, which is set below.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef LEGACY_BACKEND
varDsc->lvOnFrame = false;
#endif // LEGACY_BACKEND
#ifdef DEBUGGING_SUPPORT
+#ifdef DEBUG
/* For debugging, note that we have to reserve space even for
unused variables if they are ever in scope. However, this is not
an issue as fgExtendDbgLifetimes() adds an initialization and
variables in scope will not have a zero ref-cnt.
*/
-#ifdef DEBUG
if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked)
{
for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++)
noway_assert(lvaLclSize(lclNum) != 0);
#endif // FEATURE_FIXED_OUT_ARGS
- varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the stack frame
+ varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the
+ // stack frame
NOT_STK:;
varDsc->lvFramePointerBased = codeGen->isFramePointerUsed();
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+// clang-format off
+
/*****************************************************************************/
/*****************************************************************************/
#ifndef REGDEF
#undef REGALIAS
#undef XMMMASK
/*****************************************************************************/
+
+// clang-format on
if (!isBackFilled)
{
- // We didn't back-fill a register (on ARM), so skip the number of registers that we allocated.
-#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // For System V the reg type counters should be independent.
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
+ // For System V the reg type counters should be independent.
nextReg(TYP_INT, numRegs);
nextReg(TYP_FLOAT, numRegs);
#else
+ // We didn't back-fill a register (on ARM), so skip the number of registers that we allocated.
nextReg(type, numRegs);
#endif
}
}
#endif // _TARGET_ARM_
- assert(regArgNum(type) + cAlignSkipped <= maxRegArgNum(type)); // if equal, then we aligned the last slot, and the arg can't be enregistered
+ assert(regArgNum(type) + cAlignSkipped <= maxRegArgNum(type)); // if equal, then we aligned the last slot, and the
+ // arg can't be enregistered
regArgNum(type) += cAlignSkipped;
return cAlignSkipped;
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+// clang-format off
+
/*****************************************************************************/
/*****************************************************************************/
#ifndef REGDEF
#undef REGDEF
#undef REGALIAS
/*****************************************************************************/
+
+// clang-format on
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+// clang-format off
+
/*****************************************************************************/
/*****************************************************************************/
#ifndef REGDEF
#undef REGDEF
#undef REGALIAS
/*****************************************************************************/
+
+// clang-format on
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+// clang-format off
/*****************************************************************************/
/*****************************************************************************/
#ifndef REGDEF
/*****************************************************************************/
#undef REGDEF
/*****************************************************************************/
+
+// clang-format on
/* Can't mark a register pair more than once as used */
-// assert((regMask & rsMaskUsed) == 0);
+ // assert((regMask & rsMaskUsed) == 0);
/* Mark the registers as 'used' */
/* Normally, trees are unspilled in the order of being spilled due to
the post-order walking of trees during code-gen. However, this will
not be true for something like a GT_ARR_ELEM node */
-
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef LEGACY_BACKEND
SpillDsc* multi = rsSpillDesc[reg];
#endif // LEGACY_BACKEND
#endif
- /* Record the new value for the register. ptr var needed for
- * lifetime extension
- */
-
#ifdef DEBUG
if (compiler->verbose)
printf("\t\t\t\t\t\t\tThe register %s now holds V%02u\n", compiler->compRegVarName(reg), var);
#endif
+ /* Record the new value for the register. ptr var needed for
+ * lifetime extension
+ */
+
rsRegValues[reg].rvdKind = RV_LCL_VAR;
// If this is a cast of a 64 bit int, then we must have the low 32 bits.
}
}
- /* Do we need to allocate a new temp */
#ifdef DEBUG
+ /* Do we need to allocate a new temp */
bool isNewTemp = false;
#endif // DEBUG
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
-
/*****************************************************************************
*
* Keep track of the current state of each register. This is intended to be
if (bValue)
{
- // Mark as used
-
#ifdef DEBUG
if (m_rsCompiler->verbose)
{
}
#endif
+ // Mark as used
assert((rsGetMaskLock() & regMask) == 0);
#if FEATURE_STACK_FP_X87
}
else
{
- // Mark as free
-
#ifdef DEBUG
if (m_rsCompiler->verbose)
{
}
#endif
+ // Mark as free
assert((rsGetMaskUsed() & regMask) == regMask);
// Are we freeing a multi-use registers?
// Select(BitVector vc, va, vb) = (va & vc) | (vb & !vc)
// Select(op1, op2, op3) = (op2 & op1) | (op3 & !op1)
- // = SIMDIntrinsicBitwiseOr(SIMDIntrinsicBitwiseAnd(op2, op1), SIMDIntrinsicBitwiseAndNot(op3, op1))
+ // = SIMDIntrinsicBitwiseOr(SIMDIntrinsicBitwiseAnd(op2, op1),
+ // SIMDIntrinsicBitwiseAndNot(op3, op1))
//
// If Op1 has side effect, create an assignment to a temp
GenTree* tmp = op1;
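// Editorial aside -- a minimal standalone sketch (not part of this change) of the bitwise-select
// identity described in the comment above, applied to a single 32-bit lane; the SIMD intrinsic is
// assumed to apply the same pattern lane by lane, with the condition mask all-ones or all-zeros per lane.
#include <cassert>
#include <cstdint>

static uint32_t SelectLane(uint32_t vc, uint32_t va, uint32_t vb)
{
    // Select(vc, va, vb) = (va & vc) | (vb & ~vc)
    return (va & vc) | (vb & ~vc);
}

int main()
{
    assert(SelectLane(0xFFFFFFFFu, 7u, 9u) == 7u); // condition lane set   -> take va
    assert(SelectLane(0x00000000u, 7u, 9u) == 9u); // condition lane clear -> take vb
    return 0;
}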
// return the address node.
//
// TODO-CQ:
-// 1. Currently just support for GT_FIELD and GT_INDEX, because we can only verify the GT_INDEX node or GT_Field are located contiguously or not.
-// In future we should support more cases.
-// 2.Though it happens to just work fine front-end phases are not aware of GT_LEA node. Therefore, convert these to use GT_ADDR .
+// 1. Currently we only support GT_FIELD and GT_INDEX, because we can only verify whether the GT_INDEX or GT_FIELD
+// nodes are located contiguously. In future we should support more cases.
+// 2. Though it happens to just work, front-end phases are not aware of the GT_LEA node. Therefore, convert these
+// to use GT_ADDR.
GenTreePtr Compiler::createAddressNodeForSIMDInit(GenTreePtr tree, unsigned simdSize)
{
assert(tree->OperGet() == GT_FIELD || tree->OperGet() == GT_INDEX);
// so that this struct won't be promoted.
// e.g. s.x x is a field, and s is a struct, then we should set the s's lvUsedInSIMDIntrinsic as true.
// so that s won't be promoted.
- // Notice that if we have a case like s1.s2.x. s1 s2 are struct, and x is a field, then it is possible that s1 can be promoted, so that s2 can be promoted.
- // The reason for that is if we don't allow s1 to be promoted, then this will affect the other optimizations which are depend on s1's struct promotion.
+ // Notice that if we have a case like s1.s2.x, where s1 and s2 are structs and x is a field, then it is
+ // possible that s1 can be promoted, so that s2 can be promoted. The reason is that if we don't allow s1
+ // to be promoted, this will affect the other optimizations which depend on s1's struct promotion.
// TODO-CQ:
- // In future, we should optimize this case so that if there is a nested field like s1.s2.x and s1.s2.x's address is used for
- // initializing the vector, then s1 can be promoted but s2 can't.
+ // In future, we should optimize this case so that if there is a nested field like s1.s2.x and s1.s2.x's
+ // address is used for initializing the vector, then s1 can be promoted but s2 can't.
if(varTypeIsSIMD(obj) && obj->OperIsLocal())
{
setLclRelatedToSIMDIntrinsic(obj);
instruction ins = ins_Store(type);
if (getEmitter()->IsThreeOperandMoveAVXInstruction(ins))
{
- // In general, when we use a three-operands move instruction, we want to merge the src with itself.
- // This is an exception in that we actually want the "merge" behavior, so we must specify it with
- // all 3 operands.
+ // In general, when we use a three-operand move instruction, we want to merge the src with
+ // itself. This is an exception in that we actually want the "merge" behavior, so we must
+ // specify it with all 3 operands.
inst_RV_RV_RV(ins, targetReg, targetReg, srcReg, emitTypeSize(targetType));
}
else
if (baseType == TYP_FLOAT)
{
// v0 = v1 * v2
- // tmp = v0 // v0 = (3, 2, 1, 0) - each element is given by its position
+ // tmp = v0 // v0 = (3, 2, 1, 0) - each element is given by its
+ // // position
// tmp = shuffle(tmp, tmp, Shuffle(2,3,0,1)) // tmp = (2, 3, 0, 1)
// v0 = v0 + tmp // v0 = (3+2, 2+3, 1+0, 0+1)
// tmp = v0
// tmp = shuffle(tmp, tmp, Shuffle(0,1,2,3)) // tmp = (0+1, 1+0, 2+3, 3+2)
// v0 = v0 + tmp // v0 = (0+1+2+3, 0+1+2+3, 0+1+2+3, 0+1+2+3)
// // Essentially horizontal addition of all elements.
- // // We could achieve the same using SSEv3 instruction HADDPS.
+ // // We could achieve the same using SSEv3 instruction
+ // // HADDPS.
//
inst_RV_RV(INS_mulps, targetReg, op2Reg);
inst_RV_RV(INS_movaps, tmpReg, targetReg);
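// Editorial aside -- a scalar sketch (not the JIT's generated code) of the shuffle/add reduction
// outlined in the comment above: multiply the lanes, add each lane to its neighbour within a pair,
// then add the two pair sums, so every lane ends up holding the full dot product. The exact lane
// ordering of the real SHUFPS immediates is abstracted away here.
#include <cassert>

static float DotProduct4(const float (&a)[4], const float (&b)[4])
{
    float v[4], tmp[4];
    for (int i = 0; i < 4; i++)
        v[i] = a[i] * b[i];     // v0 = v1 * v2 (mulps)
    for (int i = 0; i < 4; i++)
        tmp[i] = v[i ^ 1];      // swap lanes within each pair (first shuffle)
    for (int i = 0; i < 4; i++)
        v[i] += tmp[i];         // each lane now holds the sum of its pair
    for (int i = 0; i < 4; i++)
        tmp[i] = v[i ^ 2];      // swap the two pairs (second shuffle)
    for (int i = 0; i < 4; i++)
        v[i] += tmp[i];         // each lane now holds the sum of all four products
    return v[0];
}

int main()
{
    float a[4] = {1.0f, 2.0f, 3.0f, 4.0f};
    float b[4] = {5.0f, 6.0f, 7.0f, 8.0f};
    assert(DotProduct4(a, b) == 70.0f); // 5 + 12 + 21 + 32
    return 0;
}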
#endif
/*****************************************************************************/
+// clang-format off
#ifdef FEATURE_SIMD
/*
#endif //!_TARGET_AMD64_
#endif //FEATURE_SIMD
+// clang-format on
const SM_OPCODE s_CodeSeqs[][MAX_CODE_SEQUENCE_LENGTH] =
{
- // ==== Single opcode states ====
#define SMOPDEF(smname,string) {smname, CODE_SEQUENCE_END},
+// ==== Single opcode states ====
#include "smopcode.def"
#undef SMOPDEF
{SM_CONV_R4, SM_MUL, CODE_SEQUENCE_END},
{SM_CONV_R4, SM_DIV, CODE_SEQUENCE_END},
- // {SM_CONV_R8, SM_ADD, CODE_SEQUENCE_END}, // Removed since it collides with ldelem.r8 in Math.InternalRound
- // {SM_CONV_R8, SM_SUB, CODE_SEQUENCE_END}, // Just remove the SM_SUB as well.
+ // {SM_CONV_R8, SM_ADD, CODE_SEQUENCE_END}, // Removed since it collides with ldelem.r8 in
+ // Math.InternalRound
+ // {SM_CONV_R8, SM_SUB, CODE_SEQUENCE_END}, // Just remove the SM_SUB as well.
{SM_CONV_R8, SM_MUL, CODE_SEQUENCE_END},
{SM_CONV_R8, SM_DIV, CODE_SEQUENCE_END},
//
// States in the state machine
//
+// clang-format off
const SMState g_SMStates[] =
{
// {term, len, lng, prev, SMOpcode and SMOpcodeName , offsets } // state ID and name
{ 1, 5, 195, 247, (SM_OPCODE) 74 /* add */, 0 }, // state 248 [ldarga.s -> ldfld -> ldarga.s -> ldfld -> add]
{ 1, 5, 195, 247, (SM_OPCODE) 75 /* sub */, 0 }, // state 249 [ldarga.s -> ldfld -> ldarga.s -> ldfld -> sub]
};
+// clang-format on
static_assert_no_msg(NUM_SM_STATES == sizeof(g_SMStates)/sizeof(g_SMStates[0]));
//
// JumpTableCells in the state machine
//
+// clang-format off
const JumpTableCell g_SMJumpTableCells[] =
{
// {src, dest }
{ 0, 0 }, // cell# 416
{ 0, 0 }, // cell# 417
};
+// clang-format on
const JumpTableCell * gp_SMJumpTableCells = g_SMJumpTableCells;
#ifdef SSA_FEATURE_DOMARR
// Allocate space for constant time computation of (a DOM b?) query.
- unsigned bbArrSize = m_pCompiler->fgBBNumMax + 1; // We will use 1-based bbNums as indices into these arrays, so add 1.
+ unsigned bbArrSize = m_pCompiler->fgBBNumMax + 1; // We will use 1-based bbNums as indices into these arrays, so
+ // add 1.
m_pDomPreOrder = jitstd::utility::allocate<int>(m_allocator, bbArrSize);
m_pDomPostOrder = jitstd::utility::allocate<int>(m_allocator, bbArrSize);
// Check if we've already inserted a phi node.
if (GetPhiNode(bbInDomFront, lclNum) == NULL)
{
- // We have a variable i that is defined in block j and live at l, and l belongs to dom frontier of j.
- // So insert a phi node at l.
+ // We have a variable i that is defined in block j and live at l, and l belongs to dom frontier of
+ // j. So insert a phi node at l.
JITDUMP("Inserting phi definition for V%02u at start of BB%02u.\n", lclNum, bbInDomFront->bbNum);
GenTreePtr phiLhs = m_pCompiler->gtNewLclvNode(lclNum, m_pCompiler->lvaTable[lclNum].TypeGet());
- // Create 'phiRhs' as a GT_PHI node for 'lclNum', it will eventually hold a GT_LIST of GT_PHI_ARG nodes.
- // However we have to construct this list so for now the gtOp1 of 'phiRhs' is a nullptr.
- // It will get replaced with a GT_LIST of GT_PHI_ARG nodes in SsaBuilder::AssignPhiNodeRhsVariables()
- // and in SsaBuilder::AddDefToHandlerPhis()
- //
+ // Create 'phiRhs' as a GT_PHI node for 'lclNum', it will eventually hold a GT_LIST of GT_PHI_ARG
+ // nodes. However we have to construct this list so for now the gtOp1 of 'phiRhs' is a nullptr.
+ // It will get replaced with a GT_LIST of GT_PHI_ARG nodes in
+ // SsaBuilder::AssignPhiNodeRhsVariables() and in SsaBuilder::AddDefToHandlerPhis()
+
GenTreePtr phiRhs = m_pCompiler->gtNewOperNode(GT_PHI, m_pCompiler->lvaTable[lclNum].TypeGet(), nullptr);
GenTreePtr phiAsg = m_pCompiler->gtNewAssignNode(phiLhs, phiRhs);
// Check if we've already inserted a phi node.
if (bbInDomFront->bbHeapSsaPhiFunc == NULL)
{
- // We have a variable i that is defined in block j and live at l, and l belongs to dom frontier of j.
- // So insert a phi node at l.
+ // We have a variable i that is defined in block j and live at l, and l belongs to dom frontier of
+ // j. So insert a phi node at l.
JITDUMP("Inserting phi definition for Heap at start of BB%02u.\n", bbInDomFront->bbNum);
bbInDomFront->bbHeapSsaPhiFunc = BasicBlock::EmptyHeapPhiDef;
}
if (!blockWrk.m_processed)
{
- // Push the block back on the stack with "m_processed" true, to record the fact that when its children have been
- // (recursively) processed, we still need to call BlockPopStacks on it.
+ // Push the block back on the stack with "m_processed" true, to record the fact that when its children have
+ // been (recursively) processed, we still need to call BlockPopStacks on it.
blocksToDo->push_back(BlockWork(block, true));
// Walk the block give counts to DEFs and give top of stack count for USEs.
// iterated dominance frontiers. (Recall that the dominance frontier of a block B is the set of blocks
// B3 such that there exists some B2 s.t. B3 is a successor of B2, and B dominates B2. Note that this dominance
// need not be strict -- B2 and B may be the same node. The iterated dominance frontier is formed by a closure
- // operation: the IDF of B is the smallest set that includes B's dominance frontier, and also includes the dominance frontier
- // of all elements of the set.)
+ // operation: the IDF of B is the smallest set that includes B's dominance frontier, and also includes the dominance
+ // frontier of all elements of the set.)
BlkToBlkSetMap* ComputeIteratedDominanceFrontier(BasicBlock** postOrder, int count);
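// Editorial aside -- a hedged worklist sketch (not SsaBuilder's implementation) of the closure
// described above: starting from a block's dominance frontier, keep folding in the dominance
// frontier of every block already in the result until no new blocks appear. "DomFrontierMap" is
// an assumed precomputed map from a block id to the set of blocks in its dominance frontier.
#include <map>
#include <set>
#include <vector>

typedef unsigned BlockId;
typedef std::map<BlockId, std::set<BlockId> > DomFrontierMap;

static std::set<BlockId> IteratedDomFrontier(BlockId b, const DomFrontierMap& domFrontier)
{
    std::set<BlockId>    idf;
    std::vector<BlockId> worklist(1, b);
    while (!worklist.empty())
    {
        BlockId cur = worklist.back();
        worklist.pop_back();
        DomFrontierMap::const_iterator it = domFrontier.find(cur);
        if (it == domFrontier.end())
            continue;
        for (std::set<BlockId>::const_iterator f = it->second.begin(); f != it->second.end(); ++f)
        {
            if (idf.insert(*f).second) // newly discovered -> its own frontier must be folded in too
            {
                worklist.push_back(*f);
            }
        }
    }
    return idf;
}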
// Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted order. Requires
// Requires "pRenameState" to be non-NULL and be currently used for variables renaming.
void BlockRenameVariables(BasicBlock* block, SsaRenameState* pRenameState);
- // Requires "tree" (assumed to be a statement in "block") to be searched for defs and uses to assign ssa numbers. Requires "pRenameState"
- // to be non-NULL and be currently used for variables renaming. Assumes that "isPhiDefn" implies that any definition occurring within "tree"
- // is a phi definition.
+ // Requires "tree" (assumed to be a statement in "block") to be searched for defs and uses to assign ssa numbers.
+ // Requires "pRenameState" to be non-NULL and be currently used for variables renaming. Assumes that "isPhiDefn"
+ // implies that any definition occurring within "tree" is a phi definition.
void TreeRenameVariables(GenTree* tree, BasicBlock* block, SsaRenameState* pRenameState, bool isPhiDefn);
// Assumes that "block" contains a definition for local var "lclNum", with SSA number "count".
assert(compiler->compMayHaveTransitionBlocks);
assert(compiler->compHndBBtabCount == 0);
- // Create a temp block
#ifdef DEBUG
compiler->fgSafeBasicBlockCreation = true;
#endif
+ // Create a temp block
BasicBlock* pBlock = compiler->bbNewBasicBlock(BBJ_ALWAYS);
#ifdef DEBUG
/*****************************************************************************/
+// clang-format off
#if defined(_TARGET_X86_)
#define CPU_LOAD_STORE_ARCH 0
#define CPOBJ_NONGC_SLOTS_LIMIT 4 // For CpObj code generation, this is the threshold of the number
// of contiguous non-gc slots that trigger generating rep movsq instead of
// sequences of movsq instructions
-
// The way we're currently disabling rep movs/stos is by setting a limit less than
// its unrolling counterparts. When lower takes the decision on which one to make it
// always asks for the unrolling limit first so you can say the JIT 'favors' unrolling.
#define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers
#define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp
#define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (without ".tail" prefix) made as fast tail calls.
- #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set
+ #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when
+ // the flags need to be set
#ifdef LEGACY_BACKEND
#define FEATURE_MULTIREG_ARGS_OR_RET 0 // Support for passing and/or returning single values in more than one register
#define FEATURE_MULTIREG_ARGS 0 // Support for passing a single argument in more than one register
#define MAX_RET_REG_COUNT 2 // Maximum registers used to return a value.
#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
- #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers
+ #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the
+ // RBM_CALLEE_TRASH registers
#else
- #define NOGC_WRITE_BARRIERS 0 // Do not modify this -- modify the definition above. (If we're not using ASM barriers we definitely don't have NOGC barriers).
+ #define NOGC_WRITE_BARRIERS 0 // Do not modify this -- modify the definition above. (If we're not using
+ // ASM barriers we definitely don't have NOGC barriers).
#endif
#define USER_ARGS_COME_LAST 0
#define EMIT_TRACK_STACK_DEPTH 1
- #define TARGET_POINTER_SIZE 4 // equal to sizeof(void*) and the managed pointer size in bytes for this target
- #define FEATURE_EH 1 // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses.
+ #define TARGET_POINTER_SIZE 4 // equal to sizeof(void*) and the managed pointer size in bytes for this
+ // target
+ #define FEATURE_EH 1 // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter,
+ // filter-handler, fault) and directly execute 'finally' clauses.
#define FEATURE_EH_FUNCLETS 0
- #define FEATURE_EH_CALLFINALLY_THUNKS 0 // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses.
+ #define FEATURE_EH_CALLFINALLY_THUNKS 0 // Generate call-to-finally code in "thunks" in the enclosing EH region,
+ // protected by "cloned finally" clauses.
#ifndef LEGACY_BACKEND
#define FEATURE_STACK_FP_X87 0
#else // LEGACY_BACKEND
#define FEATURE_STACK_FP_X87 1 // Use flat register file model
#endif // LEGACY_BACKEND
- #define FEATURE_X87_DOUBLES 0 // FP tree temps always use x87 doubles (when 1) or can be double or float (when 0).
- #define ETW_EBP_FRAMED 1 // if 1 we cannot use EBP as a scratch register and must create EBP based frames for most methods
+ #define FEATURE_X87_DOUBLES 0 // FP tree temps always use x87 doubles (when 1) or can be double or float
+ // (when 0).
+ #define ETW_EBP_FRAMED 1 // if 1 we cannot use EBP as a scratch register and must create EBP based
+ // frames for most methods
#define CSE_CONSTS 1 // Enable if we want to CSE constants
#ifndef LEGACY_BACKEND
// Registers used by PInvoke frame setup
#define REG_PINVOKE_FRAME REG_EDI // EDI is p/invoke "Frame" pointer argument to CORINFO_HELP_INIT_PINVOKE_FRAME helper
#define RBM_PINVOKE_FRAME RBM_EDI
- #define REG_PINVOKE_TCB REG_ESI // ESI is set to Thread Control Block (TCB) on return from CORINFO_HELP_INIT_PINVOKE_FRAME helper
+ #define REG_PINVOKE_TCB REG_ESI // ESI is set to Thread Control Block (TCB) on return from
+ // CORINFO_HELP_INIT_PINVOKE_FRAME helper
#define RBM_PINVOKE_TCB RBM_ESI
#define REG_PINVOKE_SCRATCH REG_EAX // EAX is trashed by CORINFO_HELP_INIT_PINVOKE_FRAME helper
#define RBM_PINVOKE_SCRATCH RBM_EAX
#define RBM_NON_BYTE_REGS RBM_NONE
#define RBM_BYTE_REG_FLAG RBM_NONE
#endif
+// clang-format on
/*****************************************************************************/
class Target
DEF_TP(tn ,nm , jitType, verType, sz,sze,asze, st,al, tf, howUsed )
*/
+// clang-format off
DEF_TP(UNDEF ,"<UNDEF>" , TYP_UNDEF, TI_ERROR, 0, 0, 0, 0, 0, VTF_ANY, 0 )
DEF_TP(VOID ,"void" , TYP_VOID, TI_ERROR, 0, 0, 0, 0, 0, VTF_ANY, 0 )
#endif // FEATURE_SIMD
DEF_TP(UNKNOWN ,"unknown" ,TYP_UNKNOWN, TI_ERROR, 0, 0, 0, 0, 0, VTF_ANY, 0 )
+// clang-format on
#undef GCS
#undef BRS
#define UWC_END 0xFF // "end" unwind code
#define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 19)
#define UW_MAX_CODE_WORDS_COUNT 15 // Max number that can be encoded in the "Code Words" field of the .pdata record
-#define UW_MAX_EPILOG_START_INDEX 0xFFU // Max number that can be encoded in the "Epilog Start Index" field of the .pdata record
+#define UW_MAX_EPILOG_START_INDEX 0xFFU // Max number that can be encoded in the "Epilog Start Index" field
+ // of the .pdata record
#elif defined(_TARGET_ARM64_)
const unsigned MAX_PROLOG_SIZE_BYTES = 100;
const unsigned MAX_EPILOG_SIZE_BYTES = 100;
#define UW_MAX_EPILOG_START_INDEX 0x3FFU
#endif // _TARGET_ARM64_
-#define UW_MAX_EPILOG_COUNT 31 // Max number that can be encoded in the "Epilog count" field of the .pdata record
-#define UW_MAX_EXTENDED_CODE_WORDS_COUNT 0xFFU // Max number that can be encoded in the "Extended Code Words" field of the .pdata record
-#define UW_MAX_EXTENDED_EPILOG_COUNT 0xFFFFU // Max number that can be encoded in the "Extended Epilog Count" field of the .pdata record
-#define UW_MAX_EPILOG_START_OFFSET 0x3FFFFU // Max number that can be encoded in the "Epilog Start Offset" field of the .pdata record
+#define UW_MAX_EPILOG_COUNT 31 // Max number that can be encoded in the "Epilog count" field
+ // of the .pdata record
+#define UW_MAX_EXTENDED_CODE_WORDS_COUNT 0xFFU // Max number that can be encoded in the "Extended Code Words"
+ // field of the .pdata record
+#define UW_MAX_EXTENDED_EPILOG_COUNT 0xFFFFU // Max number that can be encoded in the "Extended Epilog Count"
+ // field of the .pdata record
+#define UW_MAX_EPILOG_START_OFFSET 0x3FFFFU // Max number that can be encoded in the "Epilog Start Offset"
+ // field of the .pdata record
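// Editorial aside -- a small sketch relating the limits above to the bit widths they are assumed to
// encode (the field widths are assumptions about the ARM unwind-data format, not taken from this
// change): each limit is the all-ones value of its field, i.e. (1 << width) - 1.
static_assert(UW_MAX_EPILOG_COUNT == (1u << 5) - 1, "Epilog Count is assumed to be a 5-bit field");
static_assert(UW_MAX_EXTENDED_CODE_WORDS_COUNT == (1u << 8) - 1, "Extended Code Words is assumed to be an 8-bit field");
static_assert(UW_MAX_EXTENDED_EPILOG_COUNT == (1u << 16) - 1, "Extended Epilog Count is assumed to be a 16-bit field");
static_assert(UW_MAX_EPILOG_START_OFFSET == (1u << 18) - 1, "Epilog Start Offset is assumed to be an 18-bit field");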
//
// Forward declaration of class defined in emit.h
// set of epilogs, for this function/funclet.
bool ufiInProlog;
- static const unsigned UFI_INITIALIZED_PATTERN = 0x0FACADE0; // Something unlikely to be the fill pattern for uninitialized memory
+ static const unsigned UFI_INITIALIZED_PATTERN = 0x0FACADE0; // Something unlikely to be the fill pattern for
+ // uninitialized memory
unsigned ufiInitialized;
#endif // DEBUG
#ifdef DEBUG
- static const unsigned UWI_INITIALIZED_PATTERN = 0x0FACADE1; // Something unlikely to be the fill pattern for uninitialized memory
+ static const unsigned UWI_INITIALIZED_PATTERN = 0x0FACADE1; // Something unlikely to be the fill pattern for
+ // uninitialized memory
unsigned uwiInitialized;
#endif // DEBUG
// The prolog codes that are already at the end of the array need to get moved to the middle,
// with space for the non-matching epilog codes to follow.
+
memmove_s(&upcMem[upcUnwindBlockSlot + headerBytes], upcMemSize - (upcUnwindBlockSlot + headerBytes), &upcMem[upcCodeSlot], prologBytes);
// Note that the three UWC_END padding bytes still exist at the end of the array.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
// Zero out the epilog codes memory, to ensure we've copied the right bytes. Don't zero the padding bytes.
{
if (ufiEmitLoc == NULL)
{
- ufiStartOffset = 0; // NULL emit location means the beginning of the code. This is to handle the first fragment prolog.
+ // NULL emit location means the beginning of the code. This is to handle the first fragment prolog.
+ ufiStartOffset = 0;
}
else
{
assert(ufiInitialized == UFI_INITIALIZED_PATTERN);
unsigned epilogCount = 0;
- unsigned epilogCodeBytes = 0; // The total number of unwind code bytes used by epilogs that don't match the prolog codes
+ unsigned epilogCodeBytes = 0; // The total number of unwind code bytes used by epilogs that don't match the
+ // prolog codes
unsigned epilogIndex = ufiPrologCodes.Size(); // The "Epilog Start Index" for the next non-matching epilog codes
UnwindEpilogInfo* pEpi;
DWORD finalSize =
headerBytes
- + codeBytes; // Size of actual unwind codes, aligned up to 4-byte words, including end padding if necessary
+ + codeBytes; // Size of actual unwind codes, aligned up to 4-byte words,
+ // including end padding if necessary
// Construct the final unwind information.
// funKind: funclet kind
// pHotCode: hot section code buffer
// pColdCode: cold section code buffer
-// funcEndOffset: offset of the end of this function/funclet. Used if this fragment is the last one for a function/funclet.
+// funcEndOffset: offset of the end of this function/funclet. Used if this fragment is the last one for a
+// function/funclet.
// isHotCode: are we allocating the unwind info for the hot code section?
void UnwindFragmentInfo::Allocate(CorJitFuncKind funKind, void* pHotCode, void* pColdCode, UNATIVE_OFFSET funcEndOffset, bool isHotCode)
if (uwiFragmentLast->ufiEmitLoc == NULL)
{
- startOffset = 0; // NULL emit location means the beginning of the code. This is to handle the first fragment prolog.
+ // NULL emit location means the beginning of the code. This is to handle the first fragment prolog.
+ startOffset = 0;
}
else
{
// the actual offsets of the splits since we haven't issued the instructions yet, so store
// an emitter location instead of an offset, and "finalize" the offset in the unwindEmit() phase,
// like we do for the function length and epilog offsets.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (uwiComp->verbose)
#define DECLARE_DATA
+// clang-format off
extern
const signed char opcodeSizes[] =
{
#undef InlineSwitch_size
#undef InlinePhi_size
};
-
+// clang-format on
const BYTE varTypeClassification[] =
{
sep = " ";
// What kind of separator should we use for this range (if it is indeed going to be a range)?
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if defined(_TARGET_AMD64_)
// For AMD64, create ranges for int registers R8 through R15, but not the "old" registers.
if (regNum >= REG_R8)
#error Unsupported or unset target architecture
#endif // _TARGET_*
}
- // We've already printed a register. Is this the end of a range?
+
#if defined(_TARGET_ARM64_)
+ // We've already printed a register. Is this the end of a range?
else if ((regNum == REG_INT_LAST)
|| (regNum == REG_R17) // last register before TEB
|| (regNum == REG_R28)) // last register before FP
#else // _TARGET_ARM64_
+ // We've already printed a register. Is this the end of a range?
else if (regNum == REG_INT_LAST)
#endif // _TARGET_ARM64_
{
{
const BYTE * opcodePtr = codeAddr + offs;
const BYTE * startOpcodePtr = opcodePtr;
- const unsigned ALIGN_WIDTH = 3 * 6; // assume 3 characters * (1 byte opcode + 4 bytes data + 1 prefix byte) for most things
+ const unsigned ALIGN_WIDTH = 3 * 6; // assume 3 characters * (1 byte opcode + 4 bytes data + 1 prefix byte) for
+ // most things
if (prefix != NULL)
printf("%s", prefix);
unsigned* pBudget,
bool* pUsedRecursiveVN)
{
- // This label allows us to directly implement a tail call by setting up the arguments, and doing a goto to here.
TailCall:
+ // This label allows us to directly implement a tail call by setting up the arguments, and doing a goto to here.
assert(arg0VN != NoVN && arg1VN != NoVN);
assert(arg0VN == VNNormVal(arg0VN)); // Arguments carry no exceptions.
assert(arg1VN == VNNormVal(arg1VN)); // Arguments carry no exceptions.
assert(VNFuncArity(func) == 3);
// Function arguments carry no exceptions.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (func != VNF_PhiDef)
{
unsigned lclNum = lclVarTree->GetLclNum();
- // Ignore vars that we excluded from SSA (for example, because they're address-exposed). They don't have SSA names
- // in which to store VN's on defs. We'll yield unique VN's when we read from them.
+ // Ignore vars that we excluded from SSA (for example, because they're address-exposed). They don't have
+ // SSA names in which to store VN's on defs. We'll yield unique VN's when we read from them.
if (!fgExcludeFromSsa(lclNum))
{
unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lclVarTree);
}
#endif // DEBUG
}
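// Editorial aside -- an illustrative example (assumed, not taken from the diff) of the kind of
// address-exposed local the comment above excludes from SSA: once its address escapes, unknown
// stores may change it, so its defs can't carry SSA value numbers and its reads get unique VNs.
static void TakesPointer(int* p) // stands in for an arbitrary callee that may write through 'p'
{
    *p += 1;
}

int AddressExposedExample()
{
    int x = 1;
    TakesPointer(&x); // x becomes address-exposed; the call may have modified it
    return x;         // the JIT can no longer treat this read as the constant 1
}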
- // Initblock's are of type void. Give them the void "value" -- they may occur in argument lists, which we want to be
- // able to give VN's to.
+ // Initblocks are of type void. Give them the void "value" -- they may occur in argument lists, which we
+ // want to be able to give VN's to.
tree->gtVNPair.SetBoth(ValueNumStore::VNForVoid());
}
else
// TODO-CQ: Why not be complete, and get this case right?
fgMutateHeap(tree DEBUGARG("COPYBLK - non local"));
}
- // Copyblock's are of type void. Give them the void "value" -- they may occur in argument lists, which we want to be
- // able to give VN's to.
+ // Copyblocks are of type void. Give them the void "value" -- they may occur in argument lists, which we want
+ // to be able to give VN's to.
tree->gtVNPair.SetBoth(ValueNumStore::VNForVoid());
}
}
// We don't want to fabricate arbitrary value numbers to things we can't reason about.
// So far, we know about two of these cases:
// Case 1) We have a local var who has never been defined but it's seen as a use.
- // This is the case of storeIndir(addr(lclvar)) = expr. In this case since we only take the
- // address of the variable, this doesn't mean it's a use nor we have to initialize it, so in this
- // very rare case, we fabricate a value number.
+ // This is the case of storeIndir(addr(lclvar)) = expr. In this case, since we only
+ // take the address of the variable, it is neither a use nor something we have to
+ // initialize, so in this very rare case, we fabricate a value number.
// Case 2) Local variables that represent structs which are assigned using CpBlk.
GenTree* nextNode = lcl->gtNext;
assert((nextNode->gtOper == GT_ADDR && nextNode->gtOp.gtOp1 == lcl) ||
assert(lcl->gtVNPair.BothDefined());
}
- // TODO-Review: For the short term, we have a workaround for copyblk/initblk. Those that use addrSpillTemp will have a statement like
- // "addrSpillTemp = addr(local)." If we previously decided that this block operation defines the local, we will have
- // labeled the "local" node as a DEF (or USEDEF). This flag propogates to the "local" on the RHS. So we'll assume that
- // this is correct, and treat it as a def (to a new, unique VN).
+ // TODO-Review: For the short term, we have a workaround for copyblk/initblk. Those that use
+ // addrSpillTemp will have a statement like "addrSpillTemp = addr(local)." If we previously decided
+ // that this block operation defines the local, we will have labeled the "local" node as a DEF
+ // (or USEDEF). This flag propagates to the "local" on the RHS. So we'll assume that this is correct,
+ // and treat it as a def (to a new, unique VN).
else if ((lcl->gtFlags & GTF_VAR_DEF) != 0)
{
LclVarDsc* varDsc = &lvaTable[lcl->gtLclNum];
// 2: volatile read s;
// 3: read s;
//
- // We should never assume that the values read by 1 and 2 are the same (because the heap was mutated in between them)...
- // but we *should* be able to prove that the values read in 2 and 3 are the same.
+ // We should never assume that the values read by 1 and 2 are the same (because the heap was mutated
+ // in between them)... but we *should* be able to prove that the values read in 2 and 3 are the same.
//
ValueNumPair clsVarVNPair;
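// Editorial aside -- a source-level illustration (assumed shape, not taken from the diff) of the
// three numbered reads in the comment above; 's' stands for any heap location (e.g. a static field)
// tracked through the heap value number.
static int s;

int VolatileReadExample()
{
    int r1 = s;                  // 1: ordinary read of s
    int r2 = *(volatile int*)&s; // 2: volatile read of s -- treated as a heap mutation barrier
    int r3 = s;                  // 3: ordinary read of s
    // The value numberer must not assume r1 == r2, but it may prove r2 == r3.
    return r1 + r2 + r3;
}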
{
FieldSeqNode* fieldSeq = vnStore->FieldSeqVNToFieldSeq(funcApp.m_args[1]);
- // Either "arg" is the address of (part of) a local itself, or the assignment is an "indirect assignment",
- // where an outer comma expression assigned the address of a local to a temp, and that temp is our lhs, and
- // we recorded this in a table when we made the indirect assignment...or else we have a "rogue" PtrToLoc, one
- // that should have made the local in question address-exposed. Assert on that.
+ // Either "arg" is the address of (part of) a local itself, or the assignment is an
+ // "indirect assignment", where an outer comma expression assigned the address of a
+ // local to a temp, and that temp is our lhs, and we recorded this in a table when we
+ // made the indirect assignment...or else we have a "rogue" PtrToLoc, one that should
+ // have made the local in question address-exposed. Assert on that.
GenTreeLclVarCommon* lclVarTree = NULL;
bool isEntire = false;
unsigned lclDefSsaNum = SsaConfig::RESERVED_SSA_NUM;
else if (fldSeq2 != nullptr)
{
// Get the first (instance or static) field from field seq. Heap[field] will yield the "field map".
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
CORINFO_CLASS_HANDLE fldCls = info.compCompHnd->getFieldClass(fldSeq2->m_fieldHnd);
if (obj != nullptr)
template<typename T>
static T EvalOp(VNFunc vnf, T v0);
- // If vnf(v0, v1) would raise an exception, sets *pExcSet to the singleton set containing the exception, and returns (T)0.
- // Otherwise, returns vnf(v0, v1).
+ // If vnf(v0, v1) would raise an exception, sets *pExcSet to the singleton set containing the exception, and
+ // returns (T)0. Otherwise, returns vnf(v0, v1).
template<typename T>
T EvalOp(VNFunc vnf, T v0, T v1, ValueNum* pExcSet);
unsigned m_numMapSels;
#endif
- // This is the maximum number of MapSelect terms that can be "considered" as part of evaluation of a top-level MapSelect
- // application.
+ // This is the maximum number of MapSelect terms that can be "considered" as part of evaluation of a top-level
+ // MapSelect application.
unsigned m_mapSelectBudget;
public:
// Get a new, unique value number for an expression that we're not equating to some function.
ValueNum VNForExpr(var_types typ = TYP_UNKNOWN);
- // This controls extra tracing of the "evaluation" of "VNF_MapSelect" functions.
+// This controls extra tracing of the "evaluation" of "VNF_MapSelect" functions.
#define FEATURE_VN_TRACE_APPLY_SELECTORS 1
// Return the value number corresponding to constructing "MapSelect(map, f0)", where "f0" is the
// When "fieldSeqVN" is VNForNotAField, a unique VN is generated using m_uPtrToLocNotAFieldCount.
ValueNum VNForPtrToLoc(var_types typ, ValueNum lclVarVN, ValueNum fieldSeqVN);
- // If "opA" has a PtrToLoc, PtrToArrElem, or PtrToStatic application as its value numbers, and "opB" is an integer with
- // a "fieldSeq", returns the VN for the pointer form extended with the field sequence; or else NoVN.
+ // If "opA" has a PtrToLoc, PtrToArrElem, or PtrToStatic application as its value numbers, and "opB" is an integer
+ // with a "fieldSeq", returns the VN for the pointer form extended with the field sequence; or else NoVN.
ValueNum ExtendPtrVN(GenTreePtr opA, GenTreePtr opB);
- // If "opA" has a PtrToLoc, PtrToArrElem, or PtrToStatic application as its value numbers, returns the VN for the pointer form
- // extended with "fieldSeq"; or else NoVN.
+ // If "opA" has a PtrToLoc, PtrToArrElem, or PtrToStatic application as its value numbers, returns the VN for the
+ // pointer form extended with "fieldSeq"; or else NoVN.
ValueNum ExtendPtrVN(GenTreePtr opA, FieldSeqNode* fieldSeq);
// Queries on value numbers.
bool VNIsValid(ValueNum vn);
#ifdef DEBUG
- // This controls whether we recursively call vnDump on function arguments.
+// This controls whether we recursively call vnDump on function arguments.
#define FEATURE_VN_DUMP_FUNC_ARGS 0
// Prints, to standard out, a representation of "vn".
static bool isReservedVN(ValueNum);
#define VALUENUM_SUPPORT_MERGE 0
+#if VALUENUM_SUPPORT_MERGE
// If we're going to support the Merge operation, and do it right, we really need to use an entire
// egraph data structure, so that we can do congruence closure, and discover congruences implied
// by the eq-class merge.
-#if VALUENUM_SUPPORT_MERGE
+
// It may be that we provisionally give two expressions distinct value numbers, then later discover
// that the values of the expressions are provably equal. We allow the two value numbers to be
// "merged" -- after the merge, they represent the same abstract value.
// "m_typ" and "m_attribs". These properties determine the interpretation of "m_defs", as discussed below.
struct Chunk
{
- // If "m_defs" is non-null, it is an array of size ChunkSize, whose element type is determined by the other members.
- // The "m_numUsed" field indicates the number of elements of "m_defs" that are already consumed (the next one to allocate).
+ // If "m_defs" is non-null, it is an array of size ChunkSize, whose element type is determined by the other
+ // members. The "m_numUsed" field indicates the number of elements of "m_defs" that are already consumed (the
+ // next one to allocate).
void* m_defs;
unsigned m_numUsed;
// Defines the functions understood by the value-numbering system.
// ValueNumFuncDef(<name of function>, <arity (1-4)>, <is-commutative (for arity = 2)>, <non-null (for gc functions)>, <is-shared-static>)
+// clang-format off
ValueNumFuncDef(MapStore, 3, false, false, false)
ValueNumFuncDef(MapSelect, 2, false, false, false)
ValueNumFuncDef(StrCns, 2, false, true, false)
ValueNumFuncDef(Unbox, 2, false, true, false)
-
+// clang-format on
#undef ValueNumFuncDef
typedef AllVarSetOps::ValArgType ALLVARSET_VALARG_TP;
typedef AllVarSetOps::RetValType ALLVARSET_VALRET_TP;
-
-// Initialize "varName" to "initVal." Copies contents, not references; if "varName" is uninitialized, allocates a var set
-// for it (using "comp" for any necessary allocation), and copies the contents of "initVal" into it.
+// Initialize "varName" to "initVal." Copies contents, not references; if "varName" is uninitialized, allocates a var
+// set for it (using "comp" for any necessary allocation), and copies the contents of "initVal" into it.
#define VARSET_INIT(comp, varName, initVal) varName(VarSetOps::MakeCopy(comp, initVal))
#define ALLVARSET_INIT(comp, varName, initVal) varName(AllVarSetOps::MakeCopy(comp, initVal))