From 9a31fc473fd06749a34e503f972c516784ff26ad Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Tue, 24 May 2016 14:03:49 -0700 Subject: [PATCH] Enable HFA support for passing arguments on ARM64 Fixes dotnet/coreclr#4946 ARM64: ABI - Passing HFA struct arguments in floating point registers We are now passing 24 additional HFA tests and have one test regression Previously HFA support was enabled on ARM32 using #ifdef _TARGET_ARM_ Now HFA support is enabled for both platforms using #ifdef FEATURE_HFA Note that FEATURE_HFA is VM defined and enabled only for platforms that have HFA support The VM is responsible for determining if a struct is an HFA or not The JIT calls this method CorInfoType CEEInfo::getHFAType(CORINFO_CLASS_HANDLE hClass) to identify HFA types Note that when using an AltJit targeting Arm32 or Arm64 we will never see an HFA type In CodegenArm64.cpp Method genPutArgStk Implement passing of HFA structs on the stack Refactored to handle both 16-byte structs and HFA structs Track GC types for the 16-byte structs or the floating point types for HFAs Use ldp when we have a 16-byte struct with no GC pointers Added asserts to check that we never write past the end of the outgoing arg area In CodegenCommon.cpp Method genFnPrologCalleeRegArgs Implement the homing of incoming HFA variables These are currently homed into a stack based struct as we did for ARM32 Use floating point types and registers when handling HFAs Added asserts to check that we never write past the end of the stack based struct Added Dump method for fgArgTabEntry to display how arguments will be passed Added GetNextSlotNum for fgArgInfo which returns what stack offset we would use for the next stack base argument Fixed tree dump so that it can print multireg struct arguments In Morph.cpp Method ArgsComplete We currently choose to spill odd sized structs (11,13,14,15 bytes in size) into a GT_LCL_VAR temp so that we don't need to use more than two instructions to pass a GT_OBJ struct. 
(Since we cannot read beyond the end of a GT_OBJ struct) Method fgMorphArgs Handle HFAs for multireg and stack arguments In this method 'size' is the number of registers used when passing an argument in registers or the number of TARGET_POINTER_SIZE stack slots when passing them on the stack For HFA this means that 'size' can change if we can't pass them using registers. Use new Dump method to print out how each argument is passed (what register or what stack slot) Method fgMorphMultiregStructArg Implement the expansion of multireg HFA arguments into GT_LISTs Refactored to handle both 16-byte structs and HFA structs Track GC types for the 16-byte structs or the floating point types for HFAs Changes from code review feedback Commit migrated from https://github.com/dotnet/coreclr/commit/b2efe59bec6e0b70b2ede2dc0301a790d465e6a4 --- src/coreclr/src/jit/CMakeLists.txt | 38 +- src/coreclr/src/jit/ClrJit.PAL.exports | 3 + src/coreclr/src/jit/codegenarm64.cpp | 524 +++++++++++++-------- src/coreclr/src/jit/codegencommon.cpp | 227 +++++---- src/coreclr/src/jit/codegenlegacy.cpp | 4 +- src/coreclr/src/jit/compiler.cpp | 21 +- src/coreclr/src/jit/compiler.h | 136 ++++-- src/coreclr/src/jit/ee_il_dll.cpp | 28 +- src/coreclr/src/jit/flowgraph.cpp | 25 +- src/coreclr/src/jit/gentree.cpp | 65 +-- src/coreclr/src/jit/importer.cpp | 48 +- src/coreclr/src/jit/lclvars.cpp | 113 +++-- src/coreclr/src/jit/lowerarm64.cpp | 10 +- src/coreclr/src/jit/lsra.cpp | 4 +- src/coreclr/src/jit/morph.cpp | 653 ++++++++++++++++++-------- src/coreclr/src/jit/protojit/CMakeLists.txt | 10 +- src/coreclr/src/jit/regalloc.cpp | 33 +- src/coreclr/src/jit/scopeinfo.cpp | 4 +- src/coreclr/src/jit/standalone/CMakeLists.txt | 10 +- src/coreclr/src/jit/target.h | 24 +- src/coreclr/tests/arm64/Tests.lst | 50 +- 21 files changed, 1295 insertions(+), 735 deletions(-) create mode 100644 src/coreclr/src/jit/ClrJit.PAL.exports diff --git a/src/coreclr/src/jit/CMakeLists.txt b/src/coreclr/src/jit/CMakeLists.txt index 
f54a174..7f1a080 100644 --- a/src/coreclr/src/jit/CMakeLists.txt +++ b/src/coreclr/src/jit/CMakeLists.txt @@ -143,30 +143,48 @@ set( SOURCES convert_to_absolute_path(SOURCES ${SOURCES}) -if( WIN32 ) - +if(WIN32) add_precompiled_header(jitpch.h ../jitpch.cpp SOURCES) # Create .def file containing a list of exports preceeded by # 'EXPORTS'. The file "ClrJit.exports" already contains the list, so we # massage it into the correct format here to create "ClrJit.exports.def". - set(CLRJIT_EXPORTS_DEF ${CMAKE_CURRENT_BINARY_DIR}/ClrJit.exports.def) - set(CLRJIT_EXPORTS_DEF_TEMP ${CLRJIT_EXPORTS_DEF}.txt) + set(JIT_EXPORTS_FILE ${CMAKE_CURRENT_BINARY_DIR}/ClrJit.exports.def) + set(JIT_EXPORTS_FILE_TEMP ${JIT_EXPORTS_FILE}.txt) file(READ "ClrJit.exports" exports_list) - file(WRITE ${CLRJIT_EXPORTS_DEF_TEMP} "LIBRARY CLRJIT\n") - file(APPEND ${CLRJIT_EXPORTS_DEF_TEMP} "EXPORTS\n") - file(APPEND ${CLRJIT_EXPORTS_DEF_TEMP} ${exports_list}) + file(WRITE ${JIT_EXPORTS_FILE_TEMP} "LIBRARY CLRJIT\n") + file(APPEND ${JIT_EXPORTS_FILE_TEMP} "EXPORTS\n") + file(APPEND ${JIT_EXPORTS_FILE_TEMP} ${exports_list}) # Copy the file only if it has changed. 
execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different - ${CLRJIT_EXPORTS_DEF_TEMP} ${CLRJIT_EXPORTS_DEF}) + ${JIT_EXPORTS_FILE_TEMP} ${JIT_EXPORTS_FILE}) - set(SHARED_LIB_SOURCES ${SOURCES} ${CLRJIT_EXPORTS_DEF}) + set(SHARED_LIB_SOURCES ${SOURCES} ${JIT_EXPORTS_FILE}) else() + set(JIT_EXPORTS_IN_FILE ${CMAKE_CURRENT_BINARY_DIR}/clrjit.exports.in) + file(READ "${CMAKE_CURRENT_LIST_DIR}/ClrJit.exports" jit_exports) + file(READ "${CMAKE_CURRENT_LIST_DIR}/ClrJit.PAL.exports" pal_exports) + file(WRITE ${JIT_EXPORTS_IN_FILE} ${jit_exports}) + file(APPEND ${JIT_EXPORTS_IN_FILE} "\n") + file(APPEND ${JIT_EXPORTS_IN_FILE} ${pal_exports}) + + set(JIT_EXPORTS_FILE ${CMAKE_CURRENT_BINARY_DIR}/clrjit.exports) + generate_exports_file(${JIT_EXPORTS_IN_FILE} ${JIT_EXPORTS_FILE}) + + if(CMAKE_SYSTEM_NAME STREQUAL Linux OR CMAKE_SYSTEM_NAME STREQUAL FreeBSD OR CMAKE_SYSTEM_NAME STREQUAL NetBSD) + # This is required to force using our own PAL, not one that we are loaded with. + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Xlinker -Bsymbolic -Bsymbolic-functions") + + set(JIT_EXPORTS_LINKER_OPTION -Wl,--version-script=${JIT_EXPORTS_FILE}) + elseif(CMAKE_SYSTEM_NAME STREQUAL Darwin) + set(JIT_EXPORTS_LINKER_OPTION -Wl,-exported_symbols_list,${JIT_EXPORTS_FILE}) + endif() + set(SHARED_LIB_SOURCES ${SOURCES}) endif() -set(CLR_EXPORTED_SYMBOL_FILE ${CLRJIT_EXPORTS_DEF}) +add_custom_target(jit_exports DEPENDS ${JIT_EXPORTS_FILE}) set(JIT_BASE_NAME clrjit) if (CLR_BUILD_JIT32) diff --git a/src/coreclr/src/jit/ClrJit.PAL.exports b/src/coreclr/src/jit/ClrJit.PAL.exports new file mode 100644 index 0000000..c6b4e8e --- /dev/null +++ b/src/coreclr/src/jit/ClrJit.PAL.exports @@ -0,0 +1,3 @@ +DllMain +PAL_RegisterModule +PAL_UnregisterModule diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index aa405a5..067e788 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -2154,7 +2154,6 @@ void 
CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, { // reg cannot be a FP register assert(!genIsValidFloatReg(reg)); - if (!compiler->opts.compReloc) { size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs @@ -5491,6 +5490,10 @@ void CodeGen::genJmpMethod(GenTreePtr jmp) if (varDsc->lvIsMultiregStruct()) { + if (varDsc->lvIsHfa()) + { + NYI_ARM64("CodeGen::genJmpMethod with multireg HFA arg"); + } // Restore the next register. argRegNext = genMapRegArgNumToRegNum(genMapRegNumToRegArgNum(argReg, loadType) + 1, loadType); loadType = compiler->getJitGCType(varDsc->lvGcLayout[1]); @@ -6449,22 +6452,30 @@ CodeGen::genIntrinsic(GenTreePtr treeNode) void CodeGen::genPutArgStk(GenTreePtr treeNode) { assert(treeNode->OperGet() == GT_PUTARG_STK); - var_types targetType = treeNode->TypeGet(); - emitter *emit = getEmitter(); + var_types targetType = treeNode->TypeGet(); + GenTreePtr source = treeNode->gtOp.gtOp1; + emitter * emit = getEmitter(); - // Get argument offset on stack. + // This is the varNum for our store operations, + // typically this is the varNum for the Outgoing arg space + // When we are generating a tail call it will be the varNum for arg0 + unsigned varNumOut; + unsigned argOffsetMax; // Records the maximum size of this area for assert checks + + // This is the varNum for our load operations, + // only used when we have a multireg struct with a LclVar source + unsigned varNumInp = BAD_VAR_NUM; + + // Get argument offset to use with 'varNumOut' // Here we cross check that argument offset hasn't changed from lowering to codegen since // we are storing arg slot number in GT_PUTARG_STK node in lowering phase. 
- int argOffset = treeNode->AsPutArgStk()->gtSlotNum * TARGET_POINTER_SIZE; + unsigned argOffsetOut = treeNode->AsPutArgStk()->gtSlotNum * TARGET_POINTER_SIZE; #ifdef DEBUG fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode); assert(curArgTabEntry); - assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE); -#endif // DEBUG - - GenTreePtr data = treeNode->gtOp.gtOp1; - unsigned varNum; // typically this is the varNum for the Outgoing arg space + assert(argOffsetOut == (curArgTabEntry->slotNum * TARGET_POINTER_SIZE)); +#endif // DEBUG #if FEATURE_FASTTAILCALL bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea; @@ -6476,7 +6487,8 @@ void CodeGen::genPutArgStk(GenTreePtr treeNode) // All other calls - stk arg is setup in out-going arg area. if (putInIncomingArgArea) { - varNum = getFirstArgWithStackSlot(); + varNumOut = getFirstArgWithStackSlot(); + argOffsetMax = compiler->compArgSize; #if FEATURE_FASTTAILCALL // This must be a fast tail call. assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall()); @@ -6484,279 +6496,377 @@ void CodeGen::genPutArgStk(GenTreePtr treeNode) // Since it is a fast tail call, the existence of first incoming arg is guaranteed // because fast tail call requires that in-coming arg area of caller is >= out-going // arg area required for tail call. 
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + LclVarDsc* varDsc = &(compiler->lvaTable[varNumOut]); assert(varDsc != nullptr); #endif // FEATURE_FASTTAILCALL } else { - varNum = compiler->lvaOutgoingArgSpaceVar; + varNumOut = compiler->lvaOutgoingArgSpaceVar; + argOffsetMax = compiler->lvaOutgoingArgSpaceSize; } - bool isStruct = (targetType == TYP_STRUCT) || (data->OperGet() == GT_LIST); + bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_LIST); if (!isStruct) // a normal non-Struct argument { instruction storeIns = ins_Store(targetType); emitAttr storeAttr = emitTypeSize(targetType); - // If it is contained then data must be the integer constant zero - if (data->isContained()) + // If it is contained then source must be the integer constant zero + if (source->isContained()) { - assert(data->OperGet() == GT_CNS_INT); - assert(data->AsIntConCommon()->IconValue() == 0); - emit->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNum, argOffset); + assert(source->OperGet() == GT_CNS_INT); + assert(source->AsIntConCommon()->IconValue() == 0); + emit->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNumOut, argOffsetOut); } else { - genConsumeReg(data); - emit->emitIns_S_R(storeIns, storeAttr, data->gtRegNum, varNum, argOffset); + genConsumeReg(source); + emit->emitIns_S_R(storeIns, storeAttr, source->gtRegNum, varNumOut, argOffsetOut); } + argOffsetOut += EA_SIZE_IN_BYTES(storeAttr); + assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area } - else // We have a TYP_STRUCT argument (it currently must be a 16-byte multi-reg struct) + else // We have some kind of a struct argument { - // We will use two store instructions that each write a register sized value + assert(source->isContained()); // We expect that this node was marked as contained in LowerArm64 - // We must have a multi-reg struct that takes two slots - assert(curArgTabEntry->numSlots == 2); - assert(data->isContained()); // We expect that this node was marked 
as contained in LowerArm64 + if (source->OperGet() == GT_LIST) + { + // Deal with the multi register passed struct args. + GenTreeArgList* argListPtr = source->AsArgList(); - regNumber loReg = REG_NA; - regNumber hiReg = REG_NA; + // Evaluate each of the GT_LIST items into their register + // and store their register into the outgoing argument area + for (; argListPtr != nullptr; argListPtr = argListPtr->Rest()) + { + GenTreePtr nextArgNode = argListPtr->gtOp.gtOp1; + genConsumeReg(nextArgNode); - if (data->OperGet() != GT_LIST) - { - // In lowerArm64 we reserved two internal integer registers for this 16-byte TYP_STRUCT - genGetRegPairFromMask(treeNode->gtRsvdRegs, &loReg, &hiReg); + regNumber reg = nextArgNode->gtRegNum; + var_types type = nextArgNode->TypeGet(); + emitAttr attr = emitTypeSize(type); + + // Emit store instructions to store the registers produced by the GT_LIST into the outgoing argument area + emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut); + argOffsetOut += EA_SIZE_IN_BYTES(attr); + assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area + } } + else // We must have a GT_OBJ or a GT_LCL_VAR + { + noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ)); - // We will need to record the GC type used by each of the load instructions - // so that we use the same type in each of the store instructions - var_types type0 = TYP_UNKNOWN; - var_types type1 = TYP_UNKNOWN; + var_types targetType = source->TypeGet(); + noway_assert(varTypeIsStruct(targetType)); - if (data->OperGet() == GT_OBJ) - { - GenTree* objNode = data; - GenTree* addrNode = objNode->gtOp.gtOp1; + // We will copy this struct to the stack, possibly using a ldp instruction + // Setup loReg and hiReg from the internal registers that we reserved in lower. 
+ // + regNumber loReg = REG_NA; + regNumber hiReg = REG_NA; + regNumber addrReg = REG_NA; + + // In lowerArm64/TreeNodeInfoInitPutArgStk we have reserved two internal integer registers + genGetRegPairFromMask(treeNode->gtRsvdRegs, &loReg, &hiReg); + + GenTreeLclVarCommon* varNode = nullptr; + GenTreePtr addrNode = nullptr; - if (addrNode->OperGet() == GT_LCL_VAR_ADDR) + if (source->OperGet() == GT_LCL_VAR) { - // We have a GT_OBJ(GT_LCL_VAR_ADDR) - // - // We will treat this case the same as a GT_LCL_VAR node - // so update 'data' to point this GT_LCL_VAR_ADDR node - // and continue to the codegen for the LCL_VAR node below + varNode = source->AsLclVarCommon(); + } + else // we must have a GT_OBJ + { + assert(source->OperGet() == GT_OBJ); + + addrNode = source->gtOp.gtOp1; + + // addrNode can either be a GT_LCL_VAR_ADDR or an address expression // - data = addrNode; + if (addrNode->OperGet() == GT_LCL_VAR_ADDR) + { + // We have a GT_OBJ(GT_LCL_VAR_ADDR) + // + // We will treat this case the same as above + // (i.e if we just had this GT_LCL_VAR directly as the source) + // so update 'source' to point this GT_LCL_VAR_ADDR node + // and continue to the codegen for the LCL_VAR node below + // + varNode = addrNode->AsLclVarCommon(); + addrNode = nullptr; + } + } + + // Either varNode or addrNOde must have been setup above, + // the xor ensures that only one of the two is setup, not both + assert((varNode != nullptr) ^ (addrNode != nullptr)); + + BYTE gcPtrs[MAX_ARG_REG_COUNT] = {}; // TYPE_GC_NONE = 0 + BYTE* structGcLayout = &gcPtrs[0]; // The GC layout for the struct + unsigned gcPtrCount; // The count of GC pointers in the struct + int structSize; + bool isHfa; + + // Setup the structSize, isHFa, and gcPtrCount + if (varNode != nullptr) + { + varNumInp = varNode->gtLclNum; + assert(varNumInp < compiler->lvaCount); + LclVarDsc* varDsc = &compiler->lvaTable[varNumInp]; + + assert(varDsc->lvType == TYP_STRUCT); + assert(varDsc->lvOnFrame); // This struct also must 
live in the stack frame + assert(!varDsc->lvRegister); // And it can't live in a register (SIMD) + + structSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine + // as that is how much stack is allocated for this LclVar + isHfa = varDsc->lvIsHfa(); + gcPtrCount = varDsc->lvStructGcCount; + structGcLayout = varDsc->lvGcLayout; } - else // We have a GT_OBJ with an address expression + else // addrNode is used { + assert(addrNode != nullptr); + // Generate code to load the address that we need into a register genConsumeAddress(addrNode); + addrReg = addrNode->gtRegNum; - regNumber addrReg = addrNode->gtRegNum; - var_types targetType = objNode->TypeGet(); - - noway_assert(varTypeIsStruct(targetType)); + CORINFO_CLASS_HANDLE objClass = source->gtObj.gtClass; - CORINFO_CLASS_HANDLE objClass = objNode->gtObj.gtClass; - int structSize = compiler->info.compCompHnd->getClassSize(objClass); - - assert(structSize <= 2*TARGET_POINTER_SIZE); + structSize = compiler->info.compCompHnd->getClassSize(objClass); + isHfa = compiler->IsHfa(objClass); + gcPtrCount = compiler->info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); + } - // We obtain the gcPtrs values by examining op1 using getClassGClayout() + bool hasGCpointers = (gcPtrCount > 0); // true if there are any GC pointers in the struct + + // If we have an HFA we can't have any GC pointers, + // if not then the max size for the the struct is 16 bytes + if (isHfa) + { + noway_assert(gcPtrCount == 0); + } + else + { + noway_assert(structSize <= 2 * TARGET_POINTER_SIZE); + } - BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; - compiler->info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); + noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES); - // We need to record the GC type to used for each of the loads - type0 = compiler->getJitGCType(gcPtrs[0]); - type1 = compiler->getJitGCType(gcPtrs[1]); + // For a 16-byte structSize with GC pointers we will use two ldr and two str instructions + // ldr x2, 
[x0] + // ldr x3, [x0, #8] + // str x2, [sp, #16] + // str x3, [sp, #24] + // + // For a 16-byte structSize with no GC pointers we will use a ldp and two str instructions + // ldp x2, x3, [x0] + // str x2, [sp, #16] + // str x3, [sp, #24] + // + // For a 32-byte structSize with no GC pointers we will use two ldp and four str instructions + // ldp x2, x3, [x0] + // str x2, [sp, #16] + // str x3, [sp, #24] + // ldp x2, x3, [x0] + // str x2, [sp, #32] + // str x3, [sp, #40] + // + // Note that when loading from a varNode we currently can't use the ldp instruction + // TODO-ARM64-CQ: Implement support for using a ldp instruction with a varNum (see emitIns_R_S) + // - bool hasGCpointers = varTypeIsGC(type0) || varTypeIsGC(type1); + int remainingSize = structSize; + unsigned structOffset = 0; + unsigned nextIndex = 0; - noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES); + while (remainingSize >= 2 * TARGET_POINTER_SIZE) + { + var_types type0 = compiler->getJitGCType(gcPtrs[nextIndex + 0]); + var_types type1 = compiler->getJitGCType(gcPtrs[nextIndex + 1]); - // For a 16-byte structSize with GC pointers we will use two ldr instruction to load two registers - // ldr x2, [x0] - // ldr x3, [x0] - // - // For a 16-byte structSize with no GC pointers we will use a ldp instruction to load two registers - // ldp x2, x3, [x0] - // - // For a 12-byte structSize we will we will generate two load instructions - // ldr x2, [x0] - // ldr w3, [x0, #8] - // - // When the first instruction has a loReg that is the same register - // as the source register: addrReg, we set deferLoad to true and - // issue the intructions in the reverse order: - // ldr w3, [x2, #8] - // ldr x2, [x2] - - bool deferLoad = false; - emitAttr deferAttr = EA_PTRSIZE; - int deferOffset = 0; - int remainingSize = structSize; - unsigned structOffset = 0; - var_types nextType = type0; - - // Use the ldp instruction for a struct that is exactly 16-bytes in size - // ldp x2, x3, [x0] - // - if (remainingSize == 
2*TARGET_POINTER_SIZE) + if (hasGCpointers) { - if (hasGCpointers) - { - // We have GC pointers, so use two ldr instructions - // - // We do it this way because we can't currently pass or track - // two different emitAttr values for a ldp instruction. + // We have GC pointers, so use two ldr instructions + // + // We must do it this way because we can't currently pass or track + // two different emitAttr values for a ldp instruction. - // Make sure that the first load instruction does not overwrite the addrReg. - // - if (loReg != addrReg) + // Make sure that the first load instruction does not overwrite the addrReg. + // + if (loReg != addrReg) + { + if (varNode != nullptr) { - emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), loReg, addrReg, structOffset); - emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), hiReg, addrReg, structOffset + TARGET_POINTER_SIZE); + // Load from our varNumImp source + emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0); + emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp, TARGET_POINTER_SIZE); } - else + else { - assert(hiReg != addrReg); - emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), hiReg, addrReg, structOffset + TARGET_POINTER_SIZE); - emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), loReg, addrReg, structOffset); + // Load from our address expression source + emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset); + emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg, structOffset + TARGET_POINTER_SIZE); } } + else // loReg == addrReg + { + assert(varNode == nullptr); // because addrReg is REG_NA when varNode is non-null + assert(hiReg != addrReg); + // Load from our address expression source + emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg, structOffset + TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset); + } + } + else // our struct has 
no GC pointers + { + if (varNode != nullptr) + { + // Load from our varNumImp source, currently we can't use a ldp instruction to do this + emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0); + emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp, TARGET_POINTER_SIZE); + } else { // Use a ldp instruction + // Load from our address expression source emit->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, loReg, hiReg, addrReg, structOffset); } - remainingSize = 0; // We completely wrote the 16-byte struct } - regNumber curReg = loReg; - while (remainingSize > 0) + // Emit two store instructions to store the two registers into the outgoing argument area + emit->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), loReg, varNumOut, argOffsetOut); + emit->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), hiReg, varNumOut, argOffsetOut + TARGET_POINTER_SIZE); + argOffsetOut += (2 * TARGET_POINTER_SIZE); // We stored 16-bytes of the struct + assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area + + remainingSize -= (2 * TARGET_POINTER_SIZE); // We loaded 16-bytes of the struct + structOffset += (2 * TARGET_POINTER_SIZE); + nextIndex += 2; + } + + // For a 12-byte structSize we will we will generate two load instructions + // ldr x2, [x0] + // ldr w3, [x0, #8] + // str x2, [sp, #16] + // str w3, [sp, #24] + // + // When the first instruction has a loReg that is the same register as the addrReg, + // we set deferLoad to true and issue the intructions in the reverse order + // ldr x3, [x2, #8] + // ldr x2, [x2] + // str x2, [sp, #16] + // str x3, [sp, #24] + // + + var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]); + emitAttr nextAttr = emitTypeSize(nextType); + regNumber curReg = loReg; + + bool deferLoad = false; + var_types deferType = TYP_UNKNOWN; + emitAttr deferAttr = EA_PTRSIZE; + int deferOffset = 0; + + while (remainingSize > 0) + { + if (remainingSize >= TARGET_POINTER_SIZE) { - if 
(remainingSize >= TARGET_POINTER_SIZE) - { - remainingSize -= TARGET_POINTER_SIZE; + remainingSize -= TARGET_POINTER_SIZE; - if ((curReg == addrReg) && (remainingSize != 0)) - { - deferLoad = true; - deferAttr = emitTypeSize(nextType); - deferOffset = structOffset; - } - else // the typical case - { - emit->emitIns_R_R_I(INS_ldr, emitTypeSize(nextType), curReg, addrReg, structOffset); - } - curReg = hiReg; - structOffset += TARGET_POINTER_SIZE; - nextType = type1; + if ((curReg == addrReg) && (remainingSize != 0)) + { + deferLoad = true; + deferType = nextType; + deferAttr = emitTypeSize(nextType); + deferOffset = structOffset; } - else // (remainingSize < TARGET_POINTER_SIZE) + else // the typical case { - int loadSize = remainingSize; - remainingSize = 0; - - // the left over size is smaller than a pointer and thus can never be a GC type - assert(varTypeIsGC(nextType) == false); - - var_types loadType = TYP_UINT; - if (loadSize == 1) + if (varNode != nullptr) { - loadType = TYP_UBYTE; - } - else if (loadSize == 2) - { - loadType = TYP_USHORT; + // Load from our varNumImp source + emit->emitIns_R_S(ins_Load(nextType), nextAttr, curReg, varNumInp, structOffset); } else { - // Need to handle additional loadSize cases here - noway_assert(loadSize == 4); + // Load from our address expression source + emit->emitIns_R_R_I(ins_Load(nextType), nextAttr, curReg, addrReg, structOffset); } + // Emit a store instruction to store the register into the outgoing argument area + emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut); + argOffsetOut += EA_SIZE_IN_BYTES(nextAttr); + assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area + } + curReg = hiReg; + structOffset += TARGET_POINTER_SIZE; + nextIndex++; + nextType = compiler->getJitGCType(gcPtrs[nextIndex]); + nextAttr = emitTypeSize(nextType); + } + else // (remainingSize < TARGET_POINTER_SIZE) + { + int loadSize = remainingSize; + remainingSize = 0; - instruction 
loadIns = ins_Load(loadType); - emitAttr loadAttr = emitAttr(loadSize); + // We should never have to do a non-pointer sized load when we have a LclVar source + assert(varNode == nullptr); - // When deferLoad is false, curReg can be the same as addrReg - // because the last instruction is allowed to overwrite addrReg. - // - noway_assert(!deferLoad || (curReg != addrReg)); + // the left over size is smaller than a pointer and thus can never be a GC type + assert(varTypeIsGC(nextType) == false); - emit->emitIns_R_R_I(loadIns, loadAttr, curReg, addrReg, structOffset); + var_types loadType = TYP_UINT; + if (loadSize == 1) + { + loadType = TYP_UBYTE; + } + else if (loadSize == 2) + { + loadType = TYP_USHORT; + } + else + { + // Need to handle additional loadSize cases here + noway_assert(loadSize == 4); } - } - if (deferLoad) - { - curReg = addrReg; - emit->emitIns_R_R_I(INS_ldr, deferAttr, curReg, addrReg, deferOffset); - } - } - } - else if (data->OperGet() == GT_LIST) - { - // Deal with multi register passed struct args. - GenTreeArgList* argListPtr = data->AsArgList(); - unsigned iterationNum = 0; - for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++) - { - GenTreePtr nextArgNode = argListPtr->gtOp.gtOp1; - genConsumeReg(nextArgNode); + instruction loadIns = ins_Load(loadType); + emitAttr loadAttr = emitAttr(loadSize); - if (iterationNum == 0) - { - // record loReg and type0 for the store to the out arg space - loReg = nextArgNode->gtRegNum; - type0 = nextArgNode->TypeGet(); - } - else - { - assert(iterationNum == 1); - // record hiReg and type1 for the store to the out arg space - hiReg = nextArgNode->gtRegNum;; - type1 = nextArgNode->TypeGet(); + // When deferLoad is false, curReg can be the same as addrReg + // because the last instruction is allowed to overwrite addrReg. 
+ // + noway_assert(!deferLoad || (curReg != addrReg)); + + emit->emitIns_R_R_I(loadIns, loadAttr, curReg, addrReg, structOffset); + + // Emit a store instruction to store the register into the outgoing argument area + emit->emitIns_S_R(ins_Store(loadType), loadAttr, curReg, varNumOut, argOffsetOut); + argOffsetOut += EA_SIZE_IN_BYTES(loadAttr); + assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area } } - } - if ((data->OperGet() == GT_LCL_VAR) || (data->OperGet() == GT_LCL_VAR_ADDR)) - { - GenTreeLclVarCommon* varNode = data->AsLclVarCommon(); - unsigned varNum = varNode->gtLclNum; assert(varNum < compiler->lvaCount); - LclVarDsc* varDsc = &compiler->lvaTable[varNum]; + if (deferLoad) + { + // We should never have to do a deferred load when we have a LclVar source + assert(varNode == nullptr); - // At this point any TYP_STRUCT LclVar must be a 16-byte pass by value argument - assert(varDsc->lvSize() == 2 * TARGET_POINTER_SIZE); - // This struct also must live in the stack frame - assert(varDsc->lvOnFrame); + curReg = addrReg; - // We need to record the GC type to used for each of the loads - // We obtain the GC type values by examining the local's varDsc->lvGcLayout - // - type0 = compiler->getJitGCType(varDsc->lvGcLayout[0]); - type1 = compiler->getJitGCType(varDsc->lvGcLayout[1]); + // Load from our address expression source + emit->emitIns_R_R_I(ins_Load(deferType), deferAttr, curReg, addrReg, deferOffset); - emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNum, 0); - emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNum, TARGET_POINTER_SIZE); + // Emit a store instruction to store the register into the outgoing argument area + emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut); + argOffsetOut += EA_SIZE_IN_BYTES(nextAttr); + assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area + } } - - // We are required to set these two values 
above - assert(loReg != REG_NA); - assert(hiReg != REG_NA); - - // We are required to set these two values above, so that the stores have the same GC type as the loads - assert(type0 != TYP_UNKNOWN); - assert(type1 != TYP_UNKNOWN); - - // Emit two store instructions to store two consecutive registers into the outgoing argument area - emit->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), loReg, varNum, argOffset); - emit->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), hiReg, varNum, argOffset + TARGET_POINTER_SIZE); } } diff --git a/src/coreclr/src/jit/codegencommon.cpp b/src/coreclr/src/jit/codegencommon.cpp index 42a404a..2919d71 100755 --- a/src/coreclr/src/jit/codegencommon.cpp +++ b/src/coreclr/src/jit/codegencommon.cpp @@ -3801,7 +3801,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, // argument register number 'x'. Only used when circular = true. char slot; // 0 means the register is not used for a register argument // 1 means the first part of a register argument - // 2 means the second part of a register argument (e.g., for a TYP_DOUBLE on ARM) + // 2, 3 or 4 means the second,third or fourth part of a multireg argument bool stackArg; // true if the argument gets homed to the stack bool processed; // true after we've processed the argument (and it is in its final location) bool circular; // true if this register participates in a circular dependency loop. @@ -3811,20 +3811,27 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, // So, for that case we retain the type of the register in the regArgTab. // In other cases, we simply use the type of the lclVar to determine the type of the register. +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + + // This is the UNIX_AMD64 implementation var_types getRegType(Compiler* compiler) { -#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - return type; -#elif defined(_TARGET_ARM_) - LclVarDsc varDsc = compiler->lvaTable[varNum]; - return varDsc.lvIsHfaRegArg ? 
varDsc.GetHfaType() : varDsc.lvType; + return type; // UNIX_AMD64 implementation + } - // TODO-ARM64: Do we need the above to handle HFA structs on ARM64? +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING -#else // !_TARGET_ARM_ - return compiler->lvaTable[varNum].lvType; -#endif // !_TARGET_ARM_ + // This is the implementation for all other targets + var_types getRegType(Compiler* compiler) + { + LclVarDsc varDsc = compiler->lvaTable[varNum]; + // Check if this is an HFA register arg and return the HFA type + if (varDsc.lvIsHfaRegArg()) + return varDsc.GetHfaType(); + return varDsc.lvType; } + +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING } regArgTab [max(MAX_REG_ARG,MAX_FLOAT_REG_ARG)] = { }; unsigned varNum; @@ -3880,12 +3887,12 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, } } -#ifdef _TARGET_ARM_ - var_types regType = varDsc->lvIsHfaRegArg ? varDsc->GetHfaType() - : varDsc->TypeGet(); -#else // !_TARGET_ARM_ var_types regType = varDsc->TypeGet(); -#endif // !_TARGET_ARM_ + // Change regType to the HFA type when we have a HFA argument + if (varDsc->lvIsHfaRegArg()) + { + regType = varDsc->GetHfaType(); + } #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) if (!varTypeIsStruct(regType)) @@ -4005,18 +4012,32 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, #if FEATURE_MULTIREG_ARGS if (varDsc->lvIsMultiregStruct()) { + if (varDsc->lvIsHfaRegArg()) + { + // We have an HFA argument, set slots to the number of registers used + slots = varDsc->lvHfaSlots(); + } + else + { + // We have a non-HFA multireg argument, set slots to two + slots = 2; + } + // Note that regArgNum+1 represents an argument index not an actual argument register. 
// see genMapRegArgNumToRegNum(unsigned argNum, var_types type) - // This is the setup for the second half of a MULTIREG struct arg - noway_assert(regArgNum+1 < regState->rsCalleeRegArgNum); - // we better not have added it already (there better not be multiple vars representing this argument register) - noway_assert(regArgTab[regArgNum+1].slot == 0); - - regArgTab[regArgNum+1].varNum = varNum; - regArgTab[regArgNum+1].slot = 2; - slots = 2; + // This is the setup for the rest of a multireg struct arg + noway_assert((regArgNum + (slots - 1)) < regState->rsCalleeRegArgNum); + + for (int i = 1; ilvType == TYP_LONG && varDsc->lvOtherReg == REG_STK && regArgTab[argNum].slot == 2)); var_types storeType = TYP_UNDEF; + unsigned slotSize = TARGET_POINTER_SIZE; -#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) if (varTypeIsStruct(varDsc)) { - size = EA_SIZE(varDsc->lvSize()); -#if defined(_TARGET_AMD64_) + storeType = TYP_I_IMPL; // Default store type for a struct type is a pointer sized integer +#if FEATURE_MULTIREG_ARGS + // Must be <= 32 bytes or else it wouldn't be passed in registers + noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES); +#endif // FEATURE_MULTIREG_ARGS #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING storeType = regArgTab[argNum].type; - size = emitActualTypeSize(storeType); -#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING - storeType = (var_types)((size <= 4) ? 
TYP_INT : TYP_I_IMPL); - // Must be 1, 2, 4, or 8, or else it wouldn't be passed in a register - noway_assert(EA_SIZE_IN_BYTES(size) <= 8); - assert((EA_SIZE_IN_BYTES(size) & (EA_SIZE_IN_BYTES(size) - 1)) == 0); #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING -#elif defined(_TARGET_ARM64_) - // Must be <= 16 bytes or else it wouldn't be passed in registers - noway_assert(EA_SIZE_IN_BYTES(size) <= MAX_PASS_MULTIREG_BYTES); - - storeType = TYP_I_IMPL; - size = emitActualTypeSize(storeType); -#endif // _TARGET_ARM64_ - } - else -#endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) - { -#ifdef _TARGET_ARM_ - if (varDsc->lvIsHfaRegArg) - { - storeType = genActualType(TYP_FLOAT); - } - else -#endif // _TARGET_ARM_ + if (varDsc->lvIsHfaRegArg()) { - storeType = genActualType(varDsc->TypeGet()); + storeType = genActualType(varDsc->GetHfaType()); + slotSize = (unsigned) emitActualTypeSize(storeType); } - + } + else // Not a struct type + { + storeType = genActualType(varDsc->TypeGet()); + } + size = emitActualTypeSize(storeType); #ifdef _TARGET_X86_ - noway_assert(genTypeSize(storeType) == sizeof(void *)); + noway_assert(genTypeSize(storeType) == TARGET_POINTER_SIZE); #endif //_TARGET_X86_ - size = emitActualTypeSize(storeType); - } - regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType); // Stack argument - if the ref count is 0 don't care about it @@ -4389,13 +4393,22 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, else { // Since slot is typically 1, baseOffset is typically 0 - int baseOffset = (regArgTab[argNum].slot - 1) * TARGET_POINTER_SIZE; + int baseOffset = (regArgTab[argNum].slot - 1) * slotSize; getEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, - baseOffset); + baseOffset); + +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING + // Check if we are writing past the end of the struct + if (varTypeIsStruct(varDsc)) + { + assert(varDsc->lvSize() >= baseOffset+(unsigned)size); + } +#endif // 
!FEATURE_UNIX_AMD64_STRUCT_PASSING + if (regArgTab[argNum].slot == 1) psiMoveToStack(varNum); @@ -4426,32 +4439,32 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, if (doingFloat) { -#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - insCopy = ins_Copy(TYP_FLOAT); +#if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + insCopy = ins_Copy(TYP_DOUBLE); // Compute xtraReg here when we have a float argument assert(xtraReg == REG_NA); regMaskTP fpAvailMask; fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive; -#if defined(_TARGET_ARM_) - fpAvailMask &= RBM_DBL_REGS; +#if defined(FEATURE_HFA) + fpAvailMask &= RBM_ALLDOUBLE; #else #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) #error Error. Wrong architecture. #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) -#endif // defined(_TARGET_ARM_) +#endif // defined(FEATURE_HFA) if (fpAvailMask == RBM_NONE) { fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive; -#if defined(_TARGET_ARM_) - fpAvailMask &= RBM_DBL_REGS; +#if defined(FEATURE_HFA) + fpAvailMask &= RBM_ALLDOUBLE; #else #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) #error Error. Wrong architecture. #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) -#endif // defined(_TARGET_ARM_) +#endif // defined(FEATURE_HFA) } assert(fpAvailMask != RBM_NONE); @@ -10417,63 +10430,85 @@ bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass) } #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING -#ifdef _TARGET_ARM_ bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass) { +#ifdef FEATURE_HFA return varTypeIsFloating(GetHfaType(hClass)); +#else + return false; +#endif } bool Compiler::IsHfa(GenTreePtr tree) { +#ifdef FEATURE_HFA return IsHfa(gtGetStructHandleIfPresent(tree)); +#else + return false; +#endif } var_types Compiler::GetHfaType(GenTreePtr tree) { - return (tree->TypeGet() == TYP_STRUCT) ? 
GetHfaType(gtGetStructHandleIfPresent(tree)) : TYP_UNDEF; +#ifdef FEATURE_HFA + if (tree->TypeGet() == TYP_STRUCT) + { + return GetHfaType(gtGetStructHandleIfPresent(tree)); + } +#endif + return TYP_UNDEF; } -unsigned Compiler::GetHfaSlots(GenTreePtr tree) +unsigned Compiler::GetHfaCount(GenTreePtr tree) { - return GetHfaSlots(gtGetStructHandleIfPresent(tree)); + return GetHfaCount(gtGetStructHandleIfPresent(tree)); } var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass) { - if (hClass == NO_CLASS_HANDLE) - { - return TYP_UNDEF; - } -#if 0 - // This is a workaround to allow for testing without full HFA support in the VM - if (_strnicmp(eeGetClassName(hClass), "HFA", 3) == 0) - { - return TYP_FLOAT; - } - else if (_strnicmp(eeGetClassName(hClass), "HDA", 3) == 0) + var_types result = TYP_UNDEF; + if (hClass != NO_CLASS_HANDLE) { - return TYP_DOUBLE; - } -#endif - - CorInfoType corType = info.compCompHnd->getHFAType(hClass); - if (corType == CORINFO_TYPE_UNDEF) - { - return TYP_UNDEF; +#ifdef FEATURE_HFA + CorInfoType corType = info.compCompHnd->getHFAType(hClass); + if (corType != CORINFO_TYPE_UNDEF) + { + result = JITtype2varType(corType); + } +#endif // FEATURE_HFA } - return JITtype2varType(corType); + return result; } -unsigned Compiler::GetHfaSlots(CORINFO_CLASS_HANDLE hClass) +//------------------------------------------------------------------------ +// GetHfaCount: Given a class handle for an HFA struct +// return the number of registers needed to hold the HFA +// +// Note that on ARM32 the single precision registers overlap with +// the double precision registers and for that reason each +// double register is considered to be two single registers. +// Thus for ARM32 an HFA of 4 doubles this function will return 8. +// On ARM64 given an HFA of 4 singles or 4 doubles this function will +// will return 4 for both. 
+// Arguments: +// hClass: the class handle of a HFA struct +// +unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass) { assert(IsHfa(hClass)); - return info.compCompHnd->getClassSize(hClass) / TARGET_POINTER_SIZE; +#ifdef _TARGET_ARM_ + // A HFA of doubles is twice as large as an HFA of singles for ARM32 + // (i.e. uses twice the number of single precison registers) + return info.compCompHnd->getClassSize(hClass) / REGSIZE_BYTES; +#else // _TARGET_ARM64_ + var_types hfaType = GetHfaType(hClass); + unsigned classSize = info.compCompHnd->getClassSize(hClass); + // Note that the retail build issues a warning about a potential divsion by zero without the Max function + unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType))); + return classSize / elemSize; +#endif // _TARGET_ARM64_ } - -#endif // _TARGET_ARM_ - - #ifdef _TARGET_XARCH_ //------------------------------------------------------------------------ diff --git a/src/coreclr/src/jit/codegenlegacy.cpp b/src/coreclr/src/jit/codegenlegacy.cpp index 248ed5b..7cb0315 100644 --- a/src/coreclr/src/jit/codegenlegacy.cpp +++ b/src/coreclr/src/jit/codegenlegacy.cpp @@ -5461,7 +5461,7 @@ void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTreePtr tree) else #endif // _TARGET_64BIT_ #ifdef _TARGET_ARM_ - if (varDsc->lvIsHfaRegArg) + if (varDsc->lvIsHfaRegArg()) { const var_types elemType = varDsc->GetHfaType(); const instruction loadOp = ins_Load(elemType); @@ -20652,7 +20652,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, { assert(call->gtCall.gtRetClsHnd != NULL); assert(compiler->IsHfa(call->gtCall.gtRetClsHnd)); - int retSlots = compiler->GetHfaSlots(call->gtCall.gtRetClsHnd); + int retSlots = compiler->GetHfaCount(call->gtCall.gtRetClsHnd); assert(retSlots > 0 && retSlots <= MAX_HFA_RET_SLOTS); assert(MAX_HFA_RET_SLOTS < sizeof(int) * 8); retVal = ((1 << retSlots) - 1) << REG_FLOATRET; diff --git a/src/coreclr/src/jit/compiler.cpp b/src/coreclr/src/jit/compiler.cpp index 
ddd6d4a..e7fcb71 100644 --- a/src/coreclr/src/jit/compiler.cpp +++ b/src/coreclr/src/jit/compiler.cpp @@ -546,13 +546,9 @@ var_types Compiler::argOrReturnTypeForStruct(unsigned size, CORINFO_CLASS_HAN if (size <= MAX_RET_MULTIREG_BYTES) { #ifdef _TARGET_ARM64_ - assert(size > TARGET_POINTER_SIZE); - - // For structs that are 9 to 16 bytes in size set useType to TYP_STRUCT, - // as this means a 9-16 byte struct value in two registers - // - useType = TYP_STRUCT; -#endif // _TARGET_ARM64_ + // TODO-ARM64-HFA - Implement x0,x1 returns + // TODO-ARM64 - Implement HFA returns +#endif // _TARGET_XXX_ } } #endif // FEATURE_MULTIREG_RET @@ -565,10 +561,13 @@ var_types Compiler::argOrReturnTypeForStruct(unsigned size, CORINFO_CLASS_HAN #ifdef _TARGET_ARM64_ assert(size > TARGET_POINTER_SIZE); - // For structs that are 9 to 16 bytes in size set useType to TYP_STRUCT, - // as this means a 9-16 byte struct value in two registers - // - useType = TYP_STRUCT; + // On ARM64 structs that are 9-16 bytes are passed by value + // or if the struct is an HFA it is passed by value + if ((size <= (TARGET_POINTER_SIZE * 2)) || IsHfa(clsHnd)) + { + // set useType to TYP_STRUCT to indicate that this is passed by value in registers + useType = TYP_STRUCT; + } #endif // _TARGET_ARM64_ } } diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index e44b0ec..4eaf0d4 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -279,19 +279,19 @@ public: unsigned char lvOverlappingFields :1; // True when we have a struct with possibly overlapping fields unsigned char lvContainsHoles :1; // True when we have a promoted struct that contains holes unsigned char lvCustomLayout :1; // True when this struct has "CustomLayout" - unsigned char lvIsMultiRegArgOrRet:1; // Is this a struct that would be passed or returned in multiple registers? + unsigned char lvIsMultiRegArgOrRet:1; // Is this a struct that would be passed or returned in multiple registers? 
-#ifdef _TARGET_ARM_ - // TODO-Cleanup: Can this be subsumed by the above? - unsigned char lvIsHfaRegArg:1; // Is this argument variable holding a HFA register argument. - unsigned char lvHfaTypeIsFloat:1; // Is the HFA type float or double? -#endif // _TARGET_ARM_ +#ifdef FEATURE_HFA + unsigned char _lvIsHfa :1; // Is this a struct variable who's class handle is an HFA type + unsigned char _lvIsHfaRegArg :1; // Is this a HFA argument variable? // TODO-CLEANUP: Remove this and replace with (lvIsRegArg && lvIsHfa()) + unsigned char _lvHfaTypeIsFloat :1; // Is the HFA type float or double? +#endif // FEATURE_HFA #ifdef DEBUG // TODO-Cleanup: See the note on lvSize() - this flag is only in use by asserts that are checking for struct // types, and is needed because of cases where TYP_STRUCT is bashed to an integral type. // Consider cleaning this up so this workaround is not required. - unsigned char lvUnusedStruct :1; // All references to this promoted struct are through its field locals. + unsigned char lvUnusedStruct :1; // All references to this promoted struct are through its field locals. // I.e. there is no longer any reference to the struct directly. // In this case we can simply remove this struct local. 
#endif @@ -340,6 +340,69 @@ public: } #endif // FEATURE_MULTIREG_ARGS + bool lvIsHfa() const + { +#ifdef FEATURE_HFA + return _lvIsHfa; +#else + return false; +#endif + } + + void lvSetIsHfa() + { +#ifdef FEATURE_HFA + _lvIsHfa = true; +#endif + } + + bool lvIsHfaRegArg() const + { +#ifdef FEATURE_HFA + return _lvIsHfaRegArg; +#else + return false; +#endif + } + + void lvSetIsHfaRegArg() + { +#ifdef FEATURE_HFA + _lvIsHfaRegArg = true; +#endif + } + + bool lvHfaTypeIsFloat() const + { +#ifdef FEATURE_HFA + return _lvHfaTypeIsFloat; +#else + return false; +#endif + } + + void lvSetHfaTypeIsFloat(bool value) + { +#ifdef FEATURE_HFA + _lvHfaTypeIsFloat = value; +#endif + } + + // Returns 1-4 indicating the number of register slots used by the HFA + unsigned lvHfaSlots() const + { + assert(lvIsHfa()); + assert(lvType==TYP_STRUCT); + if (lvHfaTypeIsFloat()) + { + return lvExactSize / sizeof(float); + } + else + { + return lvExactSize / sizeof(double); + } + } + private: regNumberSmall _lvRegNum; // Used to store the register this variable is in (or, the low register of a register pair). 
@@ -598,19 +661,26 @@ public: assert(varTypeIsStruct(lvType) || (lvType == TYP_BLK) || (lvPromoted && lvUnusedStruct)); - return (unsigned)(roundUp(lvExactSize, sizeof(void*))); + return (unsigned)(roundUp(lvExactSize, TARGET_POINTER_SIZE)); } bool lvIsMultiregStruct() { #if FEATURE_MULTIREG_ARGS_OR_RET -#ifdef _TARGET_ARM64_ - if ((TypeGet() == TYP_STRUCT) && - (lvSize() == 2 * TARGET_POINTER_SIZE)) + if (TypeGet() == TYP_STRUCT) { - return true; + if (lvIsHfa() && (lvHfaSlots() > 1)) + { + return true; + } +#if defined(_TARGET_ARM64_) + // lvSize() performs a roundUp operation so it only returns multiples of TARGET_POINTER_SIZE + else if (lvSize() == (2 * TARGET_POINTER_SIZE)) + { + return true; + } +#endif // _TARGET_ARM64_ } -#endif // _TARGET_ARM64_ #endif // FEATURE_MULTIREG_ARGS_OR_RET return false; } @@ -660,24 +730,17 @@ public: void addPrefReg(regMaskTP regMask, Compiler * pComp); bool IsFloatRegType() const { - return -#ifdef _TARGET_ARM_ - lvIsHfaRegArg || -#endif - isFloatRegType(lvType); + return isFloatRegType(lvType) || lvIsHfaRegArg(); } -#ifdef _TARGET_ARM_ var_types GetHfaType() const { - assert(lvIsHfaRegArg); - return lvIsHfaRegArg ? (lvHfaTypeIsFloat ? TYP_FLOAT : TYP_DOUBLE) : TYP_UNDEF; + return lvIsHfa() ? (lvHfaTypeIsFloat() ? 
TYP_FLOAT : TYP_DOUBLE) : TYP_UNDEF; } void SetHfaType(var_types type) { assert(varTypeIsFloating(type)); - lvHfaTypeIsFloat = (type == TYP_FLOAT); + lvSetHfaTypeIsFloat(type == TYP_FLOAT); } -#endif //_TARGET_ARM_ #ifndef LEGACY_BACKEND var_types lvaArgType(); @@ -1084,6 +1147,9 @@ struct fgArgTabEntry { return isBackFilled; } +#ifdef DEBUG + void Dump(); +#endif }; typedef struct fgArgTabEntry * fgArgTabEntryPtr; @@ -1172,8 +1238,10 @@ public: void RecordStkLevel (unsigned stkLvl); unsigned RetrieveStkLevel (); - unsigned ArgCount () { return argCount; } - fgArgTabEntryPtr * ArgTable () { return argTable; } + unsigned ArgCount () { return argCount; } + fgArgTabEntryPtr * ArgTable () { return argTable; } + unsigned GetNextSlotNum() { return nextSlotNum; } + }; @@ -1333,26 +1401,22 @@ public: GenTreePtr impAssignMultiRegTypeToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass); #endif // FEATURE_MULTIREG_RET -#ifdef _TARGET_ARM_ - //------------------------------------------------------------------------- // Functions to handle homogeneous floating-point aggregates (HFAs) in ARM. // HFAs are one to four element structs where each element is the same // type, either all float or all double. They are treated specially // in the ARM Procedure Call Standard, specifically, they are passed in - // floating-point registers. + // floating-point registers instead of the general purpose registers. // bool IsHfa(CORINFO_CLASS_HANDLE hClass); bool IsHfa(GenTreePtr tree); var_types GetHfaType(GenTreePtr tree); - unsigned GetHfaSlots(GenTreePtr tree); + unsigned GetHfaCount(GenTreePtr tree); var_types GetHfaType(CORINFO_CLASS_HANDLE hClass); - unsigned GetHfaSlots(CORINFO_CLASS_HANDLE hClass); - -#endif // _TARGET_ARM_ + unsigned GetHfaCount(CORINFO_CLASS_HANDLE hClass); //------------------------------------------------------------------------- // The following is used for struct passing on System V system. 
@@ -3030,7 +3094,7 @@ private: void impLoadLoc(unsigned ilLclNum, IL_OFFSET offset); bool impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &opcode); -#if defined(_TARGET_ARM_) +#ifdef _TARGET_ARM_ void impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr op, CORINFO_CLASS_HANDLE hClass); #endif @@ -4619,11 +4683,11 @@ private: void fgInsertInlineeBlocks (InlineInfo* pInlineInfo); GenTreePtr fgInlinePrependStatements(InlineInfo* inlineInfo); -#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) GenTreePtr fgGetStructAsStructPtr(GenTreePtr tree); GenTreePtr fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd); void fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd); -#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#endif // defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) static fgWalkPreFn fgUpdateInlineReturnExpressionPlaceHolder; #ifdef DEBUG @@ -7869,7 +7933,7 @@ public : // Returns true if the method being compiled returns a non-void and non-struct value. // Note that lvaInitTypeRef() normalizes compRetNativeType for struct returns in a // single register as per target arch ABI (e.g on Amd64 Windows structs of size 1, 2, - // 4 or 8 gets normalized to TYP_BYTE/TYP_SHORT/TYP_INT/TYP_LONG; On Arm Hfa structs). + // 4 or 8 gets normalized to TYP_BYTE/TYP_SHORT/TYP_INT/TYP_LONG; On Arm HFA structs). // Methods returning such structs are considered to return non-struct return value and // this method returns true in that case. 
bool compMethodReturnsNativeScalarType() diff --git a/src/coreclr/src/jit/ee_il_dll.cpp b/src/coreclr/src/jit/ee_il_dll.cpp index b97c10c..787c311 100755 --- a/src/coreclr/src/jit/ee_il_dll.cpp +++ b/src/coreclr/src/jit/ee_il_dll.cpp @@ -439,15 +439,39 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_ assert(argTypeJit != CORINFO_TYPE_REFANY || structSize == 2*sizeof(void*)); #if FEATURE_MULTIREG_ARGS -#ifdef _TARGET_ARM64_ + // For each target that supports passing struct args in multiple registers + // apply the target specific rules for them here: +#if defined(_TARGET_ARM64_) + // Any structs that are larger than MAX_PASS_MULTIREG_BYTES are always passed by reference if (structSize > MAX_PASS_MULTIREG_BYTES) { // This struct is passed by reference using a single 'slot' return TARGET_POINTER_SIZE; } -#endif // _TARGET_ARM64_ + else + { + // Is the struct larger than 16 bytes + if (structSize > (2 * TARGET_POINTER_SIZE)) + { + var_types hfaType = GetHfaType(argClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF + bool isHfa = (hfaType != TYP_UNDEF); + if (!isHfa) + { + // This struct is passed by reference using a single 'slot' + return TARGET_POINTER_SIZE; + } + } + } + // otherwise will we pass this struct by value in multiple registers + // +#elif defined(_TARGET_ARM_) + // otherwise will we pass this struct by value in multiple registers +#else // + NYI("unknown target"); +#endif // defined(_TARGET_XXX_) #endif // FEATURE_MULTIREG_ARGS + // we pass this struct by value in multiple registers return (unsigned)roundUp(structSize, TARGET_POINTER_SIZE); } else diff --git a/src/coreclr/src/jit/flowgraph.cpp b/src/coreclr/src/jit/flowgraph.cpp index aec04b4..e8c1c8b 100644 --- a/src/coreclr/src/jit/flowgraph.cpp +++ b/src/coreclr/src/jit/flowgraph.cpp @@ -21646,7 +21646,7 @@ void Compiler::fgNoteNonInlineCandidate(GenTreePtr tree, #endif -#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if 
defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) /********************************************************************************* * @@ -21778,7 +21778,7 @@ void Compiler::fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, COR tree->CopyFrom(gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false), this); } -#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#endif // defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) /***************************************************************************** * Callback to replace the inline return expression place holder (GT_RET_EXPR) @@ -21793,12 +21793,13 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder( if (tree->gtOper == GT_RET_EXPR) { -#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // We are going to copy the tree from the inlinee, so save the handle now. CORINFO_CLASS_HANDLE retClsHnd = varTypeIsStruct(tree) ? 
tree->gtRetExpr.gtRetClsHnd : NO_CLASS_HANDLE; -#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#endif // defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + do { @@ -21836,12 +21837,14 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder( } while (tree->gtOper == GT_RET_EXPR); -#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) -#if defined(_TARGET_ARM_) +#if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if defined(FEATURE_HFA) if (retClsHnd != NO_CLASS_HANDLE && comp->IsHfa(retClsHnd)) #elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) if (retClsHnd != NO_CLASS_HANDLE && comp->IsRegisterPassable(retClsHnd)) -#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#else + assert(!"Unhandled target"); +#endif // FEATURE_HFA { GenTreePtr parent = data->parent; // See assert below, we only look one level above for an asg parent. @@ -21856,10 +21859,10 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder( tree->CopyFrom(comp->fgAssignStructInlineeToVar(tree, retClsHnd), comp); } } -#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#endif // defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) } -#if defined(DEBUG) && (defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) +#if defined(DEBUG) && defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // Make sure we don't have a tree like so: V05 = (, , , retExpr); // Since we only look one level above for the parent for '=' and // do not check if there is a series of COMMAs. See above. 
@@ -21877,7 +21880,7 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder( // empty } -#if defined(_TARGET_ARM_) +#if defined(FEATURE_HFA) noway_assert(!varTypeIsStruct(comma) || comma->gtOper != GT_RET_EXPR || (!comp->IsHfa(comma->gtRetExpr.gtRetClsHnd))); @@ -21887,7 +21890,7 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder( (!comp->IsRegisterPassable(comma->gtRetExpr.gtRetClsHnd))); #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) } -#endif // defined(DEBUG) && (defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) +#endif // defined(DEBUG) && defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) return WALK_CONTINUE; } diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index 4a5b4ab..7a5e551 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -238,13 +238,13 @@ void GenTree::InitNodeSize() // Now set all of the appropriate entries to 'large' - // On ARM and System V struct returning there - // is code that does GT_ASG-tree.CopyObj call. + // On ARM32, ARM64 and System V for struct returning + // there is code that does GT_ASG-tree.CopyObj call. // CopyObj is a large node and the GT_ASG is small, which triggers an exception. 
-#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) GenTree::s_gtNodeSizes[GT_ASG ] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_RETURN ] = TREE_NODE_SZ_LARGE; -#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#endif // defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) GenTree::s_gtNodeSizes[GT_CALL ] = TREE_NODE_SZ_LARGE; GenTree::s_gtNodeSizes[GT_CAST ] = TREE_NODE_SZ_LARGE; @@ -276,12 +276,12 @@ void GenTree::InitNodeSize() #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING GenTree::s_gtNodeSizes[GT_PUTARG_STK ] = TREE_NODE_SZ_LARGE; #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING -#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // In importer for Hfa and register returned structs we rewrite GT_ASG to GT_COPYOBJ/GT_CPYBLK // Make sure the sizes agree. assert(GenTree::s_gtNodeSizes[GT_COPYOBJ] <= GenTree::s_gtNodeSizes[GT_ASG]); assert(GenTree::s_gtNodeSizes[GT_COPYBLK] <= GenTree::s_gtNodeSizes[GT_ASG]); -#endif // !(defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) +#endif // !(defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) assert(GenTree::s_gtNodeSizes[GT_RETURN] == GenTree::s_gtNodeSizes[GT_ASG]); @@ -5630,7 +5630,7 @@ GenTreePtr Compiler::gtNewAssignNode(GenTreePtr dst, GenTreePtr src DEB // ARM has HFA struct return values, HFA return values are received in registers from GT_CALL, // using struct assignment. 
-#ifdef _TARGET_ARM_ +#ifdef FEATURE_HFA assert(isPhiDefn || type != TYP_STRUCT || IsHfa(dst) || IsHfa(src)); #elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // You need to use GT_COPYBLK for assigning structs @@ -7386,6 +7386,10 @@ Compiler::gtDispNodeName(GenTree *tree) { sprintf_s(bufp, sizeof(buf), " %s(h)%c", name, 0); } + else if (tree->gtOper == GT_PUTARG_STK) + { + sprintf_s(bufp, sizeof(buf), " %s [+0x%02x]%c", name, tree->AsPutArgStk()->getArgOffset(), 0); + } else if (tree->gtOper == GT_CALL) { const char * callType = "call"; @@ -9021,14 +9025,10 @@ void Compiler::gtGetArgMsg(GenTreePtr call, { sprintf_s(bufp, bufLength, "arg%d out+%02x%c", argNum, curArgTabEntry->slotNum * TARGET_POINTER_SIZE, 0); } - else if (listCount == 1) - { - sprintf_s(bufp, bufLength, "arg%d hi +%02x%c", argNum, (curArgTabEntry->slotNum + 1) * TARGET_POINTER_SIZE, 0); - } - else + else // listCount is 0,1,2 or 3 { - assert(listCount == 0); - sprintf_s(bufp, bufLength, "arg%d lo +%02x%c", argNum, (curArgTabEntry->slotNum + 0) * TARGET_POINTER_SIZE, 0); + assert(listCount <= MAX_ARG_REG_COUNT); + sprintf_s(bufp, bufLength, "arg%d out+%02x%c", argNum, (curArgTabEntry->slotNum + listCount) * TARGET_POINTER_SIZE, 0); } #else sprintf_s(bufp, bufLength, "arg%d on STK%c", argNum, 0); @@ -9090,22 +9090,29 @@ void Compiler::gtGetLateArgMsg(GenTreePtr call, } else { -#ifdef _TARGET_ARM64_ - if (curArgTabEntry->numRegs == 2) +#if FEATURE_MULTIREG_ARGS + if (curArgTabEntry->numRegs >= 2) { - regNumber argReg2 = REG_NEXT(argReg); + regNumber otherRegNum; +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + assert(curArgTabEntry->numRegs == 2); + otherRegNum = curArgTabEntry->otherRegNum; +#else + otherRegNum = (regNumber)(((unsigned)curArgTabEntry->regNum) + curArgTabEntry->numRegs - 1); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + if (listCount == -1) { - sprintf_s(bufp, bufLength, "arg%d %s,%s%c", curArgTabEntry->argNum, compRegVarName(argReg), compRegVarName(argReg2), 0); - } - else if 
(listCount == 1) - { - sprintf_s(bufp, bufLength, "arg%d hi %s%c", curArgTabEntry->argNum, compRegVarName(argReg2), 0); + char seperator = (curArgTabEntry->numRegs == 2) ? ',' : '-'; + + sprintf_s(bufp, bufLength, "arg%d %s%c%s%c", curArgTabEntry->argNum, + compRegVarName(argReg), seperator, compRegVarName(otherRegNum), 0); } - else + else // listCount is 0,1,2 or 3 { - assert(listCount == 0); - sprintf_s(bufp, bufLength, "arg%d lo %s%c", curArgTabEntry->argNum, compRegVarName(argReg), 0); + assert(listCount <= MAX_ARG_REG_COUNT); + regNumber curReg = (listCount == 1) ? otherRegNum : (regNumber)((unsigned)(argReg)+listCount); + sprintf_s(bufp, bufLength, "arg%d m%d %s%c", curArgTabEntry->argNum, listCount, compRegVarName(curReg), 0); } } else @@ -13786,7 +13793,9 @@ void ReturnTypeDesc::Initialize(Compiler* comp, CORINFO_CLASS_HANDLE retClsHnd) // and yet to be implemented for other multi-reg return // targets (Arm64/Arm32/x86). // -// TODO-ARM: Implement this routine to support HFA returns. +// TODO-ARM: Implement this routine to support HFA returns. +// TODO-ARM64: Implement this routine to support HFA returns. +// TODO-X86: Implement this routine to support long returns. regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx) { unsigned count = GetReturnRegCount(); @@ -13869,7 +13878,9 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx) // This routine can be used when the caller is not particular about the order // of return registers and wants to know the set of return registers. // -// TODO-ARM: Implement this routine to support HFA returns. +// TODO-ARM: Implement this routine to support HFA returns. +// TODO-ARM64: Implement this routine to support HFA returns. +// TODO-X86: Implement this routine to support long returns. 
// //static regMaskTP ReturnTypeDesc::GetABIReturnRegs() diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index ff2ffdc..d87da97 100755 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -6928,13 +6928,13 @@ bool Compiler::impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO * methInfo) // Support for any additional cases that don't use a Return Buffer Argument // on targets that support multi-reg return valuetypes. // - #ifdef _TARGET_ARM_ + #ifdef FEATURE_HFA // On ARM HFAs are returned in registers. if (!info.compIsVarArgs && IsHfa(methInfo->args.retTypeClass)) { return false; } - #endif + #endif // FEATURE_HFA #endif // FEATURE_MULTIREG_RET @@ -7026,9 +7026,10 @@ GenTreePtr Compiler::impFixupCallStructReturn(GenTreePtr call call->gtCall.gtRetClsHnd = retClsHnd; -#ifdef _TARGET_ARM_ +#if FEATURE_MULTIREG_RET && defined(FEATURE_HFA) // There is no fixup necessary if the return type is a HFA struct. - // HFA structs are returned in registers s0-s3 or d0-d3 in ARM. 
+ // HFA structs are returned in registers for ARM32 and ARM64 + // if (!call->gtCall.IsVarargs() && IsHfa(retClsHnd)) { if (call->gtCall.CanTailCall()) @@ -7216,7 +7217,7 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CL assert(info.compRetNativeType != TYP_STRUCT); #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING -#elif defined(_TARGET_ARM_) +#elif FEATURE_MULTIREG_RET && defined(FEATURE_HFA) if (!info.compIsVarArgs && IsHfa(retClsHnd)) { if (op->gtOper == GT_LCL_VAR) @@ -7244,7 +7245,7 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CL } return impAssignMultiRegTypeToVar(op, retClsHnd); } -#endif //_TARGET_ARM_ +#endif // FEATURE_MULTIREG_RET && FEATURE_HFA REDO_RETURN_NODE: // adjust the type away from struct to integral @@ -13652,7 +13653,7 @@ void Compiler::impLoadLoc(unsigned ilLclNum, IL_OFFSET offset) } } -#if defined(_TARGET_ARM_) +#ifdef _TARGET_ARM_ /************************************************************************************** * * When assigning a vararg call src to a HFA lcl dest, mark that we cannot promote the @@ -13674,7 +13675,7 @@ void Compiler::impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr src, CORIN { if (src->gtOper == GT_CALL && src->gtCall.IsVarargs() && IsHfa(hClass)) { - int hfaSlots = GetHfaSlots(hClass); + int hfaSlots = GetHfaCount(hClass); var_types hfaType = GetHfaType(hClass); // If we have varargs we morph the method's return type to be "int" irrespective of its original @@ -13690,16 +13691,16 @@ void Compiler::impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr src, CORIN } } } -#endif +#endif // _TARGET_ARM_ #if FEATURE_MULTIREG_RET GenTreePtr Compiler::impAssignMultiRegTypeToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass) { unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for multireg return.")); - impAssignTempGen(tmpNum, op, hClass, (unsigned) CHECK_SPILL_NONE); + impAssignTempGen(tmpNum, op, hClass, (unsigned)CHECK_SPILL_NONE); GenTreePtr ret = 
gtNewLclvNode(tmpNum, op->gtType); -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING // If single eightbyte, the return type would have been normalized and there won't be a temp var. // This code will be called only if the struct return has not been normalized (i.e. 2 eightbytes - max allowed.) assert(IsMultiRegReturnedType(hClass)); @@ -13712,7 +13713,7 @@ GenTreePtr Compiler::impAssignMultiRegTypeToVar(GenTreePtr op, CORINFO_CLASS_HAN return ret; } -#endif // FEATURE_MULTIREG_RET +#endif // FEATURE_MULTIREG_RET // do import for a return // returns false if inlining was aborted @@ -13946,12 +13947,12 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & // TODO-ARM64-NYI: HFA // TODO-AMD64-Unix and TODO-ARM once the ARM64 functionality is implemented the // next ifdefs could be refactored in a single method with the ifdef inside. -#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) -#if defined(_TARGET_ARM_) +#if FEATURE_MULTIREG_RET +#if defined(FEATURE_HFA) if (IsHfa(retClsHnd)) { // Same as !IsHfa but just don't bother with impAssignStructPtr. -#else // !defined(_TARGET_ARM_) +#else // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) ReturnTypeDesc retTypeDesc; retTypeDesc.Initialize(this, retClsHnd); unsigned retRegCount = retTypeDesc.GetReturnRegCount(); @@ -13962,18 +13963,18 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & // This code will be called only if the struct return has not been normalized (i.e. 2 eightbytes - max allowed.) assert(retRegCount == MAX_RET_REG_COUNT); // Same as !structDesc.passedInRegisters but just don't bother with impAssignStructPtr. 
-#endif // !defined(_TARGET_ARM_) +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM) { if (!impInlineInfo->retExpr) { -#if defined(_TARGET_ARM_) +#if defined(FEATURE_HFA) impInlineInfo->retExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, info.compRetType); -#else // !defined(_TARGET_ARM_) +#else // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // The inlinee compiler has figured out the type of the temp already. Use it here. impInlineInfo->retExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, lvaTable[lvaInlineeReturnSpillTemp].lvType); -#endif // !defined(_TARGET_ARM_) +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) } } else @@ -13998,7 +13999,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & } } else -#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#endif // FEATURE_MULTIREG_RET { assert(iciCall->AsCall()->HasRetBufArg()); GenTreePtr dest = gtCloneExpr(iciCall->gtCall.gtCallArgs->gtOp.gtOp1); @@ -14065,10 +14066,9 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & } else if (varTypeIsStruct(info.compRetType)) { -#if !defined(_TARGET_ARM_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - // In ARM HFA native types are maintained as structs. - // The multi register System V AMD64 return structs are also left as structs and not normalized. - // TODO-ARM64-NYI: HFA +#if !FEATURE_MULTIREG_RET + // For both ARM architectures the HFA native types are maintained as structs. + // Also on System V AMD64 the multireg structs returns are also left as structs. 
noway_assert(info.compRetNativeType != TYP_STRUCT); #endif op2 = impFixupStructReturnType(op2, retClsHnd); diff --git a/src/coreclr/src/jit/lclvars.cpp b/src/coreclr/src/jit/lclvars.cpp index 8782863..768e747 100644 --- a/src/coreclr/src/jit/lclvars.cpp +++ b/src/coreclr/src/jit/lclvars.cpp @@ -133,14 +133,13 @@ void Compiler::lvaInitTypeRef() // Change the compRetNativeType if we are returning a struct by value in a register if (!hasRetBuffArg && varTypeIsStruct(info.compRetNativeType)) { -#ifdef _TARGET_ARM_ - // TODO-ARM64-NYI: HFA +#if FEATURE_MULTIREG_RET && defined(FEATURE_HFA) if (!info.compIsVarArgs && !opts.compUseSoftFP && IsHfa(info.compMethodInfo->args.retTypeClass)) { info.compRetNativeType = TYP_STRUCT; } else -#endif +#endif // FEATURE_MULTIREG_RET && defined(FEATURE_HFA) { #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING ReturnTypeDesc retTypeDesc; @@ -533,52 +532,67 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo) regMaskTP doubleAlignMask = RBM_NONE; for (unsigned i = 0; - i < argSigLen; - i++, varDscInfo->varNum++, varDscInfo->varDsc++, argLst = info.compCompHnd->getArgNext(argLst)) + i < argSigLen; + i++, varDscInfo->varNum++, varDscInfo->varDsc++, argLst = info.compCompHnd->getArgNext(argLst)) { LclVarDsc * varDsc = varDscInfo->varDsc; CORINFO_CLASS_HANDLE typeHnd = NULL; - CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, - argLst, - &typeHnd); + CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, + argLst, + &typeHnd); varDsc->lvIsParam = 1; #if ASSERTION_PROP varDsc->lvSingleDef = 1; #endif - lvaInitVarDsc( varDsc, - varDscInfo->varNum, - strip(corInfoType), - typeHnd, - argLst, - &info.compMethodInfo->args); + lvaInitVarDsc(varDsc, + varDscInfo->varNum, + strip(corInfoType), + typeHnd, + argLst, + &info.compMethodInfo->args); // For ARM, ARM64, and AMD64 varargs, all arguments go in integer registers var_types argType = mangleVarArgsType(varDsc->TypeGet()); 
+ var_types origArgType = argType; unsigned argSize = eeGetArgSize(argLst, &info.compMethodInfo->args); unsigned cSlots = argSize / TARGET_POINTER_SIZE; // the total number of slots of this argument + bool isHfaArg = false; + var_types hfaType = TYP_UNDEF; + // Methods that use VarArg or SoftFP cannot have HFA arguments + if (!info.compIsVarArgs && !opts.compUseSoftFP) + { + // If the argType is a struct, then check if it is an HFA + if (varTypeIsStruct(argType)) + { + hfaType = GetHfaType(typeHnd); // set to float or double if it is an HFA, otherwise TYP_UNDEF + isHfaArg = varTypeIsFloating(hfaType); + } + } + if (isHfaArg) + { + // We have an HFA argument, so from here on out treat the type as a float or double. + // The original struct type is available by using origArgType + // We also update the cSlots to be the number of float/double fields in the HFA + argType = hfaType; + cSlots = varDsc->lvHfaSlots(); + } // The number of slots that must be enregistered if we are to consider this argument enregistered. // This is normally the same as cSlots, since we normally either enregister the entire object, // or none of it. For structs on ARM, however, we only need to enregister a single slot to consider // it enregistered, as long as we can split the rest onto the stack. - // TODO-ARM64-NYI: we can enregister a struct <= 16 bytes into two consecutive registers, if there are enough remaining argument registers. - // TODO-ARM64-NYI: HFA - unsigned cSlotsToEnregister = cSlots; + unsigned cSlotsToEnregister = cSlots; #ifdef _TARGET_ARM_ - - var_types hfaType = (varTypeIsStruct(argType)) ? GetHfaType(typeHnd) : TYP_UNDEF; - bool isHfaArg = !info.compIsVarArgs && !opts.compUseSoftFP && varTypeIsFloating(hfaType); - // On ARM we pass the first 4 words of integer arguments and non-HFA structs in registers. // But we pre-spill user arguments in varargs methods and structs.
- // + // unsigned cAlign; bool preSpill = info.compIsVarArgs || opts.compUseSoftFP; - switch (argType) + switch (origArgType) { case TYP_STRUCT: assert(varDsc->lvSize() == argSize); @@ -603,12 +617,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo) break; } - if (isHfaArg) - { - // We've got the HFA size and alignment, so from here on out treat - // the type as a float or double. - argType = hfaType; - } if (isRegParamType(argType)) { compArgSize += varDscInfo->alignReg(argType, cAlign) * REGSIZE_BYTES; @@ -750,14 +758,14 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo) firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots); } -#ifdef _TARGET_ARM_ if (isHfaArg) { // We need to save the fact that this HFA is enregistered - varDsc->lvIsHfaRegArg = true; - varDsc->SetHfaType(argType); + varDsc->lvSetIsHfa(); + varDsc->lvSetIsHfaRegArg(); + varDsc->SetHfaType(hfaType); + varDsc->lvIsMultiRegArgOrRet = (varDsc->lvHfaSlots() > 1); } -#endif // _TARGET_ARM_ varDsc->lvIsRegArg = 1; @@ -912,7 +920,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo) #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING compArgSize += argSize; #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING - if (info.compIsVarArgs || opts.compUseSoftFP) + if (info.compIsVarArgs || isHfaArg || opts.compUseSoftFP) { #if defined(_TARGET_X86_) varDsc->lvStkOffs = compArgSize; @@ -1434,13 +1442,13 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd, return; } -#ifdef _TARGET_ARM_ - // For ARM don't struct promote if we have an CUSTOMLAYOUT flag on an HFA type - if (StructHasCustomLayout(typeFlags) && IsHfa(typeHnd)) + // Don't struct promote if we have an CUSTOMLAYOUT flag on an HFA type + if (StructHasCustomLayout(typeFlags) && IsHfa(typeHnd)) { return; } +#ifdef _TARGET_ARM_ // On ARM, we have a requirement on the struct alignment; see below. 
unsigned structAlignment = roundUp(info.compCompHnd->getClassAlignmentRequirement(typeHnd), TARGET_POINTER_SIZE); #endif // _TARGET_ARM @@ -1600,17 +1608,17 @@ void Compiler::lvaCanPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo #endif -#ifdef _TARGET_ARM_ + // TODO-PERF - Allow struct promotion for HFA register arguments + // Explicitly check for HFA reg args and reject them for promotion here. // Promoting HFA args will fire an assert in lvaAssignFrameOffsets // when the HFA reg arg is struct promoted. // - if (varDsc->lvIsHfaRegArg) + if (varDsc->lvIsHfaRegArg()) { StructPromotionInfo->canPromote = false; return; } -#endif CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); lvaCanPromoteStructType(typeHnd, StructPromotionInfo, true); @@ -1963,6 +1971,25 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, boo varDsc->lvBaseType = simdBaseType; } #endif // FEATURE_SIMD +#ifdef FEATURE_HFA + // for structs that are small enough, we check and set lvIsHfa and lvHfaTypeIsFloat + if (varDsc->lvExactSize <= MAX_PASS_MULTIREG_BYTES) + { + var_types hfaType = GetHfaType(typeHnd); // set to float or double if it is an HFA, otherwise TYP_UNDEF + if (varTypeIsFloating(hfaType)) + { + varDsc->_lvIsHfa = true; + varDsc->lvSetHfaTypeIsFloat(hfaType == TYP_FLOAT); + + // hfa variables can never contain GC pointers + assert(varDsc->lvStructGcCount == 0); + // The size of this struct should be evenly divisible by 4 or 8 + assert((varDsc->lvExactSize % genTypeSize(hfaType)) == 0); + // The number of elements in the HFA should fit into our MAX_ARG_REG_COUNT limit + assert((varDsc->lvExactSize / genTypeSize(hfaType)) <= MAX_ARG_REG_COUNT); + } + } +#endif // FEATURE_HFA } else { @@ -5925,10 +5952,9 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t } } -#ifdef _TARGET_ARM_ - if (varDsc->lvIsHfaRegArg) + if (varDsc->lvIsHfaRegArg()) { - if (varDsc->lvHfaTypeIsFloat) + if (varDsc->lvHfaTypeIsFloat()) { 
printf(" (enregistered HFA: float) "); } @@ -5937,7 +5963,6 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t printf(" (enregistered HFA: double)"); } } -#endif // _TARGET_ARM_ if (varDsc->lvDoNotEnregister) { diff --git a/src/coreclr/src/jit/lowerarm64.cpp b/src/coreclr/src/jit/lowerarm64.cpp index 7372483..d0ec2a2 100644 --- a/src/coreclr/src/jit/lowerarm64.cpp +++ b/src/coreclr/src/jit/lowerarm64.cpp @@ -650,6 +650,9 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) } else { +#ifdef DEBUG + compiler->gtDispTree(argNode); +#endif noway_assert(!"Unsupported TYP_STRUCT arg kind"); } @@ -1048,13 +1051,10 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info argNode->gtLsraInfo.srcCount = 1; argNode->gtLsraInfo.dstCount = 0; - // Do we have a TYP_STRUCT argument (or a GT_LIST), if so it must be a 16-byte pass-by-value struct + // Do we have a TYP_STRUCT argument (or a GT_LIST), if so it must be a multireg pass-by-value struct if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_LIST)) { - // We will use two store instructions that each write a register sized value - - // We must have a multi-reg struct - assert(info->numSlots >= 2); + // We will use store instructions that each write a register sized value if (putArgChild->OperGet() == GT_LIST) { diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index dcf4536..09d6d5e 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -3475,12 +3475,10 @@ LinearScan::updateRegStateForArg(LclVarDsc* argDsc) #endif ); -#ifdef _TARGET_ARM_ - if (argDsc->lvIsHfaRegArg) + if (argDsc->lvIsHfaRegArg()) { isFloat = true; } -#endif // _TARGET_ARM_ if (isFloat) { diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp index 217ac21..e8256cf 100644 --- a/src/coreclr/src/jit/morph.cpp +++ b/src/coreclr/src/jit/morph.cpp @@ -903,6 +903,40 @@ unsigned UpdateGT_LISTFlags(GenTreePtr tree) return 
tree->gtFlags; } +#ifdef DEBUG +void fgArgTabEntry::Dump() +{ + if (regNum == REG_STK) + { + printf("fgArgTabEntry[arg%d, stk%02x, slots=%d", argNum, slotNum, numSlots); + } + else + { +#ifdef _TARGET_ARM64_ + if (emitter::isFloatReg(regNum)) + { + printf("fgArgTabEntry[arg%d, d%d, regs=%d", argNum, regNum-REG_FP_FIRST, numRegs); + } + else // integer register + { + printf("fgArgTabEntry[arg%d, x%d, regs=%d", argNum, regNum-REG_INT_FIRST, numRegs); + } +#else + printf("fgArgTabEntry[arg%02d, r%d, regs=%d", argNum, regNum, numRegs); +#endif + } + if (needTmp) + { + printf(", tmpNum=V%02d", tmpNum); + } + if (isHfaRegArg) + { + printf(", isHfa"); + } + printf("]\n"); +} +#endif + fgArgInfo::fgArgInfo(Compiler * comp, GenTreePtr call, unsigned numArgs) { compiler = comp; @@ -1608,6 +1642,33 @@ void fgArgInfo::ArgsComplete() // Spill multireg struct arguments that are expensive to evaluate twice curArgTabEntry->needTmp = true; } + else if (argx->OperGet() == GT_OBJ) + { + GenTreeObj* argObj = argx->AsObj(); + CORINFO_CLASS_HANDLE objClass = argObj->gtClass; + unsigned structSize = compiler->info.compCompHnd->getClassSize(objClass); + switch (structSize) + { + case 11: + case 13: + case 14: + case 15: + // Spill any GT_OBJ multireg structs that are difficult to extract + // + // When we have a GT_OBJ of a struct with the above sizes we would need + // to use 3 or 4 load instructions to load the exact size of this struct. + // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence + // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp. + // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing + // the argument. 
+ // + curArgTabEntry->needTmp = true; + break; + + default: + break; + } + } } } #endif // FEATURE_MULTIREG_ARGS @@ -2935,7 +2996,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING - bool hasStructArgument = false; // @TODO-ARM64-UNIX: Eemove this bool during a future refactoring + bool hasStructArgument = false; // @TODO-ARM64-UNIX: Remove this bool during a future refactoring bool hasMultiregStructArgs = false; for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2) { @@ -2962,16 +3023,27 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) argx->gtType = TYP_I_IMPL; bool passUsingFloatRegs; - unsigned argAlign = 1; + unsigned argAlign = 1; + // Setup any HFA information about 'argx' + var_types hfaType = GetHfaType(argx); + bool isHfaArg = varTypeIsFloating(hfaType); + unsigned hfaSlots = 0; -#ifdef _TARGET_ARM_ - var_types hfaType = GetHfaType(argx); - bool isHfaArg = varTypeIsFloating(hfaType); -#endif // _TARGET_ARM_ + if (isHfaArg) + { + hfaSlots = GetHfaCount(argx); + + // If we have a HFA struct it's possible we transition from a method that originally + // only had integer types to now start having FP types. We have to communicate this + // through this flag since LSRA later on will use this flag to determine whether + // or not to track the FP register set. 
+ // + compFloatingPointUsed = true; + } unsigned size = 0; CORINFO_CLASS_HANDLE copyBlkClass = NULL; - bool isRegArg = false; + bool isRegArg = false; fgArgTabEntryPtr argEntry = NULL; @@ -3034,14 +3106,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #elif defined(_TARGET_ARM64_) - // TODO-ARM64-NYI: HFA/HVA if (lateArgsComputed) { passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum); } else { - passUsingFloatRegs = !callIsVararg && varTypeIsFloating(argx); + passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)); } #elif defined(_TARGET_AMD64_) @@ -3070,6 +3141,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) bool isBackFilled = false; unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use var_types structBaseType = TYP_STRUCT; + unsigned structSize = 0; #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) unsigned int structFloatRegs = 0; @@ -3121,13 +3193,12 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // This size has now been computed assert(size != 0); } - else + else // !lateArgsComputed { // - // Figure out the size of the argument. This is either in number of registers, or number of register-sized - // stack slots, or both if the argument is split between the registers and the stack. + // Figure out the size of the argument. This is either in number of registers, or number of TARGET_POINTER_SIZE + // stack slots, or the sum of these if the argument is split between the registers and the stack. 
// - if (argx->IsArgPlaceHolderNode() || (!isStructArg)) { #if defined(_TARGET_AMD64_) @@ -3151,22 +3222,33 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #elif defined(_TARGET_ARM64_) if (isStructArg) { - // Structs are either passed in 1 or 2 (64-bit) slots - size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; - if (size > 2) + if (isHfaArg) { - size = 1; // Large structs are passed by reference (to a copy) + size = GetHfaCount(argx); + // HFA structs are passed by value in multiple registers + hasMultiregStructArgs = true; } - else if (size == 2) + else { - hasMultiregStructArgs = true; + // Structs are either passed in 1 or 2 (64-bit) slots + size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; + + if (size == 2) + { + // Structs that are the size of 2 pointers are passed by value in multiple registers + hasMultiregStructArgs = true; + } + else if (size > 2) + { + size = 1; // Structs that are larger that 2 pointers (except for HFAs) are passed by reference (to a copy) + } } - // Note that there are some additional rules for size=2 structs, + // Note that there are some additional rules for multireg structs. 
// (i.e they cannot be split betwen registers and the stack) } else { - size = 1; // On ARM64, all primitive types fit in a single (64-bit) 'slot' + size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot' } #elif defined(_TARGET_ARM_) if (isStructArg) @@ -3187,9 +3269,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #ifdef _TARGET_ARM_ else if (isHfaArg) { - size = GetHfaSlots(argx); + size = GetHfaCount(argx); } -#endif +#endif // _TARGET_ARM_ else // struct type { // We handle two opcodes: GT_MKREFANY and GT_OBJ @@ -3217,9 +3299,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) size = 2; #endif } - else + else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be be a child of a GT_COMMA { - // GT_OBJ case GenTreePtr argObj = argx; GenTreePtr* parentOfArgObj = parentArgx; @@ -3244,6 +3325,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) unsigned originalSize = info.compCompHnd->getClassSize(objClass); originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize); unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE); + + structSize = originalSize; + #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING // On System V OS-es a struct is never passed by reference. // It is either passed by value on the stack or in registers. @@ -3252,11 +3336,20 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) bool passStructByRef = false; #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING + // The following if-then-else needs to be carefully refactored + // Basically the else portion wants to turn a struct load (a GT_OBJ)' + // into a GT_IND of the appropriate size. + // It can do this with structs sizes that are 1,2,4, or 8 bytes + // It can't do this when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined (Why?) 
+ // TODO-Cleanup: Remove the #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING below + // It also can't do this if we have a HFA arg, + // unless we have a 1-elem HFA in which case we want to do the optimization + // #ifndef _TARGET_X86_ #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING // Check for struct argument with size 1, 2, 4 or 8 bytes // As we can optimize these by turning them into a GT_IND of the correct type - if ((originalSize > TARGET_POINTER_SIZE) || ((originalSize & (originalSize - 1)) != 0)) + if ((originalSize > TARGET_POINTER_SIZE) || ((originalSize & (originalSize - 1)) != 0) || (isHfaArg && (hfaSlots != 1))) #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING { // Normalize 'size' to the number of pointer sized items @@ -3281,7 +3374,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #elif defined(_TARGET_ARM64_) - if (size > 2) + if ((size > 2) && !isHfaArg) { size = 1; // This must be copied to a temp and passed by address passStructByRef = true; @@ -3308,11 +3401,17 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #endif // _TARGET_ARM_ } #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING - else + else // We have a struct argument with size 1, 2, 4 or 8 bytes { // change our GT_OBJ into a GT_IND of the correct type. + // We've already ensured above that size is a power of 2, and less than or equal to pointer size. structBaseType = argOrReturnTypeForStruct(originalSize, objClass, false /* forReturn */); - // We've already ensured above that size is a power of 2, and less than pointer size.
+ if (isHfaArg) + { + // If we reach here with an HFA arg it has to be a one element HFA + assert(hfaSlots == 1); + structBaseType = hfaType; // change the indirection type to a floating point type + } noway_assert(structBaseType != TYP_UNKNOWN); argObj->ChangeOper(GT_IND); @@ -3416,39 +3515,46 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) ) { - // if the valuetype size is not a multiple of sizeof(void*), - // we must copyblk to a temp before doing the obj to avoid - // the obj reading memory past the end of the valuetype + if (isHfaArg && passUsingFloatRegs) + { + size = GetHfaCount(argx); // GetHfaCount returns number of elements in the HFA + } + else + { + // if the valuetype size is not a multiple of sizeof(void*), + // we must copyblk to a temp before doing the obj to avoid + // the obj reading memory past the end of the valuetype #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) // TODO-X86-CQ: [1091733] Revisit for small structs, we should use push instruction - copyBlkClass = objClass; - size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items -#else // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND) - if (roundupSize > originalSize) - { copyBlkClass = objClass; + size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items +#else // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND) + if (roundupSize > originalSize) + { + copyBlkClass = objClass; - // There are a few special cases where we can omit using a CopyBlk - // where we normally would need to use one. + // There are a few special cases where we can omit using a CopyBlk + // where we normally would need to use one. 
- GenTreePtr objAddr = argObj->gtObj.gtOp1; - if (objAddr->gtOper == GT_ADDR) - { - // exception : no need to use CopyBlk if the valuetype is on the stack - if (objAddr->gtFlags & GTF_ADDR_ONSTACK) - { - copyBlkClass = NO_CLASS_HANDLE; - } - // exception : no need to use CopyBlk if the valuetype is already a struct local - else if (objAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR) + GenTreePtr objAddr = argObj->gtObj.gtOp1; + if (objAddr->gtOper == GT_ADDR) { - copyBlkClass = NO_CLASS_HANDLE; + // exception : no need to use CopyBlk if the valuetype is on the stack + if (objAddr->gtFlags & GTF_ADDR_ONSTACK) + { + copyBlkClass = NO_CLASS_HANDLE; + } + // exception : no need to use CopyBlk if the valuetype is already a struct local + else if (objAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR) + { + copyBlkClass = NO_CLASS_HANDLE; + } } } - } - size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items + size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items #endif // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND) + } } } if (size > 1) @@ -3515,6 +3621,15 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) { // Check if the last register needed is still in the fp argument register range. isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG; + + // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers? 
+ if (isHfaArg && !isRegArg) + { + // recompute the 'size' so that it represent the number of stack slots rather than the number of registers + // + unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE); + size = roundupSize / TARGET_POINTER_SIZE; + } } else { @@ -3749,8 +3864,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) ); (void)newArg; //prevent "unused variable" error from GCC + + newArg->SetIsHfaRegArg(passUsingFloatRegs && isHfaArg); // Note on Arm32 a HFA is passed in int regs for varargs + #ifdef _TARGET_ARM_ - newArg->SetIsHfaRegArg(passUsingFloatRegs && isHfaArg); // Note that an HFA is passed in int regs for varargs newArg->SetIsBackFilled(isBackFilled); #endif // _TARGET_ARM_ } @@ -3940,22 +4057,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // calls to update lvaOutgoingArgSpaceSize. if (!call->IsFastTailCall()) { - unsigned preallocatedArgCount; + unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum(); + +#if defined(UNIX_AMD64_ABI) + opts.compNeedToAlignFrame = true; // this is currently required for the UNIX ABI to work correctly + + // ToDo: Remove this re-calculation preallocatedArgCount and use the value assigned above. -#if defined(_TARGET_ARMARCH_) - // First slots go in registers only, no stack needed. - // TODO-ARMArch-CQ: This calculation is only accurate for integer arguments, - // and ignores floating point args (it is overly conservative in that case). - if (argSlots <= MAX_REG_ARG) - { - preallocatedArgCount = 0; - } - else - { - preallocatedArgCount = argSlots - MAX_REG_ARG; - } -#elif defined(UNIX_AMD64_ABI) - opts.compNeedToAlignFrame = true; // First slots go in registers only, no stack needed. // TODO-Amd64-Unix-CQ This calculation is only accurate for integer arguments, // and ignores floating point args (it is overly conservative in that case). 
@@ -3967,11 +4075,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) { preallocatedArgCount = argSlots + nonRegPassedStructSlots - MAX_REG_ARG; } -#elif defined(_TARGET_AMD64_) - preallocatedArgCount = max(4, argSlots); -#else -#error Unsupported or unset target architecture -#endif // _TARGET_* +#endif // UNIX_AMD64_ABI + + // Check if we need to increase the size of our Outgoing Arg Space if (preallocatedArgCount * REGSIZE_BYTES > lvaOutgoingArgSpaceSize) { lvaOutgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES; @@ -3983,9 +4089,16 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // stack alignment boundary. if (compLocallocUsed) { - lvaOutgoingArgSpaceSize = (lvaOutgoingArgSpaceSize + (STACK_ALIGN - 1)) & ~(STACK_ALIGN - 1); + lvaOutgoingArgSpaceSize = (unsigned) roundUp(lvaOutgoingArgSpaceSize, STACK_ALIGN); } + } +#ifdef DEBUG + if (verbose) + { + printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, lvaOutgoingArgSpaceSize=%d", + argSlots, preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), lvaOutgoingArgSpaceSize); } +#endif } #endif // FEATURE_FIXED_OUT_ARGS @@ -4038,6 +4151,19 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#ifdef DEBUG + if (verbose) + { + fgArgInfoPtr argInfo = call->fgArgInfo; + + for (unsigned curInx = 0; curInx < argInfo->ArgCount(); curInx++) + { + fgArgTabEntryPtr curArgEntry = argInfo->ArgTable()[curInx]; + curArgEntry->Dump(); + } + } +#endif + return call; } #ifdef _PREFAST_ @@ -4340,8 +4466,10 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) // Morph the argument into a set of GT_LIST nodes. 
// // Arguments: -// arg - A GenTree node containing a TYP_STRUCT arg that -// is to be passed in multiple registers +// arg - A GenTree node containing a TYP_STRUCT arg that +// is to be passed in multiple registers +// fgEntryPtr - the fgArgTabEntry information for the current 'arg' +// // Notes: // arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT that is suitable // for passing in multiple registers. @@ -4354,32 +4482,116 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) // For the GT_OBJ case will clone the address expression and generate two (or more) // indirections. // Currently the implementation only handles ARM64 and will NYI for other architectures. -// And for ARM64 we do not ye handle HFA arguments, so only 16-byte struct sizes are supported. // GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr fgEntryPtr) { - GenTreeArgList* newArg = nullptr; assert(arg->TypeGet() == TYP_STRUCT); - GenTreePtr argValue = arg; #ifndef _TARGET_ARM64_ - NYI("fgMorphMultiregStructArg non-ARM64 implementation"); + NYI("fgMorphMultiregStructArg requires implementation for this target"); #endif - // If we have a GT_OBJ of a GT_ADDR then - // we set argValue to the child node ofthe GT_ADDR +#if FEATURE_MULTIREG_ARGS + // Examine 'arg' and setup argValue objClass and structSize + // + CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE; + GenTreePtr argValue = arg; // normally argValue will be arg, but see right below + unsigned structSize = 0; + if (arg->OperGet() == GT_OBJ) { - GenTreePtr argAddr = arg->gtOp.gtOp1; + GenTreeObj* argObj = arg->AsObj(); + objClass = argObj->gtClass; + structSize = info.compCompHnd->getClassSize(objClass); + + // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR + // + if (argObj->gtOp1->OperGet() == GT_ADDR) + { + argValue = argObj->gtOp1->gtOp.gtOp1; + } + } + else if (arg->OperGet() == GT_LCL_VAR) + { + GenTreeLclVarCommon* varNode = 
arg->AsLclVarCommon(); + unsigned varNum = varNode->gtLclNum; + assert(varNum < lvaCount); + LclVarDsc* varDsc = &lvaTable[varNum]; + + objClass = lvaGetStruct(varNum); + structSize = varDsc->lvExactSize; + } + noway_assert(objClass != nullptr); + + var_types hfaType = TYP_UNDEF; + var_types elemType = TYP_UNDEF; + unsigned elemCount = 0; + unsigned elemSize = 0; + var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0 - if (argAddr->OperGet() == GT_ADDR) + hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF + if (varTypeIsFloating(hfaType)) + { + elemType = hfaType; + elemSize = genTypeSize(elemType); + elemCount = structSize / elemSize; + assert(elemSize*elemCount == structSize); + for (unsigned inx = 0; inxgtOp.gtOp1; + type[inx] = elemType; } } + else + { + assert(structSize <= 2 * TARGET_POINTER_SIZE); + BYTE gcPtrs[2] = { TYPE_GC_NONE, TYPE_GC_NONE }; + info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); + elemCount = 2; + type[0] = getJitGCType(gcPtrs[0]); + type[1] = getJitGCType(gcPtrs[1]); + + if ((argValue->OperGet() == GT_LCL_FLD) || + (argValue->OperGet() == GT_LCL_VAR)) + { + // We can safely widen this to 16 bytes since we are loading from + // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and + // lives in the stack frame or will be a promoted field. 
+ // + elemSize = TARGET_POINTER_SIZE; + structSize = 2 * TARGET_POINTER_SIZE; + } + else // we must have a GT_OBJ + { + assert(argValue->OperGet() == GT_OBJ); + + // We need to load the struct from an arbitrary address + // and we can't read past the end of the structSize + // We adjust the second load type here + // + if (structSize < 2 * TARGET_POINTER_SIZE) + { + switch (structSize - TARGET_POINTER_SIZE) { + case 1: + type[1] = TYP_BYTE; + break; + case 2: + type[1] = TYP_SHORT; + break; + case 4: + type[1] = TYP_INT; + break; + default: + noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg"); + break; + } + } + } + } // We should still have a TYP_STRUCT assert(argValue->TypeGet() == TYP_STRUCT); + GenTreeArgList* newArg = nullptr; + // Are we passing a struct LclVar? // if (argValue->OperGet() == GT_LCL_VAR) @@ -4389,18 +4601,63 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPt assert(varNum < lvaCount); LclVarDsc* varDsc = &lvaTable[varNum]; - // At this point any TYP_STRUCT LclVar must be a 16-byte pass by value argument - assert(varDsc->lvSize() == 2 * TARGET_POINTER_SIZE); + // At this point any TYP_STRUCT LclVar must be a 16-byte struct + // or an HFA struct, both which are passed by value. + // + assert((varDsc->lvSize() == 2*TARGET_POINTER_SIZE) || varDsc->lvIsHfa()); - const BYTE * gcPtrs = varDsc->lvGcLayout; + varDsc->lvIsMultiRegArgOrRet = true; - var_types type0 = getJitGCType(gcPtrs[0]); - var_types type1 = getJitGCType(gcPtrs[1]); +#ifdef DEBUG + if (verbose) + { + JITDUMP("Multireg struct argument V%02u : "); + fgEntryPtr->Dump(); + } +#endif // DEBUG - varDsc->lvIsMultiRegArgOrRet = true; + // This local variable must match the layout of the 'objClass' type exactly + if (varDsc->lvIsHfa()) + { + // We have a HFA struct + noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? 
TYP_FLOAT : TYP_DOUBLE)); + noway_assert(elemSize == genTypeSize(elemType)); + noway_assert(elemCount == (varDsc->lvExactSize / elemSize)); + noway_assert(elemSize*elemCount == varDsc->lvExactSize); - // Is this LclVar a promoted struct with exactly two fields? - if ((varDsc->lvPromoted) && (varDsc->lvFieldCnt == 2)) + for (unsigned inx = 0; (inx < elemCount); inx++) + { + noway_assert(type[inx] == elemType); + } + } + else + { + // We must have a 16-byte struct (non-HFA) + noway_assert(elemCount == 2); + + for (unsigned inx = 0; inx < elemCount; inx++) + { + CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx]; + + // We setup the type[inx] value above using the GC info from 'objClass' + // This GT_LCL_VAR must have the same GC layout info + // + if (currentGcLayoutType != TYPE_GC_NONE) + { + noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType)); + } + else + { + // We may have use a small type when we setup the type[inx] values above + // We can safely widen this to TYP_I_IMPL + type[inx] = TYP_I_IMPL; + } + } + } + + // Is this LclVar a promoted struct with exactly 2 fields? + // TODO-ARM64-CQ: Support struct promoted HFA types here + if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2)) { // See if we have two promoted fields that start at offset 0 and 8? 
unsigned loVarNum = lvaGetFieldLocal(varDsc, 0); @@ -4440,39 +4697,17 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPt } } } - - // Check if we couldn't transform the LDOBJ(ADDR(LCLVAR)) into a struct promoted GT_LIST above - if (newArg == nullptr) + else { // - // We weren't able to pass this LclVar using it's struct promted fields - // - // So instead we will create a list of GT_LCL_FLDs nodes to pass this struct + // We will create a list of GT_LCL_FLDs nodes to pass this struct // lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField)); - - // If this is going in the register area, we transform it here into a GT_LIST of LCLFLD's - // If this is going in the outgoing arg area, it will be transformed later - // - if (fgEntryPtr->regNum != REG_STK) - { - GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon(); - unsigned varNum = varNode->gtLclNum; - assert(varNum < lvaCount); - LclVarDsc* varDsc = &lvaTable[varNum]; - - GenTreePtr loLclFld = gtNewLclFldNode(varNum, type0, 0); - GenTreePtr hiLclFld = gtNewLclFldNode(varNum, type1, TARGET_POINTER_SIZE); - - // Create a new tree for 'arg' - // replace the existing LDOBJ(ADDR(LCLVAR)) - // with a LIST(LCLFLD-LO, LIST(LCLFLD-HI, nullptr)) - // - newArg = gtNewListNode(loLclFld, gtNewArgList(hiLclFld)); - } } } - // Check if we already created a replacement newArg above + + // If we didn't set newarg to a new List Node tree + // if (newArg == nullptr) { if (fgEntryPtr->regNum == REG_STK) @@ -4480,73 +4715,116 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPt // We leave this stack passed argument alone return arg; } - } - - // Are we passing a GT_LCL_FLD which contain a 16-byte struct inside it? 
- // - if (argValue->OperGet() == GT_LCL_FLD) - { - GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon(); - unsigned varNum = varNode->gtLclNum; - assert(varNum < lvaCount); - LclVarDsc* varDsc = &lvaTable[varNum]; - unsigned baseOffset = argValue->gtLclFld.gtLclOffs; - unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE; - unsigned requiredSize = baseOffset + (2 * TARGET_POINTER_SIZE); + // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted ) + // A GT_LCL_FLD could also contain a 16-byte struct or HFA struct inside it? + // + if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR)) + { + GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon(); + unsigned varNum = varNode->gtLclNum; + assert(varNum < lvaCount); + LclVarDsc* varDsc = &lvaTable[varNum]; - // The allocated size of our LocalVar must be at least as big as requiredSize - assert(varDsc->lvSize() >= requiredSize); + unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? 
argValue->gtLclFld.gtLclOffs : 0; + unsigned lastOffset = baseOffset + (elemCount * elemSize); - const BYTE * gcPtrs = varDsc->lvGcLayout; + // The allocated size of our LocalVar must be at least as big as lastOffset + assert(varDsc->lvSize() >= lastOffset); - var_types type0 = getJitGCType(gcPtrs[baseIndex+0]); - var_types type1 = getJitGCType(gcPtrs[baseIndex+1]); + if (varDsc->lvStructGcCount > 0) + { + // alignment of the baseOffset is required + noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0); + noway_assert(elemSize == TARGET_POINTER_SIZE); + unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE; + const BYTE * gcPtrs = varDsc->lvGcLayout; // Get the GC layout for the local variable + for (unsigned inx = 0; (inx < elemCount); inx++) + { + // The GC information must match what we setup using 'objClass' + noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx])); + } + } + else // this varDsc contains no GC pointers + { + for (unsigned inx = 0; inx 0) + { + inx--; + offset -= elemSize; + GenTreePtr nextLclFld = gtNewLclFldNode(varNum, type[inx], offset); + if (newArg == nullptr) + { + newArg = gtNewArgList(nextLclFld); + } + else + { + newArg = gtNewListNode(nextLclFld, newArg); + } + } + } + // Are we passing a GT_OBJ struct? // - newArg = gtNewListNode(loLclFld, gtNewArgList(hiLclFld)); - } - // Are we passing a GT_OBJ struct? 
- // - else if (argValue->OperGet() == GT_OBJ) - { - GenTreeObj* argObj = argValue->AsObj(); - CORINFO_CLASS_HANDLE objClass = argObj->gtClass; - - int structSize = info.compCompHnd->getClassSize(objClass); - assert(structSize <= 2 * TARGET_POINTER_SIZE); - BYTE gcPtrs[2] = { TYPE_GC_NONE, TYPE_GC_NONE }; - info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); - - var_types type0 = getJitGCType(gcPtrs[0]); - var_types type1 = getJitGCType(gcPtrs[1]); - - GenTreePtr baseAddr = argObj->gtOp1; - GenTreePtr baseAddrDup = gtCloneExpr(baseAddr); - noway_assert(baseAddrDup != nullptr); + else if (argValue->OperGet() == GT_OBJ) + { + GenTreeObj* argObj = argValue->AsObj(); + GenTreePtr baseAddr = argObj->gtOp1; + var_types addrType = baseAddr->TypeGet(); - var_types addrType = baseAddr->TypeGet(); - GenTreePtr loAddr = baseAddr; - GenTreePtr hiAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(TARGET_POINTER_SIZE, TYP_I_IMPL)); - GenTreePtr loValue = gtNewOperNode(GT_IND, type0, loAddr); - GenTreePtr hiValue = gtNewOperNode(GT_IND, type1, hiAddr); + // Create a new tree for 'arg' + // replace the existing LDOBJ(EXPR) + // with a LIST(IND(EXPR), LIST(IND(EXPR+8), nullptr) ...) 
+ // - // Create a new tree for 'arg' - // replace the existing LDOBJ(EXPR) - // with a LIST(IND(EXPR), LIST(IND(EXPR+8), nullptr)) - // - newArg = gtNewListNode(loValue, gtNewArgList(hiValue)); + // Start building our list from the last element + unsigned offset = structSize; + unsigned inx = elemCount; + while (inx > 0) + { + inx--; + elemSize = genTypeSize(type[inx]); + offset -= elemSize; + GenTreePtr curAddr = baseAddr; + if (offset != 0) + { + GenTreePtr baseAddrDup = gtCloneExpr(baseAddr); + noway_assert(baseAddrDup != nullptr); + curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL)); + } + else + { + curAddr = baseAddr; + } + GenTreePtr curItem = gtNewOperNode(GT_IND, type[inx], curAddr); + if (newArg == nullptr) + { + newArg = gtNewArgList(curItem); + } + else + { + newArg = gtNewListNode(curItem, newArg); + } + } + } } // If we reach here we should have set newArg to something @@ -4567,9 +4845,13 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPt #endif arg = newArg; // consider calling fgMorphTree(newArg); + +#endif // FEATURE_MULTIREG_ARGS + return arg; } + // Make a copy of a struct variable if necessary, to pass to a callee. // returns: tree that computes address of the outgoing arg void @@ -4757,7 +5039,7 @@ void Compiler::fgFixupStructReturn(GenTreePtr callNode) if (!callHasRetBuffArg && varTypeIsStruct(call)) { -#ifdef _TARGET_ARM_ +#ifdef FEATURE_HFA if (call->gtCall.IsVarargs() || !IsHfa(call)) #endif { @@ -4767,7 +5049,7 @@ void Compiler::fgFixupStructReturn(GenTreePtr callNode) } } -#ifdef _TARGET_ARM_ +#ifdef FEATURE_HFA // Either we don't have a struct now or if struct, then it is HFA returned in regs. 
assert(!varTypeIsStruct(call) || (IsHfa(call) && !callHasRetBuffArg)); #elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) @@ -7255,7 +7537,7 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) GenTree* result = call; if (callType != TYP_VOID && info.compRetType != TYP_VOID) { -#ifdef _TARGET_ARM_ +#ifdef FEATURE_HFA // Return a dummy node, as the return is already removed. if (callType == TYP_STRUCT) { @@ -15934,7 +16216,7 @@ Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTreePtr tree, fgWalkD /***************************************************************************** * * Mark irregular parameters. For x64 this is 3, 5, 6, 7, >8 byte structs that are passed by reference. - * For ARM64, this is structs larger than 16 bytes that are passed by reference. + * For ARM64, this is structs larger than 16 bytes that are also not HFAs that are passed by reference. */ void Compiler::fgMarkImplicitByRefArgs() { @@ -15969,7 +16251,8 @@ void Compiler::fgMarkImplicitByRefArgs() #if defined(_TARGET_AMD64_) if (size > REGSIZE_BYTES || (size & (size - 1)) != 0) #elif defined(_TARGET_ARM64_) - if (size > 16) + if ((size > TARGET_POINTER_SIZE) && !varDsc->lvIsMultiregStruct()) + #endif { // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local @@ -15981,7 +16264,6 @@ void Compiler::fgMarkImplicitByRefArgs() // Also marking them as BYREF will hide them from struct promotion. 
varDsc->lvType = TYP_BYREF; - varDsc->lvRefCnt = 0; // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF @@ -15994,6 +16276,11 @@ void Compiler::fgMarkImplicitByRefArgs() // This should not be converted to a double in stress mode, // because it is really a pointer varDsc->lvKeepType = 1; + + if (verbose) + { + printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum); + } #endif // DEBUG } #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING diff --git a/src/coreclr/src/jit/protojit/CMakeLists.txt b/src/coreclr/src/jit/protojit/CMakeLists.txt index f5b55ab..e3cc769 100644 --- a/src/coreclr/src/jit/protojit/CMakeLists.txt +++ b/src/coreclr/src/jit/protojit/CMakeLists.txt @@ -5,16 +5,16 @@ add_definitions(-DFEATURE_NO_HOST) add_definitions(-DSELF_NO_HOST) remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE) -if(CLR_CMAKE_PLATFORM_LINUX OR CLR_CMAKE_PLATFORM_NETBSD) - # This is required to force using our own PAL, not one that we are loaded with. - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Xlinker -Bsymbolic -Bsymbolic-functions") -endif(CLR_CMAKE_PLATFORM_LINUX OR CLR_CMAKE_PLATFORM_NETBSD) - add_library_clr(protojit SHARED ${SHARED_LIB_SOURCES} ) +add_dependencies(protojit jit_exports) + +set_property(TARGET protojit APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION}) +set_property(TARGET protojit APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE}) + set(RYUJIT_LINK_LIBRARIES utilcodestaticnohost gcinfo diff --git a/src/coreclr/src/jit/regalloc.cpp b/src/coreclr/src/jit/regalloc.cpp index 4d10ea0..26f1ac9 100644 --- a/src/coreclr/src/jit/regalloc.cpp +++ b/src/coreclr/src/jit/regalloc.cpp @@ -293,13 +293,11 @@ regMaskTP Compiler::genReturnRegForTree(GenTreePtr tree) { var_types type = tree->TypeGet(); -#ifdef _TARGET_ARM_ if (type == TYP_STRUCT && IsHfa(tree)) { - int retSlots = GetHfaSlots(tree); + int retSlots = GetHfaCount(tree); return ((1 << retSlots) - 1) << REG_FLOATRET; } -#endif 
const static regMaskTP returnMap[TYP_COUNT] = @@ -672,22 +670,6 @@ regNumber Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *ar regState->rsCalleeRegArgMaskLiveIn |= genRegMask(inArgReg); -#if FEATURE_MULTIREG_ARGS -#ifdef _TARGET_ARM64_ - if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA)) - { - assert(argDsc->lvIsMultiregStruct()); - - regNumber secondArgReg = argDsc->lvOtherArgReg; - - noway_assert(regState->rsIsFloat == false); - noway_assert(genRegMask(secondArgReg) & RBM_ARG_REGS); - - regState->rsCalleeRegArgMaskLiveIn |= genRegMask(secondArgReg); - } -#endif // TARGET_ARM64_ -#endif // FEATURE_MULTIREG_ARGS - #ifdef _TARGET_ARM_ if (argDsc->lvType == TYP_DOUBLE) { @@ -710,12 +692,15 @@ regNumber Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *ar regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg+1)); } - else if (argDsc->lvType == TYP_STRUCT) +#endif // _TARGET_ARM_ + +#if FEATURE_MULTIREG_ARGS + if (argDsc->lvType == TYP_STRUCT) { - if (argDsc->lvIsHfaRegArg) + if (argDsc->lvIsHfaRegArg()) { assert(regState->rsIsFloat); - unsigned cSlots = GetHfaSlots(argDsc->lvVerTypeInfo.GetClassHandleForValueClass()); + unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass()); for (unsigned i = 1; i < cSlots; i++) { assert(inArgReg + i <= LAST_FP_ARGREG); @@ -732,12 +717,12 @@ regNumber Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *ar { break; } - assert(!regState->rsIsFloat); + assert(regState->rsIsFloat == false); regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg); } } } -#endif // _TARGET_ARM_ +#endif // FEATURE_MULTIREG_ARGS return inArgReg; } diff --git a/src/coreclr/src/jit/scopeinfo.cpp b/src/coreclr/src/jit/scopeinfo.cpp index 7f8d8ac..f873dc7 100644 --- a/src/coreclr/src/jit/scopeinfo.cpp +++ b/src/coreclr/src/jit/scopeinfo.cpp @@ -999,12 +999,10 @@ void CodeGen::psiBegProlog() { #ifdef DEBUG var_types regType = 
compiler->mangleVarArgsType(lclVarDsc1->TypeGet()); -#ifdef _TARGET_ARM_ - if (lclVarDsc1->lvIsHfaRegArg) + if (lclVarDsc1->lvIsHfaRegArg()) { regType = lclVarDsc1->GetHfaType(); } -#endif // _TARGET_ARM_ assert(genMapRegNumToRegArgNum(lclVarDsc1->lvArgReg, regType) != (unsigned)-1); #endif // DEBUG diff --git a/src/coreclr/src/jit/standalone/CMakeLists.txt b/src/coreclr/src/jit/standalone/CMakeLists.txt index 4e70b9e..b4efc30 100644 --- a/src/coreclr/src/jit/standalone/CMakeLists.txt +++ b/src/coreclr/src/jit/standalone/CMakeLists.txt @@ -8,16 +8,16 @@ if(CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM) add_definitions(-DLEGACY_BACKEND) endif() -if(CLR_CMAKE_PLATFORM_LINUX OR CLR_CMAKE_PLATFORM_NETBSD) - # This is required to force using our own PAL, not one that we are loaded with. - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Xlinker -Bsymbolic -Bsymbolic-functions") -endif(CLR_CMAKE_PLATFORM_LINUX OR CLR_CMAKE_PLATFORM_NETBSD) - add_library_clr(${JIT_BASE_NAME} SHARED ${SHARED_LIB_SOURCES} ) +add_dependencies(${JIT_BASE_NAME} jit_exports) + +set_property(TARGET ${JIT_BASE_NAME} APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION}) +set_property(TARGET ${JIT_BASE_NAME} APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE}) + set(RYUJIT_LINK_LIBRARIES utilcodestaticnohost gcinfo diff --git a/src/coreclr/src/jit/target.h b/src/coreclr/src/jit/target.h index ce23bb4..a788b8a 100644 --- a/src/coreclr/src/jit/target.h +++ b/src/coreclr/src/jit/target.h @@ -719,15 +719,15 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register #define FEATURE_STRUCT_CLASSIFIER 1 // Uses a classifier function to determine if structs are passed/returned in more than one register - #define MAX_PASS_MULTIREG_BYTES 32 // Maximum size of a struct that 
could be passed in more than one register - #define MAX_RET_MULTIREG_BYTES 32 // Maximum size of a struct that could be returned in more than one register - #define MAX_ARG_REG_COUNT 2 // Maximum registers used to pass an argument. + #define MAX_PASS_MULTIREG_BYTES 32 // Maximum size of a struct that could be passed in more than one register (Max is two SIMD16s) + #define MAX_RET_MULTIREG_BYTES 32 // Maximum size of a struct that could be returned in more than one register (Max is two SIMD16s) + #define MAX_ARG_REG_COUNT 2 // Maximum registers used to pass a single argument in multiple registers. #define MAX_RET_REG_COUNT 2 // Maximum registers used to return a value. #else // !UNIX_AMD64_ABI #define FEATURE_MULTIREG_ARGS_OR_RET 0 // Support for passing and/or returning single values in more than one register #define FEATURE_MULTIREG_ARGS 0 // Support for passing a single argument in more than one register #define FEATURE_MULTIREG_RET 0 // Support for returning a single value in more than one register - #define MAX_ARG_REG_COUNT 1 // Maximum registers used to pass an argument. + #define MAX_ARG_REG_COUNT 1 // Maximum registers used to pass a single argument (no arguments are passed using multiple registers) #define MAX_RET_REG_COUNT 1 // Maximum registers used to return a value. #endif // !UNIX_AMD64_ABI @@ -1162,8 +1162,9 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine is structs are passed/returned in more than one register #define MAX_PASS_MULTIREG_BYTES 32 // Maximum size of a struct that could be passed in more than one register (Max is an HFA of 4 doubles) #define MAX_RET_MULTIREG_BYTES 32 // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 4 doubles) - #define MAX_ARG_REG_COUNT 4 // Maximum registers used to pass an argument. 
+ #define MAX_ARG_REG_COUNT 4 // Maximum registers used to pass a single argument in multiple registers. (max is 4 floats or doubles using an HFA) #define MAX_RET_REG_COUNT 4 // Maximum registers used to return a value. + #ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS #define NOGC_WRITE_BARRIERS 0 // We DO-NOT have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers #else @@ -1423,8 +1424,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1|RBM_ARG_2|RBM_ARG_3) #define RBM_FLTARG_REGS (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7|RBM_F8|RBM_F9|RBM_F10|RBM_F11|RBM_F12|RBM_F13|RBM_F14|RBM_F15) - #define RBM_DBL_REGS (RBM_F0|RBM_F2|RBM_F4|RBM_F6|RBM_F8|RBM_F10|RBM_F12|RBM_F14|RBM_F16|RBM_F18|RBM_F20|RBM_F22|RBM_F24|RBM_F26|RBM_F28|RBM_F30) - + #define RBM_DBL_REGS RBM_ALLDOUBLE SELECTANY const regNumber fltArgRegs [] = {REG_F0, REG_F1, REG_F2, REG_F3, REG_F4, REG_F5, REG_F6, REG_F7, REG_F8, REG_F9, REG_F10, REG_F11, REG_F12, REG_F13, REG_F14, REG_F15 }; SELECTANY const regMaskTP fltArgMasks[] = {RBM_F0, RBM_F1, RBM_F2, RBM_F3, RBM_F4, RBM_F5, RBM_F6, RBM_F7, RBM_F8, RBM_F9, RBM_F10, RBM_F11, RBM_F12, RBM_F13, RBM_F14, RBM_F15 }; @@ -1474,10 +1474,11 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register #define FEATURE_MULTIREG_RET 0 // Support for returning a single value in more than one register - #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine is structs are passed/returned in more than one register - #define MAX_PASS_MULTIREG_BYTES 16 // Maximum size of a struct that could be passed in more than one register - #define MAX_ARG_REG_COUNT 2 // Maximum registers used to pass an argument. 
- #define MAX_RET_REG_COUNT 2 // Maximum registers used to return a value. + #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine is structs are passed/returned in more than one register + #define MAX_PASS_MULTIREG_BYTES 32 // Maximum size of a struct that could be passed in more than one register (max is 4 doubles using an HFA) + #define MAX_RET_MULTIREG_BYTES 0 // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 4 doubles) + #define MAX_ARG_REG_COUNT 4 // Maximum registers used to pass a single argument in multiple registers. (max is 4 floats or doubles using an HFA) + #define MAX_RET_REG_COUNT 1 // Maximum registers used to return a value. #ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers @@ -1679,7 +1680,6 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define MAX_REG_ARG 8 #define MAX_FLOAT_REG_ARG 8 - #define MAX_HFA_RET_SLOTS 8 #define REG_ARG_FIRST REG_R0 #define REG_ARG_LAST REG_R7 diff --git a/src/coreclr/tests/arm64/Tests.lst b/src/coreclr/tests/arm64/Tests.lst index b1accfa..4b2b853 100644 --- a/src/coreclr/tests/arm64/Tests.lst +++ b/src/coreclr/tests/arm64/Tests.lst @@ -34745,14 +34745,14 @@ RelativePath=JIT\jit64\hfa\main\testA\hfa_nd2A_d\hfa_nd2A_d.cmd WorkingDir=JIT\jit64\hfa\main\testA\hfa_nd2A_d Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS;ISSUE_4946 HostStyle=0 [hfa_nd2A_r.cmd_5003] RelativePath=JIT\jit64\hfa\main\testA\hfa_nd2A_r\hfa_nd2A_r.cmd WorkingDir=JIT\jit64\hfa\main\testA\hfa_nd2A_r Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS;ISSUE_4946 HostStyle=0 [hfa_nf0A_d.cmd_5004] RelativePath=JIT\jit64\hfa\main\testA\hfa_nf0A_d\hfa_nf0A_d.cmd @@ -34787,14 +34787,14 @@ RelativePath=JIT\jit64\hfa\main\testA\hfa_nf2A_d\hfa_nf2A_d.cmd 
WorkingDir=JIT\jit64\hfa\main\testA\hfa_nf2A_d Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS;ISSUE_4946 HostStyle=0 [hfa_nf2A_r.cmd_5009] RelativePath=JIT\jit64\hfa\main\testA\hfa_nf2A_r\hfa_nf2A_r.cmd WorkingDir=JIT\jit64\hfa\main\testA\hfa_nf2A_r Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS;ISSUE_4946 HostStyle=0 [hfa_sd0A_d.cmd_5010] RelativePath=JIT\jit64\hfa\main\testA\hfa_sd0A_d\hfa_sd0A_d.cmd @@ -34829,14 +34829,14 @@ RelativePath=JIT\jit64\hfa\main\testA\hfa_sd2A_d\hfa_sd2A_d.cmd WorkingDir=JIT\jit64\hfa\main\testA\hfa_sd2A_d Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS;ISSUE_4946 HostStyle=0 [hfa_sd2A_r.cmd_5015] RelativePath=JIT\jit64\hfa\main\testA\hfa_sd2A_r\hfa_sd2A_r.cmd WorkingDir=JIT\jit64\hfa\main\testA\hfa_sd2A_r Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS;ISSUE_4946 HostStyle=0 [hfa_sf0A_d.cmd_5016] RelativePath=JIT\jit64\hfa\main\testA\hfa_sf0A_d\hfa_sf0A_d.cmd @@ -34871,14 +34871,14 @@ RelativePath=JIT\jit64\hfa\main\testA\hfa_sf2A_d\hfa_sf2A_d.cmd WorkingDir=JIT\jit64\hfa\main\testA\hfa_sf2A_d Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS;ISSUE_4946 HostStyle=0 [hfa_sf2A_r.cmd_5021] RelativePath=JIT\jit64\hfa\main\testA\hfa_sf2A_r\hfa_sf2A_r.cmd WorkingDir=JIT\jit64\hfa\main\testA\hfa_sf2A_r Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS;ISSUE_4946 HostStyle=0 [hfa_nd0B_d.cmd_5022] RelativePath=JIT\jit64\hfa\main\testB\hfa_nd0B_d\hfa_nd0B_d.cmd @@ -34899,14 +34899,14 @@ RelativePath=JIT\jit64\hfa\main\testB\hfa_nd2B_d\hfa_nd2B_d.cmd WorkingDir=JIT\jit64\hfa\main\testB\hfa_nd2B_d Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS;ISSUE_4946 HostStyle=0 
[hfa_nd2B_r.cmd_5025] RelativePath=JIT\jit64\hfa\main\testB\hfa_nd2B_r\hfa_nd2B_r.cmd WorkingDir=JIT\jit64\hfa\main\testB\hfa_nd2B_r Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS;ISSUE_4946 HostStyle=0 [hfa_nf0B_d.cmd_5026] RelativePath=JIT\jit64\hfa\main\testB\hfa_nf0B_d\hfa_nf0B_d.cmd @@ -34927,14 +34927,14 @@ RelativePath=JIT\jit64\hfa\main\testB\hfa_nf2B_d\hfa_nf2B_d.cmd WorkingDir=JIT\jit64\hfa\main\testB\hfa_nf2B_d Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS;ISSUE_4946 HostStyle=0 [hfa_nf2B_r.cmd_5029] RelativePath=JIT\jit64\hfa\main\testB\hfa_nf2B_r\hfa_nf2B_r.cmd WorkingDir=JIT\jit64\hfa\main\testB\hfa_nf2B_r Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS;ISSUE_4946 HostStyle=0 [hfa_sd0B_d.cmd_5030] RelativePath=JIT\jit64\hfa\main\testB\hfa_sd0B_d\hfa_sd0B_d.cmd @@ -34955,14 +34955,14 @@ RelativePath=JIT\jit64\hfa\main\testB\hfa_sd2B_d\hfa_sd2B_d.cmd WorkingDir=JIT\jit64\hfa\main\testB\hfa_sd2B_d Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS;ISSUE_4946 HostStyle=0 [hfa_sd2B_r.cmd_5033] RelativePath=JIT\jit64\hfa\main\testB\hfa_sd2B_r\hfa_sd2B_r.cmd WorkingDir=JIT\jit64\hfa\main\testB\hfa_sd2B_r Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS;ISSUE_4946 HostStyle=0 [hfa_sf0B_d.cmd_5034] RelativePath=JIT\jit64\hfa\main\testB\hfa_sf0B_d\hfa_sf0B_d.cmd @@ -34983,14 +34983,14 @@ RelativePath=JIT\jit64\hfa\main\testB\hfa_sf2B_d\hfa_sf2B_d.cmd WorkingDir=JIT\jit64\hfa\main\testB\hfa_sf2B_d Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS;ISSUE_4946 HostStyle=0 [hfa_sf2B_r.cmd_5037] RelativePath=JIT\jit64\hfa\main\testB\hfa_sf2B_r\hfa_sf2B_r.cmd WorkingDir=JIT\jit64\hfa\main\testB\hfa_sf2B_r Expected=0 MaxAllowedDurationSeconds=600 
-Categories=Pri0;EXPECTED_FAIL +Categories=Pri0;EXPECTED_PASS;ISSUE_4946 HostStyle=0 [hfa_nd0C_d.cmd_5038] RelativePath=JIT\jit64\hfa\main\testC\hfa_nd0C_d\hfa_nd0C_d.cmd @@ -35025,14 +35025,14 @@ RelativePath=JIT\jit64\hfa\main\testC\hfa_nd2C_d\hfa_nd2C_d.cmd WorkingDir=JIT\jit64\hfa\main\testC\hfa_nd2C_d Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL;NATIVE_INTEROP +Categories=Pri0;EXPECTED_PASS;ISSUE_4946;NATIVE_INTEROP HostStyle=0 [hfa_nd2C_r.cmd_5043] RelativePath=JIT\jit64\hfa\main\testC\hfa_nd2C_r\hfa_nd2C_r.cmd WorkingDir=JIT\jit64\hfa\main\testC\hfa_nd2C_r Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL;NATIVE_INTEROP +Categories=Pri0;EXPECTED_PASS;ISSUE_4946;NATIVE_INTEROP HostStyle=0 [hfa_nf0C_d.cmd_5044] RelativePath=JIT\jit64\hfa\main\testC\hfa_nf0C_d\hfa_nf0C_d.cmd @@ -35067,14 +35067,14 @@ RelativePath=JIT\jit64\hfa\main\testC\hfa_nf2C_d\hfa_nf2C_d.cmd WorkingDir=JIT\jit64\hfa\main\testC\hfa_nf2C_d Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL;NATIVE_INTEROP +Categories=Pri0;EXPECTED_PASS;ISSUE_4946;NATIVE_INTEROP HostStyle=0 [hfa_nf2C_r.cmd_5049] RelativePath=JIT\jit64\hfa\main\testC\hfa_nf2C_r\hfa_nf2C_r.cmd WorkingDir=JIT\jit64\hfa\main\testC\hfa_nf2C_r Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL;NATIVE_INTEROP +Categories=Pri0;EXPECTED_PASS;ISSUE_4946;NATIVE_INTEROP HostStyle=0 [hfa_sd0C_d.cmd_5050] RelativePath=JIT\jit64\hfa\main\testC\hfa_sd0C_d\hfa_sd0C_d.cmd @@ -35109,14 +35109,14 @@ RelativePath=JIT\jit64\hfa\main\testC\hfa_sd2C_d\hfa_sd2C_d.cmd WorkingDir=JIT\jit64\hfa\main\testC\hfa_sd2C_d Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL;NATIVE_INTEROP +Categories=Pri0;EXPECTED_PASS;ISSUE_4946;NATIVE_INTEROP HostStyle=0 [hfa_sd2C_r.cmd_5055] RelativePath=JIT\jit64\hfa\main\testC\hfa_sd2C_r\hfa_sd2C_r.cmd WorkingDir=JIT\jit64\hfa\main\testC\hfa_sd2C_r Expected=0 MaxAllowedDurationSeconds=600 
-Categories=Pri0;EXPECTED_FAIL;NATIVE_INTEROP +Categories=Pri0;EXPECTED_PASS;ISSUE_4946;NATIVE_INTEROP HostStyle=0 [hfa_sf0C_d.cmd_5056] RelativePath=JIT\jit64\hfa\main\testC\hfa_sf0C_d\hfa_sf0C_d.cmd @@ -35151,14 +35151,14 @@ RelativePath=JIT\jit64\hfa\main\testC\hfa_sf2C_d\hfa_sf2C_d.cmd WorkingDir=JIT\jit64\hfa\main\testC\hfa_sf2C_d Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL;NATIVE_INTEROP +Categories=Pri0;EXPECTED_PASS;ISSUE_4946;NATIVE_INTEROP HostStyle=0 [hfa_sf2C_r.cmd_5061] RelativePath=JIT\jit64\hfa\main\testC\hfa_sf2C_r\hfa_sf2C_r.cmd WorkingDir=JIT\jit64\hfa\main\testC\hfa_sf2C_r Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_FAIL;NATIVE_INTEROP +Categories=Pri0;EXPECTED_PASS;ISSUE_4946;NATIVE_INTEROP HostStyle=0 [hfa_nd0E_d.cmd_5062] RelativePath=JIT\jit64\hfa\main\testE\hfa_nd0E_d\hfa_nd0E_d.cmd @@ -52476,7 +52476,7 @@ RelativePath=JIT\Methodical\structs\systemvbringup\structinregs\structinregs.cmd WorkingDir=JIT\Methodical\structs\systemvbringup\structinregs Expected=0 MaxAllowedDurationSeconds=600 -Categories=Pri0;EXPECTED_PASS +Categories=Pri0;EXPECTED_FAIL;ISSUE_4949 HostStyle=0 [switch1.cmd_7581] RelativePath=JIT\Methodical\switch\switch1\switch1.cmd -- 2.7.4