From e62cb301df336a682c9e26b9876d5c7279d8e1e4 Mon Sep 17 00:00:00 2001 From: Hyeongseok Oh Date: Fri, 7 Jul 2017 11:45:33 +0900 Subject: [PATCH] [RyuJIT/ARM32] Codegen: split struct argument less than 16 bytes Enable passing split struct (less than 16 bytes) - LSRA phase - Codegen phase --- src/jit/codegenarmarch.cpp | 242 ++++++++++++++++++++++++++------------------- src/jit/lsraarmarch.cpp | 41 ++++---- 2 files changed, 163 insertions(+), 120 deletions(-) diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp index fbb5bd9..bb5069a 100644 --- a/src/jit/codegenarmarch.cpp +++ b/src/jit/codegenarmarch.cpp @@ -905,140 +905,176 @@ void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) { assert(treeNode->OperIs(GT_PUTARG_SPLIT)); - GenTreePtr source = treeNode->gtOp1; - emitter* emit = getEmitter(); - - noway_assert(source->OperGet() == GT_OBJ); + GenTreePtr source = treeNode->gtOp1; + emitter* emit = getEmitter(); + unsigned varNumOut = compiler->lvaOutgoingArgSpaceVar; + unsigned argOffsetMax = compiler->lvaOutgoingArgSpaceSize; + unsigned argOffsetOut = treeNode->gtSlotNum * TARGET_POINTER_SIZE; - var_types targetType = source->TypeGet(); - noway_assert(varTypeIsStruct(targetType)); + if (source->OperGet() == GT_FIELD_LIST) + { + GenTreeFieldList* fieldListPtr = source->AsFieldList(); - regNumber baseReg = treeNode->ExtractTempReg(); - regNumber addrReg = REG_NA; + // Evaluate each of the GT_FIELD_LIST items into their register + // and store their register into the outgoing argument area + for (unsigned idx = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), idx++) + { + GenTreePtr nextArgNode = fieldListPtr->gtGetOp1(); + regNumber fieldReg = nextArgNode->gtRegNum; + genConsumeReg(nextArgNode); - GenTreeLclVarCommon* varNode = nullptr; - GenTreePtr addrNode = nullptr; + if (idx >= treeNode->gtNumRegs) + { + var_types type = nextArgNode->TypeGet(); + emitAttr attr = emitTypeSize(type); - addrNode = source->gtOp.gtOp1; + // Emit 
store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing + // argument area + emit->emitIns_S_R(ins_Store(type), attr, fieldReg, varNumOut, argOffsetOut); + argOffsetOut += EA_SIZE_IN_BYTES(attr); + assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area + } + else + { + var_types type = treeNode->GetRegType(idx); + regNumber argReg = treeNode->GetRegNumByIdx(idx); - // addrNode can either be a GT_LCL_VAR_ADDR or an address expression - // - if (addrNode->OperGet() == GT_LCL_VAR_ADDR) - { - // We have a GT_OBJ(GT_LCL_VAR_ADDR) - // - // We will treat this case the same as above - // (i.e if we just had this GT_LCL_VAR directly as the source) - // so update 'source' to point this GT_LCL_VAR_ADDR node - // and continue to the codegen for the LCL_VAR node below - // - varNode = addrNode->AsLclVarCommon(); - addrNode = nullptr; + // If child node is not already in the register we need, move it + if (argReg != fieldReg) + { + inst_RV_RV(ins_Copy(type), argReg, fieldReg, type); + } + } + } } + else + { + var_types targetType = source->TypeGet(); + assert(source->OperGet() == GT_OBJ); + assert(varTypeIsStruct(targetType)); - // Either varNode or addrNOde must have been setup above, - // the xor ensures that only one of the two is setup, not both - assert((varNode != nullptr) ^ (addrNode != nullptr)); - - // Setup the structSize, isHFa, and gcPtrCount - BYTE* gcPtrs = treeNode->gtGcPtrs; - unsigned gcPtrCount = treeNode->gtNumberReferenceSlots; // The count of GC pointers in the struct - int structSize = treeNode->getArgSize(); - bool isHfa = treeNode->gtIsHfa; + regNumber baseReg = treeNode->ExtractTempReg(); + regNumber addrReg = REG_NA; - // This is the varNum for our load operations, - // only used when we have a struct with a LclVar source - unsigned srcVarNum = BAD_VAR_NUM; + GenTreeLclVarCommon* varNode = nullptr; + GenTreePtr addrNode = nullptr; - if (varNode != nullptr) - { - srcVarNum = 
varNode->gtLclNum; - assert(srcVarNum < compiler->lvaCount); + addrNode = source->gtOp.gtOp1; - // handle promote situation - LclVarDsc* varDsc = compiler->lvaTable + srcVarNum; - if (varDsc->lvPromoted) + // addrNode can either be a GT_LCL_VAR_ADDR or an address expression + // + if (addrNode->OperGet() == GT_LCL_VAR_ADDR) { - NYI_ARM("CodeGen::genPutArgSplit - promoted struct"); + // We have a GT_OBJ(GT_LCL_VAR_ADDR) + // + // We will treat this case the same as above + // (i.e if we just had this GT_LCL_VAR directly as the source) + // so update 'source' to point this GT_LCL_VAR_ADDR node + // and continue to the codegen for the LCL_VAR node below + // + varNode = addrNode->AsLclVarCommon(); + addrNode = nullptr; } - } - else // addrNode is used - { - assert(addrNode != nullptr); - - // Generate code to load the address that we need into a register - genConsumeAddress(addrNode); - addrReg = addrNode->gtRegNum; - } - // If we have an HFA we can't have any GC pointers, - // if not then the max size for the the struct is 16 bytes - if (isHfa) - { - noway_assert(gcPtrCount == 0); - } + // Either varNode or addrNOde must have been setup above, + // the xor ensures that only one of the two is setup, not both + assert((varNode != nullptr) ^ (addrNode != nullptr)); - unsigned varNumOut = compiler->lvaOutgoingArgSpaceVar; - unsigned argOffsetMax = compiler->lvaOutgoingArgSpaceSize; - unsigned argOffsetOut = treeNode->gtSlotNum * TARGET_POINTER_SIZE; + // Setup the structSize, isHFa, and gcPtrCount + BYTE* gcPtrs = treeNode->gtGcPtrs; + unsigned gcPtrCount = treeNode->gtNumberReferenceSlots; // The count of GC pointers in the struct + int structSize = treeNode->getArgSize(); + bool isHfa = treeNode->gtIsHfa; - // Put on stack first - unsigned nextIndex = treeNode->gtNumRegs; - unsigned structOffset = nextIndex * TARGET_POINTER_SIZE; - int remainingSize = structSize - structOffset; - - // remainingSize is always multiple of TARGET_POINTER_SIZE - assert(remainingSize % 
TARGET_POINTER_SIZE == 0); - while (remainingSize > 0) - { - var_types type = compiler->getJitGCType(gcPtrs[nextIndex]); + // This is the varNum for our load operations, + // only used when we have a struct with a LclVar source + unsigned srcVarNum = BAD_VAR_NUM; if (varNode != nullptr) { - // Load from our varNumImp source - emit->emitIns_R_S(INS_ldr, emitTypeSize(type), baseReg, srcVarNum, structOffset); + srcVarNum = varNode->gtLclNum; + assert(srcVarNum < compiler->lvaCount); + + // handle promote situation + LclVarDsc* varDsc = compiler->lvaTable + srcVarNum; + if (varDsc->lvPromoted) + { + NYI_ARM("CodeGen::genPutArgSplit - promoted struct"); + } } - else + else // addrNode is used { - // check for case of destroying the addrRegister while we still need it - assert(baseReg != addrReg); + assert(addrNode != nullptr); - // Load from our address expression source - emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), baseReg, addrReg, structOffset); + // Generate code to load the address that we need into a register + genConsumeAddress(addrNode); + addrReg = addrNode->gtRegNum; } - // Emit str instruction to store the register into the outgoing argument area - emit->emitIns_S_R(INS_str, emitTypeSize(type), baseReg, varNumOut, argOffsetOut); - argOffsetOut += TARGET_POINTER_SIZE; // We stored 4-bytes of the struct - assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area - remainingSize -= TARGET_POINTER_SIZE; // We loaded 4-bytes of the struct - structOffset += TARGET_POINTER_SIZE; - nextIndex += 1; - } + // If we have an HFA we can't have any GC pointers, + // if not then the max size for the the struct is 16 bytes + if (isHfa) + { + assert(gcPtrCount == 0); + } - // Set registers - structOffset = 0; - for (unsigned idx = 0; idx < treeNode->gtNumRegs; idx++) - { - regNumber targetReg = treeNode->GetRegNumByIdx(idx); - var_types type = treeNode->GetRegType(idx); + // Put on stack first + unsigned nextIndex = treeNode->gtNumRegs; + 
unsigned structOffset = nextIndex * TARGET_POINTER_SIZE; + int remainingSize = structSize - structOffset; - if (varNode != nullptr) + // remainingSize is always a multiple of TARGET_POINTER_SIZE + assert(remainingSize % TARGET_POINTER_SIZE == 0); + while (remainingSize > 0) { - // Load from our varNumImp source - emit->emitIns_R_S(INS_ldr, emitTypeSize(type), targetReg, srcVarNum, structOffset); + var_types type = compiler->getJitGCType(gcPtrs[nextIndex]); + + if (varNode != nullptr) + { + // Load from our varNumImp source + emit->emitIns_R_S(INS_ldr, emitTypeSize(type), baseReg, srcVarNum, structOffset); + } + else + { + // check for case of destroying the addrRegister while we still need it + assert(baseReg != addrReg); + + // Load from our address expression source + emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), baseReg, addrReg, structOffset); + } + + // Emit str instruction to store the register into the outgoing argument area + emit->emitIns_S_R(INS_str, emitTypeSize(type), baseReg, varNumOut, argOffsetOut); + argOffsetOut += TARGET_POINTER_SIZE; // We stored 4-bytes of the struct + assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area + remainingSize -= TARGET_POINTER_SIZE; // We loaded 4-bytes of the struct + structOffset += TARGET_POINTER_SIZE; + nextIndex += 1; } - else + + // Set registers + structOffset = 0; + for (unsigned idx = 0; idx < treeNode->gtNumRegs; idx++) { - // check for case of destroying the addrRegister while we still need it - assert(targetReg != addrReg); + regNumber targetReg = treeNode->GetRegNumByIdx(idx); + var_types type = treeNode->GetRegType(idx); - // Load from our address expression source - emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), targetReg, addrReg, structOffset); + if (varNode != nullptr) + { + // Load from our varNumImp source + emit->emitIns_R_S(INS_ldr, emitTypeSize(type), targetReg, srcVarNum, structOffset); + } + else + { + // check for case of destroying the addrRegister while 
we still need it + assert(targetReg != addrReg); + + // Load from our address expression source + emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), targetReg, addrReg, structOffset); + } + structOffset += TARGET_POINTER_SIZE; } - structOffset += TARGET_POINTER_SIZE; } - genProduceReg(treeNode); } #endif // _TARGET_ARM_ diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp index 0f7a4c3..7e054e4 100644 --- a/src/jit/lsraarmarch.cpp +++ b/src/jit/lsraarmarch.cpp @@ -791,10 +791,7 @@ void Lowering::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNode GenTreePtr putArgChild = argNode->gtOp.gtOp1; - // Initialize 'argNode' as not contained, as this is both the default case - // and how MakeSrcContained expects to find things setup. - // - argNode->gtLsraInfo.srcCount = 1; + // Registers for the split argument correspond to the source argNode->gtLsraInfo.dstCount = argInfo->numRegs; info.srcCount += argInfo->numRegs; @@ -808,23 +805,33 @@ void Lowering::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNode if (putArgChild->OperGet() == GT_FIELD_LIST) { - NYI_ARM("LSRA: Oper for split struct argument is GT_FIELD_LIST"); + // Generated code: + // 1. Consume all of the items in the GT_FIELD_LIST (source) + // 2. 
Store to target slot and move to target registers (destination) from source + // + argNode->gtLsraInfo.srcCount = argInfo->numRegs + argInfo->numSlots; + + putArgChild->SetContained(); } + else + { + assert(putArgChild->TypeGet() == TYP_STRUCT); + assert(putArgChild->OperGet() == GT_OBJ); - assert(putArgChild->TypeGet() == TYP_STRUCT); - assert(putArgChild->OperGet() == GT_OBJ); - // We could use a ldr/str sequence so we need a internal register - argNode->gtLsraInfo.internalIntCount = 1; + argNode->gtLsraInfo.srcCount = 1; + // We could use a ldr/str sequence so we need an internal register + argNode->gtLsraInfo.internalIntCount = 1; - GenTreePtr objChild = putArgChild->gtOp.gtOp1; - if (objChild->OperGet() == GT_LCL_VAR_ADDR) - { - // We will generate all of the code for the GT_PUTARG_SPLIT, the GT_OBJ and the GT_LCL_VAR_ADDR - // as one contained operation - // - MakeSrcContained(putArgChild, objChild); + GenTreePtr objChild = putArgChild->gtOp.gtOp1; + if (objChild->OperGet() == GT_LCL_VAR_ADDR) + { + // We will generate all of the code for the GT_PUTARG_SPLIT, the GT_OBJ and the GT_LCL_VAR_ADDR + // as one contained operation + // + MakeSrcContained(putArgChild, objChild); + } + MakeSrcContained(argNode, putArgChild); } - MakeSrcContained(argNode, putArgChild); } #endif // _TARGET_ARM_ -- 2.7.4