From cecb2c5c8021ecc0bf18f07c991881893948eea1 Mon Sep 17 00:00:00 2001 From: Mikhail Skvortcov Date: Thu, 26 Jan 2017 19:49:05 +0300 Subject: [PATCH] RyuJIT/ARM32: fix consume order and clone emitInsTernary Commit migrated from https://github.com/dotnet/coreclr/commit/076116be7b5e7890760778020604c698377a83e2 --- src/coreclr/src/jit/codegenarm.cpp | 59 ++----------- src/coreclr/src/jit/emitarm.cpp | 169 +++++++++++++++++++++++++++++++++++++ 2 files changed, 175 insertions(+), 53 deletions(-) diff --git a/src/coreclr/src/jit/codegenarm.cpp b/src/coreclr/src/jit/codegenarm.cpp index ea9c1f5..472db63 100644 --- a/src/coreclr/src/jit/codegenarm.cpp +++ b/src/coreclr/src/jit/codegenarm.cpp @@ -350,6 +350,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_SUB: case GT_MUL: { + genConsumeOperands(treeNode->AsOp()); + const genTreeOps oper = treeNode->OperGet(); if ((oper == GT_ADD || oper == GT_SUB || oper == GT_MUL) && treeNode->gtOverflow()) { @@ -364,56 +366,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) // The arithmetic node must be sitting in a register (since it's not contained) noway_assert(targetReg != REG_NA); - regNumber op1reg = op1->gtRegNum; - regNumber op2reg = op2->gtRegNum; - - GenTreePtr dst; - GenTreePtr src; - - genConsumeIfReg(op1); - genConsumeIfReg(op2); - - if (!varTypeIsFloating(targetType)) - { - // This is the case of reg1 = reg1 op reg2 - // We're ready to emit the instruction without any moves - if (op1reg == targetReg) - { - dst = op1; - src = op2; - } - // We have reg1 = reg2 op reg1 - // In order for this operation to be correct - // we need that op is a commutative operation so - // we can convert it into reg1 = reg1 op reg2 and emit - // the same code as above - else if (op2reg == targetReg) - { - assert(GenTree::OperIsCommutative(treeNode->OperGet())); - dst = op2; - src = op1; - } - // dest, op1 and op2 registers are different: - // reg3 = reg1 op reg2 - // We can implement this by issuing a mov: - // reg3 = 
reg1 - // reg3 = reg3 op reg2 - else - { - inst_RV_RV(ins_Move_Extend(targetType, true), targetReg, op1reg, op1->gtType); - regTracker.rsTrackRegCopy(targetReg, op1reg); - gcInfo.gcMarkRegPtrVal(targetReg, targetType); - dst = treeNode; - src = op2; - } - - regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src); - assert(r == targetReg); - } - else - { - emit->emitIns_R_R_R(ins, emitTypeSize(treeNode), targetReg, op1reg, op2reg); - } + regNumber r = emit->emitInsTernary(ins, emitTypeSize(treeNode), treeNode, op1, op2); + assert(r == targetReg); } genProduceReg(treeNode); break; @@ -1270,13 +1224,12 @@ void CodeGen::genCodeForShift(GenTreePtr tree) assert(tree->gtRegNum != REG_NA); - GenTreePtr operand = tree->gtGetOp1(); - genConsumeReg(operand); + genConsumeOperands(tree->AsOp()); + GenTreePtr operand = tree->gtGetOp1(); GenTreePtr shiftBy = tree->gtGetOp2(); if (!shiftBy->IsCnsIntOrI()) { - genConsumeReg(shiftBy); getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum); } else diff --git a/src/coreclr/src/jit/emitarm.cpp b/src/coreclr/src/jit/emitarm.cpp index 819ae3a..f7943f2 100644 --- a/src/coreclr/src/jit/emitarm.cpp +++ b/src/coreclr/src/jit/emitarm.cpp @@ -7656,5 +7656,174 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G } } +regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2) +{ + regNumber result = REG_NA; + + // dst can only be a reg + assert(!dst->isContained()); + + // find immed (if any) - it cannot be a dst + // Only one src can be an int. 
+ GenTreeIntConCommon* intConst = nullptr; + GenTree* nonIntReg = nullptr; + + if (varTypeIsFloating(dst)) + { + // src1 can only be a reg + assert(!src1->isContained()); + // src2 can only be a reg + assert(!src2->isContained()); + } + else // not floating point + { + // src2 can be immed or reg + assert(!src2->isContained() || src2->isContainedIntOrIImmed()); + + // Check src2 first as we can always allow it to be a contained immediate + if (src2->isContainedIntOrIImmed()) + { + intConst = src2->AsIntConCommon(); + nonIntReg = src1; + } + // Only for commutative operations do we check src1 and allow it to be a contained immediate + else if (dst->OperIsCommutative()) + { + // src1 can be immed or reg + assert(!src1->isContained() || src1->isContainedIntOrIImmed()); + + // Check src1 and allow it to be a contained immediate + if (src1->isContainedIntOrIImmed()) + { + assert(!src2->isContainedIntOrIImmed()); + intConst = src1->AsIntConCommon(); + nonIntReg = src2; + } + } + else + { + // src1 can only be a reg + assert(!src1->isContained()); + } + } + bool isMulOverflow = false; + bool isUnsignedMul = false; + regNumber extraReg = REG_NA; + if (dst->gtOverflowEx()) + { + NYI_ARM("emitInsTernary overflow"); +#if 0 + if (ins == INS_add) + { + ins = INS_adds; + } + else if (ins == INS_sub) + { + ins = INS_subs; + } + else if (ins == INS_mul) + { + isMulOverflow = true; + isUnsignedMul = ((dst->gtFlags & GTF_UNSIGNED) != 0); + assert(intConst == nullptr); // overflow format doesn't support an int constant operand + } + else + { + assert(!"Invalid ins for overflow check"); + } +#endif + } + if (intConst != nullptr) + { + emitIns_R_R_I(ins, attr, dst->gtRegNum, nonIntReg->gtRegNum, intConst->IconValue()); + } + else + { + if (isMulOverflow) + { + NYI_ARM("emitInsTernary overflow"); +#if 0 + // Make sure that we have an internal register + assert(genCountBits(dst->gtRsvdRegs) == 2); + + // There will be two bits set in tmpRegsMask. 
+ // Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask' + regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum); + assert(tmpRegsMask != RBM_NONE); + regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMask to a one-bit mask + extraReg = genRegNumFromMask(tmpRegMask); // set extraReg from that mask + + if (isUnsignedMul) + { + if (attr == EA_4BYTE) + { + // Compute 8 byte results from 4 byte by 4 byte multiplication. + emitIns_R_R_R(INS_umull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); + + // Get the high result by shifting dst. + emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32); + } + else + { + assert(attr == EA_8BYTE); + // Compute the high result. + emitIns_R_R_R(INS_umulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum); + + // Now multiply without skewing the high result. + emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); + } + + // zero-sign bit comparison to detect overflow. + emitIns_R_I(INS_cmp, attr, extraReg, 0); + } + else + { + int bitShift = 0; + if (attr == EA_4BYTE) + { + // Compute 8 byte results from 4 byte by 4 byte multiplication. + emitIns_R_R_R(INS_smull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); + + // Get the high result by shifting dst. + emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32); + + bitShift = 31; + } + else + { + assert(attr == EA_8BYTE); + // Save the high result in a temporary register. + emitIns_R_R_R(INS_smulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum); + + // Now multiply without skewing the high result. + emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); + + bitShift = 63; + } + + // Sign bit comparison to detect overflow. + emitIns_R_R_I(INS_cmp, attr, extraReg, dst->gtRegNum, bitShift, INS_OPTS_ASR); + } +#endif + } + else + { + // We can just multiply. 
+ emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); + } + } + + if (dst->gtOverflowEx()) + { + NYI_ARM("emitInsTernary overflow"); +#if 0 + assert(!varTypeIsFloating(dst)); + codeGen->genCheckOverflow(dst); +#endif + } + + return dst->gtRegNum; +} + #endif // !LEGACY_BACKEND #endif // defined(_TARGET_ARM_) -- 2.7.4