Cleanup LOCKADD handling
authorMike Danes <onemihaid@hotmail.com>
Sun, 3 Jun 2018 21:50:17 +0000 (00:50 +0300)
committerMike Danes <onemihaid@hotmail.com>
Mon, 4 Jun 2018 18:37:48 +0000 (21:37 +0300)
LOCKADD nodes are generated rather early and there's no reason for that:
* The CORINFO_INTRINSIC_InterlockedAdd32/64 intrinsics are not actually used. Even if they were used, we could still import them as XADD nodes and rely on lowering to generate LOCKADD when needed.
* gtExtractSideEffList transforms XADD into LOCKADD but this can be done in lowering. LOCKADD is an XARCH specific optimization after all.

Additionally:
* Avoid the need for special handling in LSRA by making GT_LOCKADD a "no value" oper.
* Split LOCKADD codegen from XADD/XCHG codegen, attempting to use the same code for all 3 just makes things more complex.
* The address is always in a register so there's no real need to create an indir node on the fly, the relevant emitter functions can be called directly.

The last point above is actually a CQ issue - we always generate `add [reg], imm`, more complex address modes are not used. Unfortunately this problem starts early, when the importer spills the address to a local variable. If that ever gets fixed then we could probably generate a contained LEA in lowering.

13 files changed:
src/jit/codegen.h
src/jit/codegenarm64.cpp
src/jit/codegenarmarch.cpp
src/jit/codegenxarch.cpp
src/jit/gentree.cpp
src/jit/gentree.h
src/jit/gtlist.h
src/jit/importer.cpp
src/jit/lower.cpp
src/jit/lsraxarch.cpp
src/jit/optimizer.cpp
src/jit/rationalize.cpp
src/jit/valuenum.cpp

index d6cd23a4c99c38ad54e4c64f5c93ff62aa308439..450a372c5478ad0fa0dc36fce012d8342c0971e7 100644 (file)
@@ -121,6 +121,9 @@ private:
     void genRangeCheck(GenTree* node);
 
     void genLockedInstructions(GenTreeOp* node);
+#ifdef _TARGET_XARCH_
+    void genCodeForLockAdd(GenTreeOp* node);
+#endif
 
     //-------------------------------------------------------------------------
     // Register-related methods
index eaaf5ba1a82483900f1b9cb32ebcf2c714f4df0a..0cc38f7304a975f8c07707dbf7ec2c179ff7d656 100644 (file)
@@ -2650,8 +2650,12 @@ void CodeGen::genJumpTable(GenTree* treeNode)
     genProduceReg(treeNode);
 }
 
-// generate code for the locked operations:
-// GT_LOCKADD, GT_XCHG, GT_XADD
+//------------------------------------------------------------------------
+// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node.
+//
+// Arguments:
+//    treeNode - the GT_XADD/XCHG node
+//
 void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
 {
     GenTree*  data      = treeNode->gtOp.gtOp2;
@@ -2701,7 +2705,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
     // Emit code like this:
     //   retry:
     //     ldxr loadReg, [addrReg]
-    //     add storeDataReg, loadReg, dataReg         # Only for GT_XADD & GT_LOCKADD
+    //     add storeDataReg, loadReg, dataReg         # Only for GT_XADD
     //                                                # GT_XCHG storeDataReg === dataReg
     //     stxr exResult, storeDataReg, [addrReg]
     //     cbnz exResult, retry
@@ -2718,7 +2722,6 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
     switch (treeNode->OperGet())
     {
         case GT_XADD:
-        case GT_LOCKADD:
             if (data->isContainedIntOrIImmed())
             {
                 // Even though INS_add is specified here, the encoder will choose either
index 5a2b55525d59a888ec1466b1d2bad01ade72edcb..e7209a8b9339c3f46d66ff9cf3981dfc578a2ca2 100644 (file)
@@ -340,7 +340,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             break;
 
 #ifdef _TARGET_ARM64_
-        case GT_LOCKADD:
         case GT_XCHG:
         case GT_XADD:
             genLockedInstructions(treeNode->AsOp());
index a98c9e3727a54115db1ed1b5319aba14f5e0b1f1..84999e5649a2aec2cf23e7624d427c819ecb3c31 100644 (file)
@@ -1800,6 +1800,9 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             break;
 
         case GT_LOCKADD:
+            genCodeForLockAdd(treeNode->AsOp());
+            break;
+
         case GT_XCHG:
         case GT_XADD:
             genLockedInstructions(treeNode->AsOp());
@@ -3513,64 +3516,78 @@ void CodeGen::genJumpTable(GenTree* treeNode)
     genProduceReg(treeNode);
 }
 
-// generate code for the locked operations:
-// GT_LOCKADD, GT_XCHG, GT_XADD
-void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
+//------------------------------------------------------------------------
+// genCodeForLockAdd: Generate code for a GT_LOCKADD node
+//
+// Arguments:
+//    node - the GT_LOCKADD node
+//
+void CodeGen::genCodeForLockAdd(GenTreeOp* node)
 {
-    GenTree*    data      = treeNode->gtOp.gtOp2;
-    GenTree*    addr      = treeNode->gtOp.gtOp1;
-    regNumber   targetReg = treeNode->gtRegNum;
-    regNumber   dataReg   = data->gtRegNum;
-    regNumber   addrReg   = addr->gtRegNum;
-    var_types   type      = genActualType(data->TypeGet());
-    instruction ins;
+    assert(node->OperIs(GT_LOCKADD));
 
-    // The register allocator should have extended the lifetime of the address
-    // so that it is not used as the target.
-    noway_assert(addrReg != targetReg);
+    GenTree* addr = node->gtGetOp1();
+    GenTree* data = node->gtGetOp2();
+    emitAttr size = emitTypeSize(data->TypeGet());
 
-    // If data is a lclVar that's not a last use, we'd better have allocated a register
-    // for the result (except in the case of GT_LOCKADD which does not produce a register result).
-    assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) ||
-           (data->gtFlags & GTF_VAR_DEATH) != 0);
+    assert(addr->isUsedFromReg());
+    assert(data->isUsedFromReg() || data->isContainedIntOrIImmed());
+    assert((size == EA_4BYTE) || (size == EA_PTRSIZE));
 
-    genConsumeOperands(treeNode);
-    if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
-    {
-        inst_RV_RV(ins_Copy(type), targetReg, dataReg);
-        data->gtRegNum = targetReg;
+    genConsumeOperands(node);
+    instGen(INS_lock);
 
-        // TODO-XArch-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
-        // original gtRegNum on data, after calling emitInsBinary below.
+    if (data->isContainedIntOrIImmed())
+    {
+        int imm = static_cast<int>(data->AsIntCon()->IconValue());
+        assert(imm == data->AsIntCon()->IconValue());
+        getEmitter()->emitIns_I_AR(INS_add, size, imm, addr->gtRegNum, 0);
     }
-    switch (treeNode->OperGet())
+    else
     {
-        case GT_LOCKADD:
-            instGen(INS_lock);
-            ins = INS_add;
-            break;
-        case GT_XCHG:
-            // lock is implied by xchg
-            ins = INS_xchg;
-            break;
-        case GT_XADD:
-            instGen(INS_lock);
-            ins = INS_xadd;
-            break;
-        default:
-            unreached();
+        getEmitter()->emitIns_AR_R(INS_add, size, data->gtRegNum, addr->gtRegNum, 0);
     }
+}
 
-    // all of these nodes implicitly do an indirection on op1
-    // so create a temporary node to feed into the pattern matching
-    GenTreeIndir i = indirForm(type, addr);
-    i.SetContained();
-    getEmitter()->emitInsBinary(ins, emitTypeSize(type), &i, data);
+//------------------------------------------------------------------------
+// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node.
+//
+// Arguments:
+//    node - the GT_XADD/XCHG node
+//
+void CodeGen::genLockedInstructions(GenTreeOp* node)
+{
+    assert(node->OperIs(GT_XADD, GT_XCHG));
 
-    if (treeNode->gtRegNum != REG_NA)
+    GenTree* addr = node->gtGetOp1();
+    GenTree* data = node->gtGetOp2();
+    emitAttr size = emitTypeSize(node->TypeGet());
+
+    assert(addr->isUsedFromReg());
+    assert(data->isUsedFromReg());
+    assert((size == EA_4BYTE) || (size == EA_PTRSIZE));
+
+    genConsumeOperands(node);
+
+    if (node->gtRegNum != data->gtRegNum)
     {
-        genProduceReg(treeNode);
+        // If the destination register is different from the data register then we need
+        // to first move the data to the target register. Make sure we don't overwrite
+        // the address, the register allocator should have taken care of this.
+        assert(node->gtRegNum != addr->gtRegNum);
+        getEmitter()->emitIns_R_R(INS_mov, size, node->gtRegNum, data->gtRegNum);
+    }
+
+    instruction ins = node->OperIs(GT_XADD) ? INS_xadd : INS_xchg;
+
+    // XCHG has an implied lock prefix when the first operand is a memory operand.
+    if (ins != INS_xchg)
+    {
+        instGen(INS_lock);
     }
+
+    getEmitter()->emitIns_AR_R(ins, size, node->gtRegNum, addr->gtRegNum, 0);
+    genProduceReg(node);
 }
 
 //------------------------------------------------------------------------
index 5c0ab71196ef513fccc52cf1057de7ee2c312d5a..f124a723f9f39f175df6f953f86c6670b9ea67f0 100644 (file)
@@ -15165,15 +15165,6 @@ void Compiler::gtExtractSideEffList(GenTree*  expr,
 
     if (oper == GT_LOCKADD || oper == GT_XADD || oper == GT_XCHG || oper == GT_CMPXCHG)
     {
-        // XADD both adds to the memory location and also fetches the old value.  If we only need the side
-        // effect of this instruction, change it into a GT_LOCKADD node (the add only)
-        if (oper == GT_XADD)
-        {
-            expr->SetOperRaw(GT_LOCKADD);
-            assert(genActualType(expr->gtType) == genActualType(expr->gtOp.gtOp2->gtType));
-            expr->gtType = TYP_VOID;
-        }
-
         // These operations are kind of important to keep
         *pList = gtBuildCommaList(*pList, expr);
         return;
index 5723b060f1334a36c1d8b5a50bd174f093703c24..905022ca65c5e2a21c1c9f006e108fbda3a2a091 100644 (file)
@@ -980,7 +980,7 @@ public:
         if (gtType == TYP_VOID)
         {
             // These are the only operators which can produce either VOID or non-VOID results.
-            assert(OperIs(GT_NOP, GT_CALL, GT_LOCKADD, GT_FIELD_LIST, GT_COMMA) || OperIsCompare() || OperIsLong() ||
+            assert(OperIs(GT_NOP, GT_CALL, GT_FIELD_LIST, GT_COMMA) || OperIsCompare() || OperIsLong() ||
                    OperIsSIMD() || OperIsHWIntrinsic());
             return false;
         }
index dcfe3ba09ce05606a0e3138871237999e384baf5..bbd9bcbcd1a7212849df4bd76c61edbe7ad773bf 100644 (file)
@@ -53,7 +53,7 @@ GTNODE(RELOAD           , GenTreeCopyOrReload,0,GTK_UNOP)
 GTNODE(ARR_LENGTH       , GenTreeArrLen      ,0,GTK_UNOP|GTK_EXOP)      // array-length
 GTNODE(INTRINSIC        , GenTreeIntrinsic   ,0,GTK_BINOP|GTK_EXOP)     // intrinsics
 
-GTNODE(LOCKADD          , GenTreeOp          ,0,GTK_BINOP)
+GTNODE(LOCKADD          , GenTreeOp          ,0,GTK_BINOP|GTK_NOVALUE)
 GTNODE(XADD             , GenTreeOp          ,0,GTK_BINOP)
 GTNODE(XCHG             , GenTreeOp          ,0,GTK_BINOP)
 GTNODE(CMPXCHG          , GenTreeCmpXchg     ,0,GTK_SPECIAL)
index 3e705a8ae1f983daaf842dc4dc4bc653a8e4c6e9..13320a07fb896923c6a7d62f8370421ff9e0b0be 100644 (file)
@@ -3451,9 +3451,11 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
 
 #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
         // TODO-ARM-CQ: reenable treating Interlocked operation as intrinsic
+
+        // Note that CORINFO_INTRINSIC_InterlockedAdd32/64 are not actually used.
+        // Anyway, we can import them as XADD and leave it to lowering/codegen to perform
+        // whatever optimizations may arise from the fact that result value is not used.
         case CORINFO_INTRINSIC_InterlockedAdd32:
-            interlockedOperator = GT_LOCKADD;
-            goto InterlockedBinOpCommon;
         case CORINFO_INTRINSIC_InterlockedXAdd32:
             interlockedOperator = GT_XADD;
             goto InterlockedBinOpCommon;
@@ -3463,8 +3465,6 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
 
 #ifdef _TARGET_64BIT_
         case CORINFO_INTRINSIC_InterlockedAdd64:
-            interlockedOperator = GT_LOCKADD;
-            goto InterlockedBinOpCommon;
         case CORINFO_INTRINSIC_InterlockedXAdd64:
             interlockedOperator = GT_XADD;
             goto InterlockedBinOpCommon;
index 23015e0256005cba2970b81aaa4046af3730c34c..aa3e2f03d949937bd89d8ebd48f160e8fdefdf43 100644 (file)
@@ -305,16 +305,28 @@ GenTree* Lowering::LowerNode(GenTree* node)
             break;
         }
 
-#ifdef _TARGET_ARM64_
+#if defined(_TARGET_ARM64_)
         case GT_CMPXCHG:
             CheckImmedAndMakeContained(node, node->AsCmpXchg()->gtOpComparand);
             break;
 
         case GT_XADD:
-#endif
-        case GT_LOCKADD:
             CheckImmedAndMakeContained(node, node->gtOp.gtOp2);
             break;
+#elif defined(_TARGET_XARCH_)
+        case GT_XADD:
+            if (node->IsUnusedValue())
+            {
+                node->ClearUnusedValue();
+                // Make sure the types are identical, since the node type is changed to VOID
+                // CodeGen relies on op2's type to determine the instruction size.
+                assert(node->gtGetOp2()->TypeGet() == node->TypeGet());
+                node->SetOper(GT_LOCKADD);
+                node->gtType = TYP_VOID;
+                CheckImmedAndMakeContained(node, node->gtGetOp2());
+            }
+            break;
+#endif
 
         default:
             break;
index 96af2f6d8a8c6d438b01d0dee0e7236bce2fc14f..943c4d3edc0193b3c417b46adc2a1cd2c8091735 100644 (file)
@@ -450,7 +450,6 @@ int LinearScan::BuildNode(GenTree* tree)
 
         case GT_XADD:
         case GT_XCHG:
-        case GT_LOCKADD:
         {
             // TODO-XArch-Cleanup: We should make the indirection explicit on these nodes so that we don't have
             // to special case them.
@@ -462,29 +461,10 @@ int LinearScan::BuildNode(GenTree* tree)
             RefPosition* addrUse = BuildUse(addr);
             setDelayFree(addrUse);
             tgtPrefUse = addrUse;
-            srcCount   = 1;
-            dstCount   = 1;
-            if (!data->isContained())
-            {
-                RefPosition* dataUse = dataUse = BuildUse(data);
-                srcCount                       = 2;
-            }
-
-            if (tree->TypeGet() == TYP_VOID)
-            {
-                // Right now a GT_XADD node could be morphed into a
-                // GT_LOCKADD of TYP_VOID. See gtExtractSideEffList().
-                // Note that it is advantageous to use GT_LOCKADD
-                // instead of of GT_XADD as the former uses lock.add,
-                // which allows its second operand to be a contained
-                // immediate wheres xadd instruction requires its
-                // second operand to be in a register.
-                // Give it an artificial type and mark it as an unused value.
-                // This results in a Def position created but not considered consumed by its parent node.
-                tree->gtType  = TYP_INT;
-                isLocalDefUse = true;
-                tree->SetUnusedValue();
-            }
+            assert(!data->isContained());
+            BuildUse(data);
+            srcCount = 2;
+            assert(dstCount == 1);
             BuildDef(tree);
         }
         break;
@@ -771,6 +751,7 @@ bool LinearScan::isRMWRegOper(GenTree* tree)
         case GT_STORE_BLK:
         case GT_STORE_OBJ:
         case GT_SWITCH_TABLE:
+        case GT_LOCKADD:
 #ifdef _TARGET_X86_
         case GT_LONG:
 #endif
index 11e956b17d1e7cf7a0a306099b2756051ba47284..5dbc404cf50c2015f7c1aa15b3807b6ab2821cbd 100644 (file)
@@ -7719,6 +7719,7 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
                     case GT_XCHG:    // Binop
                     case GT_CMPXCHG: // Specialop
                     {
+                        assert(!tree->OperIs(GT_LOCKADD) && "LOCKADD should not appear before lowering");
                         memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
                     }
                     break;
index a08d042ed9954ee7058c869e5b3c1142e3759847..ad99a27ddf6796cc188da0c68c625eb3522f9dad 100644 (file)
@@ -921,8 +921,8 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G
 #endif // FEATURE_SIMD
 
         default:
-            // JCMP, CMP, SETCC and JCC nodes should not be present in HIR.
-            assert(!node->OperIs(GT_CMP, GT_SETCC, GT_JCC, GT_JCMP));
+            // These nodes should not be present in HIR.
+            assert(!node->OperIs(GT_CMP, GT_SETCC, GT_JCC, GT_JCMP, GT_LOCKADD));
             break;
     }
 
index 306c861a79fc2c3fe4cd7a26e49fbf55a29fab7c..6bf8429abee82ba2ddcd34027b7a80ea0e21f745 100644 (file)
@@ -7052,6 +7052,7 @@ void Compiler::fgValueNumberTree(GenTree* tree, bool evalAsgLhsInd)
                 case GT_LOCKADD: // Binop
                 case GT_XADD:    // Binop
                 case GT_XCHG:    // Binop
+                    assert(!tree->OperIs(GT_LOCKADD) && "LOCKADD should not appear before lowering");
                     // For CMPXCHG and other intrinsics add an arbitrary side effect on GcHeap/ByrefExposed.
                     fgMutateGcHeap(tree DEBUGARG("Interlocked intrinsic"));
                     tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));