Small optimization in LSRA for RMW intrinsics (#42564)
author: Egor Chesakov <Egor.Chesakov@microsoft.com>
Tue, 6 Oct 2020 18:58:34 +0000 (11:58 -0700)
committer: GitHub <noreply@github.com>
Tue, 6 Oct 2020 18:58:34 +0000 (11:58 -0700)
Operands of an RMW intrinsic don't have to be marked as "delay-free" when they can be assigned the same register as op1Reg (i.e. the register of the read-modify-write operand) and one of them is the last use.

src/coreclr/src/jit/lsraarm64.cpp

index 2138b78..b8a7415 100644 (file)
@@ -1134,29 +1134,62 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
     {
         if (intrin.op2 != nullptr)
         {
-            if (isRMW)
+            // RMW intrinsic operands doesn't have to be delayFree when they can be assigned the same register as op1Reg
+            // (i.e. a register that corresponds to read-modify-write operand) and one of them is the last use.
+
+            bool op2DelayFree = isRMW;
+            bool op3DelayFree = isRMW;
+            bool op4DelayFree = isRMW;
+
+            assert(intrin.op1 != nullptr);
+
+            if (isRMW && intrin.op1->OperIs(GT_LCL_VAR))
             {
-                srcCount += BuildDelayFreeUses(intrin.op2);
+                unsigned int varNum1    = intrin.op1->AsLclVar()->GetLclNum();
+                bool         op1LastUse = false;
+
+                unsigned int varNum2 = BAD_VAR_NUM;
+                unsigned int varNum3 = BAD_VAR_NUM;
+                unsigned int varNum4 = BAD_VAR_NUM;
+
+                if (intrin.op2->OperIs(GT_LCL_VAR))
+                {
+                    varNum2 = intrin.op2->AsLclVar()->GetLclNum();
+                    op1LastUse |= ((varNum1 == varNum2) && intrin.op2->HasLastUse());
+                }
 
                 if (intrin.op3 != nullptr)
                 {
-                    srcCount += BuildDelayFreeUses(intrin.op3);
+                    if (intrin.op3->OperIs(GT_LCL_VAR))
+                    {
+                        varNum3 = intrin.op3->AsLclVar()->GetLclNum();
+                        op1LastUse |= ((varNum1 == varNum3) && intrin.op3->HasLastUse());
+                    }
 
-                    if (intrin.op4 != nullptr)
+                    if ((intrin.op4 != nullptr) && intrin.op4->OperIs(GT_LCL_VAR))
                     {
-                        srcCount += BuildDelayFreeUses(intrin.op4);
+                        varNum4 = intrin.op4->AsLclVar()->GetLclNum();
+                        op1LastUse |= ((varNum1 == varNum4) && intrin.op4->HasLastUse());
                     }
                 }
+
+                if (op1LastUse)
+                {
+                    op2DelayFree = (varNum1 != varNum2);
+                    op3DelayFree = (varNum1 != varNum3);
+                    op4DelayFree = (varNum1 != varNum4);
+                }
             }
-            else
+
+            srcCount += op2DelayFree ? BuildDelayFreeUses(intrin.op2) : BuildOperandUses(intrin.op2);
+
+            if (intrin.op3 != nullptr)
             {
-                srcCount += BuildOperandUses(intrin.op2);
+                srcCount += op3DelayFree ? BuildDelayFreeUses(intrin.op3) : BuildOperandUses(intrin.op3);
 
-                if (intrin.op3 != nullptr)
+                if (intrin.op4 != nullptr)
                 {
-                    assert(intrin.op4 == nullptr);
-
-                    srcCount += BuildOperandUses(intrin.op3);
+                    srcCount += op4DelayFree ? BuildDelayFreeUses(intrin.op4) : BuildOperandUses(intrin.op4);
                 }
             }
         }