[Arm64] SIMDIntrinsicGetItem contain mem op1
authorSteve MacLean <sdmaclea.qdt@qualcommdatacenter.com>
Wed, 29 Nov 2017 00:46:43 +0000 (19:46 -0500)
committerSteve MacLean <sdmaclea.qdt@qualcommdatacenter.com>
Fri, 8 Dec 2017 22:42:58 +0000 (17:42 -0500)
src/jit/codegenarm64.cpp
src/jit/lowerarmarch.cpp
src/jit/lsraarm64.cpp

index da9c7bf..e63d961 100644 (file)
@@ -4578,13 +4578,9 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
     // - the source of SIMD type (op1)
     // - the index of the value to be returned.
     genConsumeOperands(simdNode);
-    regNumber srcReg = op1->gtRegNum;
 
-    // TODO-ARM64-CQ Optimize SIMDIntrinsicGetItem
-    // Optimize the case of op1 is in memory and trying to access ith element.
-    assert(op1->isUsedFromReg());
-
-    emitAttr baseTypeSize = emitTypeSize(baseType);
+    emitAttr baseTypeSize  = emitTypeSize(baseType);
+    unsigned baseTypeScale = genLog2(EA_SIZE_IN_BYTES(baseTypeSize));
 
     if (op2->IsCnsIntOrI())
     {
@@ -4592,34 +4588,102 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
 
         ssize_t index = op2->gtIntCon.gtIconVal;
 
+        // We only need to generate code for the get if the index is valid
+        // If the index is invalid, previously generated for the range check will throw
         if (getEmitter()->isValidVectorIndex(emitTypeSize(simdType), baseTypeSize, index))
         {
-            // Only generate code for the get if the index is valid
-            // Otherwise generated code will throw
-            getEmitter()->emitIns_R_R_I(INS_mov, baseTypeSize, targetReg, srcReg, index);
+            if (op1->isContained())
+            {
+                int         offset = (int)index * genTypeSize(baseType);
+                instruction ins    = ins_Load(baseType);
+                baseTypeSize       = varTypeIsFloating(baseType)
+                                   ? baseTypeSize
+                                   : getEmitter()->emitInsAdjustLoadStoreAttr(ins, baseTypeSize);
+
+                assert(!op1->isUsedFromReg());
+
+                if (op1->OperIsLocal())
+                {
+                    unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+
+                    getEmitter()->emitIns_R_S(ins, baseTypeSize, targetReg, varNum, offset);
+                }
+                else
+                {
+                    assert(op1->OperGet() == GT_IND);
+
+                    GenTree* addr = op1->AsIndir()->Addr();
+                    assert(!addr->isContained());
+                    regNumber baseReg = addr->gtRegNum;
+
+                    // ldr targetReg, [baseReg, #offset]
+                    getEmitter()->emitIns_R_R_I(ins, baseTypeSize, targetReg, baseReg, offset);
+                }
+            }
+            else
+            {
+                assert(op1->isUsedFromReg());
+                regNumber srcReg = op1->gtRegNum;
+
+                // mov targetReg, srcReg[#index]
+                getEmitter()->emitIns_R_R_I(INS_mov, baseTypeSize, targetReg, srcReg, index);
+            }
         }
     }
     else
     {
-        unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum;
-        noway_assert(compiler->lvaSIMDInitTempVarNum != BAD_VAR_NUM);
+        assert(!op2->isContained());
 
+        regNumber baseReg  = REG_NA;
         regNumber indexReg = op2->gtRegNum;
-        regNumber tmpReg   = simdNode->ExtractTempReg();
 
-        assert(genIsValidIntReg(tmpReg));
-        assert(tmpReg != indexReg);
+        if (op1->isContained())
+        {
+            // Optimize the case of op1 is in memory and trying to access ith element.
+            assert(!op1->isUsedFromReg());
+            if (op1->OperIsLocal())
+            {
+                unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+
+                baseReg = simdNode->ExtractTempReg();
+
+                // Load the address of varNum
+                getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, varNum, 0);
+            }
+            else
+            {
+                // Require GT_IND addr to be not contained.
+                assert(op1->OperGet() == GT_IND);
+
+                GenTree* addr = op1->AsIndir()->Addr();
+                assert(!addr->isContained());
 
-        unsigned baseTypeScale = genLog2(EA_SIZE_IN_BYTES(baseTypeSize));
+                baseReg = addr->gtRegNum;
+            }
+        }
+        else
+        {
+            assert(op1->isUsedFromReg());
+            regNumber srcReg = op1->gtRegNum;
 
-        // Load the address of simdInitTempVarNum
-        getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, tmpReg, simdInitTempVarNum, 0);
+            unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum;
+            noway_assert(compiler->lvaSIMDInitTempVarNum != BAD_VAR_NUM);
+
+            baseReg = simdNode->ExtractTempReg();
+
+            // Load the address of simdInitTempVarNum
+            getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, simdInitTempVarNum, 0);
+
+            // Store the vector to simdInitTempVarNum
+            getEmitter()->emitIns_R_R(INS_str, emitTypeSize(simdType), srcReg, baseReg);
+        }
 
-        // Store the vector to simdInitTempVarNum
-        getEmitter()->emitIns_R_R(INS_str, emitTypeSize(simdType), srcReg, tmpReg);
+        assert(genIsValidIntReg(indexReg));
+        assert(genIsValidIntReg(baseReg));
+        assert(baseReg != indexReg);
 
-        // Load item at simdInitTempVarNum[index]
-        getEmitter()->emitIns_R_R_R_Ext(ins_Load(baseType), baseTypeSize, targetReg, tmpReg, indexReg, INS_OPTS_LSL,
+        // Load item at baseReg[index]
+        getEmitter()->emitIns_R_R_R_Ext(ins_Load(baseType), baseTypeSize, targetReg, baseReg, indexReg, INS_OPTS_LSL,
                                         baseTypeScale);
     }
 
index 2316501..5aa3ff1 100644 (file)
@@ -785,12 +785,11 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
 
         case SIMDIntrinsicGetItem:
         {
-            // TODO-ARM64-CQ Support containing op1 memory ops
-
             // This implements get_Item method. The sources are:
             //  - the source SIMD struct
             //  - index (which element to get)
             // The result is baseType of SIMD struct.
+            op1 = simdNode->gtOp.gtOp1;
             op2 = simdNode->gtOp.gtOp2;
 
             // If the index is a constant, mark it as contained.
@@ -798,6 +797,15 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
             {
                 MakeSrcContained(simdNode, op2);
             }
+
+            if (IsContainableMemoryOp(op1))
+            {
+                MakeSrcContained(simdNode, op1);
+                if (op1->OperGet() == GT_IND)
+                {
+                    op1->AsIndir()->Addr()->ClearContained();
+                }
+            }
             break;
         }
 
index 22fb56e..7c9b601 100644 (file)
@@ -815,20 +815,38 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
             break;
 
         case SIMDIntrinsicGetItem:
+            op1 = simdTree->gtGetOp1();
+            op2 = simdTree->gtGetOp2();
+
             // We have an object and an item, which may be contained.
-            info->srcCount = simdTree->gtGetOp2()->isContained() ? 1 : 2;
+            info->srcCount = (op2->isContained() ? 1 : 2);
 
-            if (!simdTree->gtGetOp2()->IsCnsIntOrI())
+            if (op1->isContained())
             {
-                // If the index is not a constant, we will need a general purpose register
-                info->internalIntCount = 1;
+                // Although GT_IND of TYP_SIMD12 reserves an internal register for reading 4 and 8 bytes from memory
+                // and assembling them into target reg, it is not required in this case.
+                op1->gtLsraInfo.internalIntCount   = 0;
+                op1->gtLsraInfo.internalFloatCount = 0;
+                info->srcCount -= 1;
+                info->srcCount += GetOperandSourceCount(op1);
+            }
 
-                // If the index is not a constant, we will use the SIMD temp location to store the vector.
-                compiler->getSIMDInitTempVarNum();
+            if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal()))
+            {
+                // If the index is not a constant and not contained or is a local
+                // we will need a general purpose register to calculate the address
+                info->internalIntCount = 1;
 
                 // internal register must not clobber input index
-                simdTree->gtOp.gtOp2->gtLsraInfo.isDelayFree = true;
-                info->hasDelayFreeSrc                        = true;
+                op2->gtLsraInfo.isDelayFree = true;
+                info->hasDelayFreeSrc       = true;
+            }
+
+            if (!op2->IsCnsIntOrI() && (!op1->isContained()))
+            {
+                // If vector is not already in memory (contained) and the index is not a constant,
+                // we will use the SIMD temp location to store the vector.
+                compiler->getSIMDInitTempVarNum();
             }
             break;