Merge branch 'master' into xcnt-false-dep
author Pent Ploompuu <kaalikas@gmail.com>
Thu, 6 Sep 2018 21:11:03 +0000 (00:11 +0300)
committer GitHub <noreply@github.com>
Thu, 6 Sep 2018 21:11:03 +0000 (00:11 +0300)
1  2 
src/jit/hwintrinsiccodegenxarch.cpp
src/jit/lsraxarch.cpp

Simple merge
@@@ -2584,6 -2591,55 +2591,44 @@@ int LinearScan::BuildHWIntrinsic(GenTre
                  break;
              }
  
 -            case NI_BMI1_TrailingZeroCount:
 -            case NI_LZCNT_LeadingZeroCount:
 -            case NI_POPCNT_PopCount:
 -            {
 -                assert(numArgs == 1);
 -                srcCount += BuildDelayFreeUses(op1);
 -
 -                buildUses = false;
 -                break;
 -            }
 -
+             case NI_AVX2_GatherVector128:
+             case NI_AVX2_GatherVector256:
+             {
+                 assert(numArgs == 3);
+                 // Any pair of the index, mask, or destination registers should be different
+                 srcCount += BuildOperandUses(op1);
+                 srcCount += BuildDelayFreeUses(op2);
+                 // get a tmp register for mask that will be cleared by gather instructions
+                 buildInternalFloatRegisterDefForNode(intrinsicTree, allSIMDRegs());
+                 setInternalRegsDelayFree = true;
+                 buildUses = false;
+                 break;
+             }
+             case NI_AVX2_GatherMaskVector128:
+             case NI_AVX2_GatherMaskVector256:
+             {
+                 assert(numArgs == 5);
+                 // Any pair of the index, mask, or destination registers should be different
+                 srcCount += BuildOperandUses(op1);
+                 srcCount += BuildOperandUses(op2);
+                 srcCount += BuildDelayFreeUses(op3);
+                 assert(intrinsicTree->gtGetOp1()->OperIsList());
+                 GenTreeArgList* argList = intrinsicTree->gtGetOp1()->AsArgList();
+                 GenTree*        op4     = argList->Rest()->Rest()->Rest()->Current();
+                 srcCount += BuildDelayFreeUses(op4);
+                 // get a tmp register for mask that will be cleared by gather instructions
+                 buildInternalFloatRegisterDefForNode(intrinsicTree, allSIMDRegs());
+                 setInternalRegsDelayFree = true;
+                 buildUses = false;
+                 break;
+             }
              default:
              {
                  assert((intrinsicId > NI_HW_INTRINSIC_START) && (intrinsicId < NI_HW_INTRINSIC_END));