From f8435f4e82632f4e97675d9a9848e297255f1e0c Mon Sep 17 00:00:00 2001
From: Sven Boemer
Date: Mon, 10 Apr 2023 17:48:48 -0700
Subject: [PATCH] Revert "Add SIMD to LowerCallMemcmp (#84530)" (#84595)

This reverts commit eda1c3a41f1493defbb026a35de76acd4ccbeb0a.
---
 src/coreclr/jit/gentree.cpp |  6 ++--
 src/coreclr/jit/lower.cpp   | 68 +++++++------------------------------
 2 files changed, 15 insertions(+), 59 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 27073d65572..5e1a1688830 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -7384,9 +7384,9 @@ GenTree* Compiler::gtNewZeroConNode(var_types type)
 #ifdef FEATURE_SIMD
     if (varTypeIsSIMD(type))
     {
-        GenTreeVecCon* vecCon = gtNewVconNode(type);
-        vecCon->gtSimdVal     = simd_t::Zero();
-        return vecCon;
+        GenTreeVecCon* allBitsSet = gtNewVconNode(type);
+        allBitsSet->gtSimdVal     = simd_t::Zero();
+        return allBitsSet;
     }
 #endif // FEATURE_SIMD
 
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index ef6ee62ba48..87ea617fff8 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -1902,20 +1902,8 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
     {
         GenTree* lArg = call->gtArgs.GetUserArgByIndex(0)->GetNode();
         GenTree* rArg = call->gtArgs.GetUserArgByIndex(1)->GetNode();
-
-        ssize_t MaxUnrollSize = 16;
-#ifdef FEATURE_SIMD
-        MaxUnrollSize = 32;
-#ifdef TARGET_XARCH
-        if (comp->compOpportunisticallyDependsOn(InstructionSet_Vector256))
-        {
-            MaxUnrollSize = 64;
-        }
-// TODO-XARCH-AVX512: Consider enabling this for AVX512
-#endif
-#endif
-
-        if (cnsSize <= MaxUnrollSize)
+        // TODO: Add SIMD path for [16..128] via GT_HWINTRINSIC nodes
+        if (cnsSize <= 16)
         {
             unsigned loadWidth = 1 << BitOperations::Log2((unsigned)cnsSize);
             var_types loadType;
@@ -1931,25 +1919,11 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
             {
                 loadType = TYP_INT;
             }
-            else if ((loadWidth == 8) || (MaxUnrollSize == 16))
+            else if ((loadWidth == 8) || (loadWidth == 16))
             {
                 loadWidth = 8;
                 loadType  = TYP_LONG;
             }
-#ifdef FEATURE_SIMD
-            else if ((loadWidth == 16) || (MaxUnrollSize == 32))
-            {
-                loadWidth = 16;
-                loadType  = TYP_SIMD16;
-            }
-#ifdef TARGET_XARCH
-            else if ((loadWidth == 32) || (MaxUnrollSize == 64))
-            {
-                loadWidth = 32;
-                loadType  = TYP_SIMD32;
-            }
-#endif // TARGET_XARCH
-#endif // FEATURE_SIMD
             else
             {
                 unreached();
@@ -1958,26 +1932,8 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
 
             GenTree* result = nullptr;
 
-            auto newBinaryOp = [](Compiler* comp, genTreeOps oper, var_types type, GenTree* op1,
-                                  GenTree* op2) -> GenTree* {
-#ifdef FEATURE_SIMD
-                if (varTypeIsSIMD(op1))
-                {
-                    if (GenTree::OperIsCmpCompare(oper))
-                    {
-                        assert(type == TYP_INT);
-                        return comp->gtNewSimdCmpOpAllNode(oper, TYP_BOOL, op1, op2, CORINFO_TYPE_NATIVEUINT,
-                                                           genTypeSize(op1), false);
-                    }
-                    return comp->gtNewSimdBinOpNode(oper, op1->TypeGet(), op1, op2, CORINFO_TYPE_NATIVEUINT,
-                                                    genTypeSize(op1), false);
-                }
-#endif
-                return comp->gtNewOperNode(oper, type, op1, op2);
-            };
-
             // loadWidth == cnsSize means a single load is enough for both args
-            if (loadWidth == (unsigned)cnsSize)
+            if ((loadWidth == (unsigned)cnsSize) && (loadWidth <= 8))
             {
                 // We're going to emit something like the following:
                 //
@@ -1987,7 +1943,7 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
                 // bool result = *(int*)leftArg == *(int*)rightArg
                 //
                 GenTree* lIndir = comp->gtNewIndir(loadType, lArg);
                 GenTree* rIndir = comp->gtNewIndir(loadType, rArg);
-                result          = newBinaryOp(comp, GT_EQ, TYP_INT, lIndir, rIndir);
+                result          = comp->gtNewOperNode(GT_EQ, TYP_INT, lIndir, rIndir);
                 BlockRange().InsertAfter(lArg, lIndir);
                 BlockRange().InsertAfter(rArg, rIndir);
@@ -2034,17 +1990,17 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
                 //
                 GenTree* l1Indir   = comp->gtNewIndir(loadType, lArgUse.Def());
                 GenTree* r1Indir   = comp->gtNewIndir(loadType, rArgUse.Def());
-                GenTree* lXor      = newBinaryOp(comp, GT_XOR, actualLoadType, l1Indir, r1Indir);
+                GenTree* lXor      = comp->gtNewOperNode(GT_XOR, actualLoadType, l1Indir, r1Indir);
                 GenTree* l2Offs    = comp->gtNewIconNode(cnsSize - loadWidth, TYP_I_IMPL);
-                GenTree* l2AddOffs = newBinaryOp(comp, GT_ADD, lArg->TypeGet(), lArgClone, l2Offs);
+                GenTree* l2AddOffs = comp->gtNewOperNode(GT_ADD, lArg->TypeGet(), lArgClone, l2Offs);
                 GenTree* l2Indir   = comp->gtNewIndir(loadType, l2AddOffs);
                 GenTree* r2Offs    = comp->gtCloneExpr(l2Offs); // offset is the same
-                GenTree* r2AddOffs = newBinaryOp(comp, GT_ADD, rArg->TypeGet(), rArgClone, r2Offs);
+                GenTree* r2AddOffs = comp->gtNewOperNode(GT_ADD, rArg->TypeGet(), rArgClone, r2Offs);
                 GenTree* r2Indir   = comp->gtNewIndir(loadType, r2AddOffs);
-                GenTree* rXor      = newBinaryOp(comp, GT_XOR, actualLoadType, l2Indir, r2Indir);
-                GenTree* resultOr  = newBinaryOp(comp, GT_OR, actualLoadType, lXor, rXor);
-                GenTree* zeroCns   = comp->gtNewZeroConNode(actualLoadType);
-                result             = newBinaryOp(comp, GT_EQ, TYP_INT, resultOr, zeroCns);
+                GenTree* rXor      = comp->gtNewOperNode(GT_XOR, actualLoadType, l2Indir, r2Indir);
+                GenTree* resultOr  = comp->gtNewOperNode(GT_OR, actualLoadType, lXor, rXor);
+                GenTree* zeroCns   = comp->gtNewIconNode(0, actualLoadType);
+                result             = comp->gtNewOperNode(GT_EQ, TYP_INT, resultOr, zeroCns);
 
                 BlockRange().InsertAfter(rArgClone, l1Indir, r1Indir, l2Offs, l2AddOffs);
                 BlockRange().InsertAfter(l2AddOffs, l2Indir, r2Offs, r2AddOffs, r2Indir);
-- 
2.34.1
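
For reference, the scalar shape this revert falls back to can be sketched in plain
C++. For a constant size that is not a power of two, LowerCallMemcmp loads each
argument twice, at offset 0 and at offset cnsSize - loadWidth, so the two loads
overlap and together cover every byte; it then XORs each pair of loads, ORs the two
XORs, and compares the result against zero (the GT_XOR/GT_OR/GT_EQ chain in the
last hunk above). The sketch below is illustrative only: the function name
MemcmpEq5 and the fixed 5-byte size are assumptions for the example, not JIT code.

#include <cstdint>
#include <cstring>

// Illustrative sketch (not JIT code): the value computed by the IR that
// LowerCallMemcmp emits for memcmp(left, right, 5) == 0 with 4-byte loads.
static bool MemcmpEq5(const uint8_t* left, const uint8_t* right)
{
    uint32_t l1, r1, l2, r2;
    std::memcpy(&l1, left, 4);           // first load: bytes [0..3]
    std::memcpy(&r1, right, 4);
    std::memcpy(&l2, left + (5 - 4), 4); // second load: bytes [1..4], offset = cnsSize - loadWidth
    std::memcpy(&r2, right + (5 - 4), 4);
    // Each XOR is nonzero exactly when its pair of loads differs; OR-ing the
    // two XORs and comparing against zero yields the equality result.
    return ((l1 ^ r1) | (l2 ^ r2)) == 0;
}

With loadType capped at TYP_LONG (8-byte loads), two overlapping loads cover at
most 16 bytes, which is why the unrolled path is gated on cnsSize <= 16 after this
revert.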