Improve RA for LowerBlockStore (#83627)
authorEgor Bogatov <egorbo@gmail.com>
Tue, 21 Mar 2023 00:49:42 +0000 (01:49 +0100)
committerGitHub <noreply@github.com>
Tue, 21 Mar 2023 00:49:42 +0000 (01:49 +0100)
src/coreclr/jit/lowerxarch.cpp

index 44592e4e1f24ac54f41c23a7c347a6ce341ab64a..56fd1c30c1a653862ed12cfc3cfcf5842d060341 100644 (file)
@@ -348,15 +348,27 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
                     {
                         const bool canUse16BytesSimdMov = !blkNode->IsOnHeapAndContainsReferences();
 #ifdef TARGET_AMD64
-                        const bool willUseOnlySimdMov = canUse16BytesSimdMov && (size % XMM_REGSIZE_BYTES == 0);
+
+                        bool willUseOnlySimdMov = size % XMM_REGSIZE_BYTES == 0;
+                        if (!willUseOnlySimdMov)
+                        {
+                            // If there is a remainder, we might still process it with SIMD only (via overlapping),
+                            // unless a scalar op is more efficient for it (remainders of 1, 2, 4 or 8 bytes).
+                            const unsigned remainder = size % XMM_REGSIZE_BYTES;
+                            if (!isPow2(remainder) || (remainder > REGSIZE_BYTES))
+                            {
+                                willUseOnlySimdMov = true;
+                            }
+                        }
 #else
                         const bool willUseOnlySimdMov = (size % 8 == 0);
 #endif
-                        if (willUseOnlySimdMov)
+                        if (willUseOnlySimdMov && canUse16BytesSimdMov)
                         {
                             src->SetContained();
                         }
-                        else if (size > comp->getUnrollThreshold(Compiler::UnrollKind::Memset, /*canUseSimd*/ false))
+                        else if (size > comp->getUnrollThreshold(Compiler::UnrollKind::Memset,
+                                                                 /*canUseSimd*/ canUse16BytesSimdMov))
                         {
                             // It turns out we can't use SIMD so the default threshold is too big
                             goto TOO_BIG_TO_UNROLL;