Improve non-SIMD block copy/init codegen (#88393)
authorEgor Bogatov <egorbo@gmail.com>
Wed, 5 Jul 2023 12:26:24 +0000 (14:26 +0200)
committerGitHub <noreply@github.com>
Wed, 5 Jul 2023 12:26:24 +0000 (14:26 +0200)
src/coreclr/jit/codegenxarch.cpp
src/coreclr/jit/compiler.h

index 4318fb4..86e8995 100644 (file)
@@ -3269,8 +3269,15 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
         }
     }
 
+    // Handle the non-SIMD remainder by overlapping with previously processed data if needed
     if (size > 0)
     {
+        assert(size <= REGSIZE_BYTES);
+
+        // Round up to the closest power of two, but make sure it's not larger
+        // than the register we used for the main loop
+        regSize = min(regSize, compiler->roundUpGPRSize(size));
+
         unsigned shiftBack = regSize - size;
         assert(shiftBack <= regSize);
         dstOffset -= shiftBack;
@@ -3546,8 +3553,15 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)
             }
         }
 
+        // Handle the non-SIMD remainder by overlapping with previously processed data if needed
         if (size > 0)
         {
+            assert(size <= REGSIZE_BYTES);
+
+            // Round up to the closest power of two, but make sure it's not larger
+            // than the register we used for the main loop
+            regSize = min(regSize, compiler->roundUpGPRSize(size));
+
             unsigned shiftBack = regSize - size;
             assert(shiftBack <= regSize);
 
index 806c2f5..3091a5c 100644 (file)
@@ -8932,6 +8932,20 @@ private:
 #endif // FEATURE_SIMD
 
 public:
+    // Similar to roundUpSIMDSize, but for General Purpose Registers (GPR)
+    unsigned int roundUpGPRSize(unsigned size)
+    {
+        if (size > 4 && (REGSIZE_BYTES == 8))
+        {
+            return 8;
+        }
+        else if (size > 2)
+        {
+            return 4;
+        }
+        return size; // 2, 1, 0
+    }
+
     enum UnrollKind
     {
         Memset,