Refine CopyBytes macro instruction in IA32/X64
authorsvenpanne@chromium.org <svenpanne@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 12 Nov 2013 09:08:51 +0000 (09:08 +0000)
committersvenpanne@chromium.org <svenpanne@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 12 Nov 2013 09:08:51 +0000 (09:08 +0000)
Use DWORD/QWORD copy for more short cases

BUG=
R=svenpanne@chromium.org

Review URL: https://codereview.chromium.org/66073003

Patch from Weiliang Lin <weiliang.lin@intel.com>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@17632 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/ia32/macro-assembler-ia32.cc
src/x64/macro-assembler-x64.cc

index c673727..1bdd382 100644 (file)
@@ -2010,30 +2010,48 @@ void MacroAssembler::CopyBytes(Register source,
                                Register destination,
                                Register length,
                                Register scratch) {
-  Label loop, done, short_string, short_loop;
-  // Experimentation shows that the short string loop is faster if length < 10.
-  cmp(length, Immediate(10));
-  j(less_equal, &short_string);
-
+  Label short_loop, len4, len8, len12, done, short_string;
   ASSERT(source.is(esi));
   ASSERT(destination.is(edi));
   ASSERT(length.is(ecx));
+  cmp(length, Immediate(4));
+  j(below, &short_string, Label::kNear);
 
   // Because source is 4-byte aligned in our uses of this function,
   // we keep source aligned for the rep_movs call by copying the odd bytes
   // at the end of the ranges.
   mov(scratch, Operand(source, length, times_1, -4));
   mov(Operand(destination, length, times_1, -4), scratch);
+
+  cmp(length, Immediate(8));
+  j(below_equal, &len4, Label::kNear);
+  cmp(length, Immediate(12));
+  j(below_equal, &len8, Label::kNear);
+  cmp(length, Immediate(16));
+  j(below_equal, &len12, Label::kNear);
+
   mov(scratch, ecx);
   shr(ecx, 2);
   rep_movs();
   and_(scratch, Immediate(0x3));
   add(destination, scratch);
-  jmp(&done);
+  jmp(&done, Label::kNear);
+
+  bind(&len12);
+  mov(scratch, Operand(source, 8));
+  mov(Operand(destination, 8), scratch);
+  bind(&len8);
+  mov(scratch, Operand(source, 4));
+  mov(Operand(destination, 4), scratch);
+  bind(&len4);
+  mov(scratch, Operand(source, 0));
+  mov(Operand(destination, 0), scratch);
+  add(destination, length);
+  jmp(&done, Label::kNear);
 
   bind(&short_string);
   test(length, length);
-  j(zero, &done);
+  j(zero, &done, Label::kNear);
 
   bind(&short_loop);
   mov_b(scratch, Operand(source, 0));
index df984fc..2f7166e 100644 (file)
@@ -4415,18 +4415,27 @@ void MacroAssembler::CopyBytes(Register destination,
     cmpl(length, Immediate(min_length));
     Assert(greater_equal, kInvalidMinLength);
   }
-  Label loop, done, short_string, short_loop;
+  Label short_loop, len8, len16, len24, done, short_string;
 
-  const int kLongStringLimit = 20;
+  const int kLongStringLimit = 4 * kPointerSize;
   if (min_length <= kLongStringLimit) {
-    cmpl(length, Immediate(kLongStringLimit));
-    j(less_equal, &short_string);
+    cmpl(length, Immediate(kPointerSize));
+    j(below, &short_string, Label::kNear);
   }
 
   ASSERT(source.is(rsi));
   ASSERT(destination.is(rdi));
   ASSERT(length.is(rcx));
 
+  if (min_length <= kLongStringLimit) {
+    cmpl(length, Immediate(2 * kPointerSize));
+    j(below_equal, &len8, Label::kNear);
+    cmpl(length, Immediate(3 * kPointerSize));
+    j(below_equal, &len16, Label::kNear);
+    cmpl(length, Immediate(4 * kPointerSize));
+    j(below_equal, &len24, Label::kNear);
+  }
+
   // Because source is 8-byte aligned in our uses of this function,
   // we keep source aligned for the rep movs operation by copying the odd bytes
   // at the end of the ranges.
@@ -4440,25 +4449,38 @@ void MacroAssembler::CopyBytes(Register destination,
   addq(destination, scratch);
 
   if (min_length <= kLongStringLimit) {
-    jmp(&done);
+    jmp(&done, Label::kNear);
+    bind(&len24);
+    movq(scratch, Operand(source, 2 * kPointerSize));
+    movq(Operand(destination, 2 * kPointerSize), scratch);
+    bind(&len16);
+    movq(scratch, Operand(source, kPointerSize));
+    movq(Operand(destination, kPointerSize), scratch);
+    bind(&len8);
+    movq(scratch, Operand(source, 0));
+    movq(Operand(destination, 0), scratch);
+    // Move remaining bytes of length.
+    movq(scratch, Operand(source, length, times_1, -kPointerSize));
+    movq(Operand(destination, length, times_1, -kPointerSize), scratch);
+    addq(destination, length);
+    jmp(&done, Label::kNear);
 
     bind(&short_string);
     if (min_length == 0) {
       testl(length, length);
-      j(zero, &done);
+      j(zero, &done, Label::kNear);
     }
-    lea(scratch, Operand(destination, length, times_1, 0));
 
     bind(&short_loop);
-    movb(length, Operand(source, 0));
-    movb(Operand(destination, 0), length);
+    movb(scratch, Operand(source, 0));
+    movb(Operand(destination, 0), scratch);
     incq(source);
     incq(destination);
-    cmpq(destination, scratch);
-    j(not_equal, &short_loop);
-
-    bind(&done);
+    decl(length);
+    j(not_zero, &short_loop);
   }
+
+  bind(&done);
 }