From 51d637a073e11808cd68d37773dd78130b74caf3 Mon Sep 17 00:00:00 2001 From: "svenpanne@chromium.org" Date: Tue, 12 Nov 2013 09:08:51 +0000 Subject: [PATCH] Refine CopyBytes macro instruction in IA32/X64 Use DWORD/QWORD copy for more short cases BUG= R=svenpanne@chromium.org Review URL: https://codereview.chromium.org/66073003 Patch from Weiliang Lin . git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@17632 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/ia32/macro-assembler-ia32.cc | 32 +++++++++++++++++++++------ src/x64/macro-assembler-x64.cc | 48 +++++++++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 20 deletions(-) diff --git a/src/ia32/macro-assembler-ia32.cc b/src/ia32/macro-assembler-ia32.cc index c673727..1bdd382 100644 --- a/src/ia32/macro-assembler-ia32.cc +++ b/src/ia32/macro-assembler-ia32.cc @@ -2010,30 +2010,48 @@ void MacroAssembler::CopyBytes(Register source, Register destination, Register length, Register scratch) { - Label loop, done, short_string, short_loop; - // Experimentation shows that the short string loop is faster if length < 10. - cmp(length, Immediate(10)); - j(less_equal, &short_string); - + Label short_loop, len4, len8, len12, done, short_string; ASSERT(source.is(esi)); ASSERT(destination.is(edi)); ASSERT(length.is(ecx)); + cmp(length, Immediate(4)); + j(below, &short_string, Label::kNear); // Because source is 4-byte aligned in our uses of this function, // we keep source aligned for the rep_movs call by copying the odd bytes // at the end of the ranges. mov(scratch, Operand(source, length, times_1, -4)); mov(Operand(destination, length, times_1, -4), scratch); + + cmp(length, Immediate(8)); + j(below_equal, &len4, Label::kNear); + cmp(length, Immediate(12)); + j(below_equal, &len8, Label::kNear); + cmp(length, Immediate(16)); + j(below_equal, &len12, Label::kNear); + mov(scratch, ecx); shr(ecx, 2); rep_movs(); and_(scratch, Immediate(0x3)); add(destination, scratch); - jmp(&done); + jmp(&done, Label::kNear); + + bind(&len12); + mov(scratch, Operand(source, 8)); + mov(Operand(destination, 8), scratch); + bind(&len8); + mov(scratch, Operand(source, 4)); + mov(Operand(destination, 4), scratch); + bind(&len4); + mov(scratch, Operand(source, 0)); + mov(Operand(destination, 0), scratch); + add(destination, length); + jmp(&done, Label::kNear); bind(&short_string); test(length, length); - j(zero, &done); + j(zero, &done, Label::kNear); bind(&short_loop); mov_b(scratch, Operand(source, 0)); diff --git a/src/x64/macro-assembler-x64.cc b/src/x64/macro-assembler-x64.cc index df984fc..2f7166e 100644 --- a/src/x64/macro-assembler-x64.cc +++ b/src/x64/macro-assembler-x64.cc @@ -4415,18 +4415,27 @@ void MacroAssembler::CopyBytes(Register destination, cmpl(length, Immediate(min_length)); Assert(greater_equal, kInvalidMinLength); } - Label loop, done, short_string, short_loop; + Label short_loop, len8, len16, len24, done, short_string; - const int kLongStringLimit = 20; + const int kLongStringLimit = 4 * kPointerSize; if (min_length <= kLongStringLimit) { - cmpl(length, Immediate(kLongStringLimit)); - j(less_equal, &short_string); + cmpl(length, Immediate(kPointerSize)); + j(below, &short_string, Label::kNear); } ASSERT(source.is(rsi)); ASSERT(destination.is(rdi)); ASSERT(length.is(rcx)); + if (min_length <= kLongStringLimit) { + cmpl(length, Immediate(2 * kPointerSize)); + j(below_equal, &len8, Label::kNear); + cmpl(length, Immediate(3 * kPointerSize)); + j(below_equal, &len16, Label::kNear); + cmpl(length, Immediate(4 * kPointerSize)); + j(below_equal, &len24, Label::kNear); + } + // Because source is 8-byte aligned in our uses of this function, // we keep source aligned for the rep movs operation by copying the odd bytes // at the end of the ranges. @@ -4440,25 +4449,38 @@ void MacroAssembler::CopyBytes(Register destination, addq(destination, scratch); if (min_length <= kLongStringLimit) { - jmp(&done); + jmp(&done, Label::kNear); + bind(&len24); + movq(scratch, Operand(source, 2 * kPointerSize)); + movq(Operand(destination, 2 * kPointerSize), scratch); + bind(&len16); + movq(scratch, Operand(source, kPointerSize)); + movq(Operand(destination, kPointerSize), scratch); + bind(&len8); + movq(scratch, Operand(source, 0)); + movq(Operand(destination, 0), scratch); + // Move remaining bytes of length. + movq(scratch, Operand(source, length, times_1, -kPointerSize)); + movq(Operand(destination, length, times_1, -kPointerSize), scratch); + addq(destination, length); + jmp(&done, Label::kNear); bind(&short_string); if (min_length == 0) { testl(length, length); - j(zero, &done); + j(zero, &done, Label::kNear); } - lea(scratch, Operand(destination, length, times_1, 0)); bind(&short_loop); - movb(length, Operand(source, 0)); - movb(Operand(destination, 0), length); + movb(scratch, Operand(source, 0)); + movb(Operand(destination, 0), scratch); incq(source); incq(destination); - cmpq(destination, scratch); - j(not_equal, &short_loop); - - bind(&done); + decl(length); + j(not_zero, &short_loop); } + + bind(&done); } -- 2.7.4