From ee7bdef1430522334c2808d6ea66fe609f5dc9ff Mon Sep 17 00:00:00 2001 From: "rodolph.perfetta@gmail.com" Date: Fri, 12 Apr 2013 14:46:43 +0000 Subject: [PATCH] ARM: Small copy optimization. Copying 64bits at a time. BUG=none TEST=none Review URL: https://chromiumcodereview.appspot.com/14121006 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@14254 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/arm/assembler-arm.h | 2 ++ src/arm/code-stubs-arm.cc | 2 +- src/arm/full-codegen-arm.cc | 2 +- src/arm/lithium-codegen-arm.cc | 13 ++----------- src/arm/macro-assembler-arm.cc | 33 +++++++++++++++------------------ src/arm/macro-assembler-arm.h | 6 +++++- 6 files changed, 26 insertions(+), 32 deletions(-) diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h index eab9852..57582b6 100644 --- a/src/arm/assembler-arm.h +++ b/src/arm/assembler-arm.h @@ -212,6 +212,7 @@ const Register pc = { kRegister_pc_Code }; // Single word VFP register. struct SwVfpRegister { + static const int kSizeInBytes = 4; bool is_valid() const { return 0 <= code_ && code_ < 32; } bool is(SwVfpRegister reg) const { return code_ == reg.code_; } int code() const { @@ -242,6 +243,7 @@ struct DwVfpRegister { static const int kNumReservedRegisters = 2; static const int kMaxNumAllocatableRegisters = kMaxNumRegisters - kNumReservedRegisters; + static const int kSizeInBytes = 8; // Note: the number of registers can be different at snapshot and run-time. // Any code included in the snapshot must be able to run both with 16 or 32 diff --git a/src/arm/code-stubs-arm.cc b/src/arm/code-stubs-arm.cc index dc0bffe..a17c7dd 100644 --- a/src/arm/code-stubs-arm.cc +++ b/src/arm/code-stubs-arm.cc @@ -4448,7 +4448,7 @@ void ArgumentsAccessStub::GenerateNewStrict(MacroAssembler* masm) { Context::STRICT_MODE_ARGUMENTS_BOILERPLATE_INDEX))); // Copy the JS object part. - __ CopyFields(r0, r4, r3.bit(), JSObject::kHeaderSize / kPointerSize); + __ CopyFields(r0, r4, d0, s0, JSObject::kHeaderSize / kPointerSize); // Get the length (smi tagged) and set that as an in-object property too. STATIC_ASSERT(Heap::kArgumentsLengthIndex == 0); diff --git a/src/arm/full-codegen-arm.cc b/src/arm/full-codegen-arm.cc index d34b460..ba0f141 100644 --- a/src/arm/full-codegen-arm.cc +++ b/src/arm/full-codegen-arm.cc @@ -1563,7 +1563,7 @@ void FullCodeGenerator::VisitRegExpLiteral(RegExpLiteral* expr) { // r0: Newly allocated regexp. // r5: Materialized regexp. // r2: temp. - __ CopyFields(r0, r5, r2.bit(), size / kPointerSize); + __ CopyFields(r0, r5, d0, s0, size / kPointerSize); context()->Plug(r0); } diff --git a/src/arm/lithium-codegen-arm.cc b/src/arm/lithium-codegen-arm.cc index 5d02613..82134b4 100644 --- a/src/arm/lithium-codegen-arm.cc +++ b/src/arm/lithium-codegen-arm.cc @@ -5614,17 +5614,8 @@ void LCodeGen::DoRegExpLiteral(LRegExpLiteral* instr) { __ bind(&allocated); // Copy the content into the newly allocated memory. - // (Unroll copy loop once for better throughput). - for (int i = 0; i < size - kPointerSize; i += 2 * kPointerSize) { - __ ldr(r3, FieldMemOperand(r1, i)); - __ ldr(r2, FieldMemOperand(r1, i + kPointerSize)); - __ str(r3, FieldMemOperand(r0, i)); - __ str(r2, FieldMemOperand(r0, i + kPointerSize)); - } - if ((size % (2 * kPointerSize)) != 0) { - __ ldr(r3, FieldMemOperand(r1, size - kPointerSize)); - __ str(r3, FieldMemOperand(r0, size - kPointerSize)); - } + __ CopyFields(r0, r1, double_scratch0(), double_scratch0().low(), + size / kPointerSize); } diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc index 465bd10..a3379d5 100644 --- a/src/arm/macro-assembler-arm.cc +++ b/src/arm/macro-assembler-arm.cc @@ -3160,27 +3160,24 @@ void MacroAssembler::AllocateHeapNumberWithValue(Register result, // Copies a fixed number of fields of heap objects from src to dst. void MacroAssembler::CopyFields(Register dst, Register src, - RegList temps, + DwVfpRegister double_scratch, + SwVfpRegister single_scratch, int field_count) { - // At least one bit set in the first 15 registers. - ASSERT((temps & ((1 << 15) - 1)) != 0); - ASSERT((temps & dst.bit()) == 0); - ASSERT((temps & src.bit()) == 0); - // Primitive implementation using only one temporary register. - - Register tmp = no_reg; - // Find a temp register in temps list. - for (int i = 0; i < 15; i++) { - if ((temps & (1 << i)) != 0) { - tmp.set_code(i); - break; - } + int double_count = field_count / (DwVfpRegister::kSizeInBytes / kPointerSize); + for (int i = 0; i < double_count; i++) { + vldr(double_scratch, FieldMemOperand(src, i * DwVfpRegister::kSizeInBytes)); + vstr(double_scratch, FieldMemOperand(dst, i * DwVfpRegister::kSizeInBytes)); } - ASSERT(!tmp.is(no_reg)); - for (int i = 0; i < field_count; i++) { - ldr(tmp, FieldMemOperand(src, i * kPointerSize)); - str(tmp, FieldMemOperand(dst, i * kPointerSize)); + STATIC_ASSERT(SwVfpRegister::kSizeInBytes == kPointerSize); + STATIC_ASSERT(2 * SwVfpRegister::kSizeInBytes == DwVfpRegister::kSizeInBytes); + + int remain = field_count % (DwVfpRegister::kSizeInBytes / kPointerSize); + if (remain != 0) { + vldr(single_scratch, + FieldMemOperand(src, (field_count - 1) * kPointerSize)); + vstr(single_scratch, + FieldMemOperand(dst, (field_count - 1) * kPointerSize)); } } diff --git a/src/arm/macro-assembler-arm.h b/src/arm/macro-assembler-arm.h index e72b676..75ff64f 100644 --- a/src/arm/macro-assembler-arm.h +++ b/src/arm/macro-assembler-arm.h @@ -743,7 +743,11 @@ class MacroAssembler: public Assembler { Label* gc_required); // Copies a fixed number of fields of heap objects from src to dst. - void CopyFields(Register dst, Register src, RegList temps, int field_count); + void CopyFields(Register dst, + Register src, + DwVfpRegister double_scratch, + SwVfpRegister single_scratch, + int field_count); // Copies a number of bytes from src to dst. All registers are clobbered. On // exit src and dst will point to the place just after where the last byte was -- 2.7.4