From: erik.corry@gmail.com Date: Mon, 17 May 2010 10:51:41 +0000 (+0000) Subject: Better peephole optimization for ARM. This is a commit of X-Git-Tag: upstream/4.7.83~21800 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=afe8c296a46f86b6b247621acd156947657545dc;p=platform%2Fupstream%2Fv8.git Better peephole optimization for ARM. This is a commit of http://codereview.chromium.org/2004006 for Subrato De. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4662 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc index f1f59ce..94652bb 100644 --- a/src/arm/assembler-arm.cc +++ b/src/arm/assembler-arm.cc @@ -268,6 +268,20 @@ const Instr kBlxRegMask = 15 * B24 | 15 * B20 | 15 * B16 | 15 * B12 | 15 * B8 | 15 * B4; const Instr kBlxRegPattern = B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | 3 * B4; +// A mask for the Rd register for push, pop, ldr, str instructions. +const Instr kRdMask = 0x0000f000; +static const Instr kLdrRegFpOffsetPattern = + al | B26 | L | Offset | fp.code() * B16; +static const Instr kStrRegFpOffsetPattern = + al | B26 | Offset | fp.code() * B16; +static const Instr kLdrRegFpNegOffsetPattern = + al | B26 | L | NegOffset | fp.code() * B16; +static const Instr kStrRegFpNegOffsetPattern = + al | B26 | NegOffset | fp.code() * B16; +static const Instr kLdrStrInstrTypeMask = 0xffff0000;  // All bits above Rd. +static const Instr kLdrStrInstrArgumentMask = 0x0000ffff;  // Rd and offset. +static const Instr kLdrStrOffsetMask = 0x00000fff;  // 12-bit offset field. +static const int kRdShift = 12;  // Lowest bit of the Rd field. // Spare buffer. 
static const int kMinimalBufferSize = 4*KB; @@ -395,6 +409,43 @@ Instr Assembler::SetLdrRegisterImmediateOffset(Instr instr, int offset) { } +Register Assembler::GetRd(Instr instr) { + Register reg; + reg.code_ = ((instr & kRdMask) >> kRdShift); + return reg; +} + + +bool Assembler::IsPush(Instr instr) { + return ((instr & ~kRdMask) == kPushRegPattern); +} + + +bool Assembler::IsPop(Instr instr) { + return ((instr & ~kRdMask) == kPopRegPattern); +} + + +bool Assembler::IsStrRegFpOffset(Instr instr) { + return ((instr & kLdrStrInstrTypeMask) == kStrRegFpOffsetPattern); +} + + +bool Assembler::IsLdrRegFpOffset(Instr instr) { + return ((instr & kLdrStrInstrTypeMask) == kLdrRegFpOffsetPattern); +} + + +bool Assembler::IsStrRegFpNegOffset(Instr instr) { + return ((instr & kLdrStrInstrTypeMask) == kStrRegFpNegOffsetPattern); +} + + +bool Assembler::IsLdrRegFpNegOffset(Instr instr) { + return ((instr & kLdrStrInstrTypeMask) == kLdrRegFpNegOffsetPattern); +} + + // Labels refer to positions in the (to be) generated code. // There are bound, linked, and unused labels. // @@ -1086,20 +1137,179 @@ void Assembler::ldr(Register dst, const MemOperand& src, Condition cond) { } addrmod2(cond | B26 | L, dst, src); - // Eliminate pattern: push(r), pop(r) - // str(r, MemOperand(sp, 4, NegPreIndex), al) - // ldr(r, MemOperand(sp, 4, PostIndex), al) - // Both instructions can be eliminated. + // Eliminate pattern: push(ry), pop(rx) + // str(ry, MemOperand(sp, 4, NegPreIndex), al) + // ldr(rx, MemOperand(sp, 4, PostIndex), al) + // Both instructions can be eliminated if ry == rx. + // If ry != rx, a register copy from ry to rx is inserted + // after eliminating the push and the pop instructions. 
int pattern_size = 2 * kInstrSize; + if (FLAG_push_pop_elimination && + last_bound_pos_ <= (pc_offset() - pattern_size) && + reloc_info_writer.last_pc() <= (pc_ - pattern_size) && + IsPush(instr_at(pc_ - 2 * kInstrSize)) &&  // Read only when guarded. + IsPop(instr_at(pc_ - 1 * kInstrSize))) { + Instr push_instr = instr_at(pc_ - 2 * kInstrSize); + Instr pop_instr = instr_at(pc_ - 1 * kInstrSize); + if ((pop_instr & kRdMask) != (push_instr & kRdMask)) { + // For consecutive push and pop on different registers, + // we delete both the push & pop and insert a register move. + // push ry, pop rx --> mov rx, ry + Register reg_pushed, reg_popped; + reg_pushed = GetRd(push_instr); + reg_popped = GetRd(pop_instr); + pc_ -= 2 * kInstrSize; + // Insert a mov instruction, which is better than a pair of push & pop + mov(reg_popped, reg_pushed); + if (FLAG_print_push_pop_elimination) { + PrintF("%x push/pop (diff reg) replaced by a reg move\n", pc_offset()); + } + } else { + // For consecutive push and pop on the same register, + // both the push and the pop can be deleted. + pc_ -= 2 * kInstrSize; + if (FLAG_print_push_pop_elimination) { + PrintF("%x push/pop (same reg) eliminated\n", pc_offset()); + } + } + } + + pattern_size = 2 * kInstrSize; + if (FLAG_peephole_optimization && last_bound_pos_ <= (pc_offset() - pattern_size) && - reloc_info_writer.last_pc() <= (pc_ - pattern_size) && - // Pattern. 
- instr_at(pc_ - 1 * kInstrSize) == (kPopRegPattern | dst.code() * B12) && - instr_at(pc_ - 2 * kInstrSize) == (kPushRegPattern | dst.code() * B12)) { - pc_ -= 2 * kInstrSize; - if (FLAG_print_push_pop_elimination) { - PrintF("%x push/pop (same reg) eliminated\n", pc_offset()); + reloc_info_writer.last_pc() <= (pc_ - pattern_size)) { + Instr str_instr = instr_at(pc_ - 2 * kInstrSize); + Instr ldr_instr = instr_at(pc_ - 1 * kInstrSize); + + if ((IsStrRegFpOffset(str_instr) && + IsLdrRegFpOffset(ldr_instr)) || + (IsStrRegFpNegOffset(str_instr) && + IsLdrRegFpNegOffset(ldr_instr))) { + if ((ldr_instr & kLdrStrInstrArgumentMask) == + (str_instr & kLdrStrInstrArgumentMask)) { + // Pattern: str then ldr at the same fp+offset, same register. + // + // The following: + // str rx, [fp, #-12] + // ldr rx, [fp, #-12] + // + // Becomes: + // str rx, [fp, #-12] + + pc_ -= 1 * kInstrSize; + if (FLAG_print_peephole_optimization) { + PrintF("%x str/ldr (fp + same offset), same reg\n", pc_offset()); + } + } else if ((ldr_instr & kLdrStrOffsetMask) == + (str_instr & kLdrStrOffsetMask)) { + // Pattern: str then ldr at the same fp+offset, different register. + // + // The following: + // str rx, [fp, #-12] + // ldr ry, [fp, #-12] + // + // Becomes: + // str rx, [fp, #-12] + // mov ry, rx + + Register reg_stored, reg_loaded; + reg_stored = GetRd(str_instr); + reg_loaded = GetRd(ldr_instr); + pc_ -= 1 * kInstrSize; + // Insert a mov instruction, which is better than ldr. 
+ mov(reg_loaded, reg_stored); + if (FLAG_print_peephole_optimization) { + PrintF("%x str/ldr (fp + same offset), diff reg \n", pc_offset()); + } + } + } + } + + pattern_size = 3 * kInstrSize; + if (FLAG_push_pop_elimination && + last_bound_pos_ <= (pc_offset() - pattern_size) && + reloc_info_writer.last_pc() <= (pc_ - pattern_size)) { + Instr mem_write_instr = instr_at(pc_ - 3 * kInstrSize); + Instr ldr_instr = instr_at(pc_ - 2 * kInstrSize); + Instr mem_read_instr = instr_at(pc_ - 1 * kInstrSize); + if (IsPush(mem_write_instr) && + IsPop(mem_read_instr)) { + if ((IsLdrRegFpOffset(ldr_instr) || + IsLdrRegFpNegOffset(ldr_instr))) { + if ((mem_write_instr & kRdMask) == + (mem_read_instr & kRdMask)) { + // Pattern: push & pop from/to same register, + // with a fp+offset ldr in between + // + // The following: + // str rx, [sp, #-4]! + // ldr rz, [fp, #-24] + // ldr rx, [sp], #+4 + // + // Becomes: + // if(rx == rz) + // delete all + // else + // ldr rz, [fp, #-24] + + if ((mem_write_instr & kRdMask) == (ldr_instr & kRdMask)) { + pc_ -= 3 * kInstrSize; + } else { + pc_ -= 3 * kInstrSize; + // Re-emit the ldr rz that sat between the push and the pop. + emit(ldr_instr); + } + if (FLAG_print_push_pop_elimination) { + PrintF("%x push/pop -dead ldr fp+offset in middle\n", pc_offset()); + } + } else { + // Pattern: push & pop from/to different registers + // with a fp+offset ldr in between + // + // The following: + // str rx, [sp, #-4]! 
+ // ldr rz, [fp, #-24] + // ldr ry, [sp], #+4 + // + // Becomes: + // if(ry == rz) + // mov ry, rx; + // else if(rx != rz) + // ldr rz, [fp, #-24] + // mov ry, rx + // else (here rx == rz and ry != rz) + // mov ry, rx + // ldr rz, [fp, #-24] + + Register reg_pushed, reg_popped; + if ((mem_read_instr & kRdMask) == (ldr_instr & kRdMask)) { + reg_pushed = GetRd(mem_write_instr); + reg_popped = GetRd(mem_read_instr); + pc_ -= 3 * kInstrSize; + mov(reg_popped, reg_pushed); + } else if ((mem_write_instr & kRdMask) + != (ldr_instr & kRdMask)) { + reg_pushed = GetRd(mem_write_instr); + reg_popped = GetRd(mem_read_instr); + pc_ -= 3 * kInstrSize; + emit(ldr_instr); + mov(reg_popped, reg_pushed); + } else if (((mem_read_instr & kRdMask) + != (ldr_instr & kRdMask)) || + ((mem_write_instr & kRdMask) + == (ldr_instr & kRdMask)) ) { + reg_pushed = GetRd(mem_write_instr); + reg_popped = GetRd(mem_read_instr); + pc_ -= 3 * kInstrSize; + mov(reg_popped, reg_pushed); + emit(ldr_instr); + } + if (FLAG_print_push_pop_elimination) { + PrintF("%x push/pop (ldr fp+off in middle)\n", pc_offset()); + } + } + } } } } diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h index 61b84d4..3199e7e 100644 --- a/src/arm/assembler-arm.h +++ b/src/arm/assembler-arm.h @@ -987,6 +987,13 @@ class Assembler : public Malloced { static bool IsLdrRegisterImmediate(Instr instr); static int GetLdrRegisterImmediateOffset(Instr instr); static Instr SetLdrRegisterImmediateOffset(Instr instr, int offset); + static Register GetRd(Instr instr); + static bool IsPush(Instr instr); + static bool IsPop(Instr instr); + static bool IsStrRegFpOffset(Instr instr); + static bool IsLdrRegFpOffset(Instr instr); + static bool IsStrRegFpNegOffset(Instr instr); + static bool IsLdrRegFpNegOffset(Instr instr); protected: diff --git a/src/flag-definitions.h b/src/flag-definitions.h index 490a2c5..3e9a709 100644 --- a/src/flag-definitions.h +++ b/src/flag-definitions.h @@ -104,6 +104,10 @@ 
DEFINE_bool(push_pop_elimination, true, "eliminate redundant push/pops in assembly code") DEFINE_bool(print_push_pop_elimination, false, "print elimination of redundant push/pops in assembly code") +DEFINE_bool(peephole_optimization, true, + "perform peephole optimizations in assembly code") +DEFINE_bool(print_peephole_optimization, false, + "print peephole optimizations in assembly code") DEFINE_bool(enable_sse2, true, "enable use of SSE2 instructions if available") DEFINE_bool(enable_sse3, true,