From 701790fd34ac526111e4e7b8396da7d8b19dfb83 Mon Sep 17 00:00:00 2001
From: "erik.corry@gmail.com"
Date: Fri, 2 Dec 2011 10:01:50 +0000
Subject: [PATCH] Support multi-byte nop instructions as recommended by the
 optimization guides from AMD and Intel. Review URL:
 http://codereview.chromium.org/8776033

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@10134 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
---
Reviewer notes (this area is ignored by `git am`; not part of the commit):

 * Assembler::Nop(bytes) emits exactly `bytes` bytes of padding (nothing for
   bytes <= 0). Without SSE2 it emits `bytes` single-byte 0x90 nops; with
   SSE2 it uses the 0F 1F /0 multi-byte forms, optionally preceded by 0x66
   prefixes.
 * The switch in Nop() relies on intentional case fall-through
   (2 -> 1, 6 -> 5, 11 -> 10 -> 9 -> 8) to prepend the 0x66 prefixes.
   Requests above 11 bytes emit one 11-byte sequence per loop iteration and
   then handle the remainder.
 * Assembler::Align(m) now issues a single Nop() call for the whole gap
   instead of one nop() per byte.
 * Assembler::IsNop() skips any leading 0x66 prefixes and then accepts either
   0x90 or the 0F 1F opcode pair.
 * In the disassembler's `case 0x66` branch the two new "nop" arms print the
   mnemonic without advancing `data` past the opcode that follows the
   prefixes.
 * NOTE(review): line breaks, indentation and the template arguments
   Vector<char>, static_cast<int>, Handle<Object> and FUNCTION_CAST<F0> were
   reconstructed in this copy; hunk line counts were checked against the @@
   headers, but whitespace should be confirmed against the tree before
   applying.
 * NOTE(review): the From: header appears to be missing its <address> part;
   confirm against the upstream revision.

 src/ia32/assembler-ia32.cc         | 87 +++++++++++++++++++++++++++++++++++++-
 src/ia32/assembler-ia32.h          |  3 +-
 src/ia32/debug-ia32.cc             |  4 +-
 src/ia32/deoptimizer-ia32.cc       |  8 ++--
 src/ia32/disasm-ia32.cc            | 32 ++++++++++++--
 src/ia32/lithium-codegen-ia32.cc   |  4 +-
 test/cctest/test-assembler-ia32.cc | 68 +++++++++++++++++++++++++++++
 test/cctest/test-disasm-ia32.cc    |  5 +++
 8 files changed, 195 insertions(+), 16 deletions(-)

diff --git a/src/ia32/assembler-ia32.cc b/src/ia32/assembler-ia32.cc
index 72e2bf8..ba58362 100644
--- a/src/ia32/assembler-ia32.cc
+++ b/src/ia32/assembler-ia32.cc
@@ -388,8 +388,91 @@ void Assembler::GetCode(CodeDesc* desc) {
 
 void Assembler::Align(int m) {
   ASSERT(IsPowerOf2(m));
-  while ((pc_offset() & (m - 1)) != 0) {
-    nop();
+  int mask = m - 1;
+  int addr = pc_offset();
+  Nop((m - (addr & mask)) & mask);
+}
+
+
+bool Assembler::IsNop(Address addr) {
+  Address a = addr;
+  while (*a == 0x66) a++;
+  if (*a == 0x90) return true;
+  if (a[0] == 0xf && a[1] == 0x1f) return true;
+  return false;
+}
+
+
+void Assembler::Nop(int bytes) {
+  EnsureSpace ensure_space(this);
+
+  if (!CpuFeatures::IsSupported(SSE2)) {
+    // Older CPUs that do not support SSE2 may not support multibyte NOP
+    // instructions.
+    for (; bytes > 0; bytes--) {
+      EMIT(0x90);
+    }
+    return;
+  }
+
+  // Multi byte nops from http://support.amd.com/us/Processor_TechDocs/40546.pdf
+  while (bytes > 0) {
+    switch (bytes) {
+      case 2:
+        EMIT(0x66);
+      case 1:
+        EMIT(0x90);
+        return;
+      case 3:
+        EMIT(0xf);
+        EMIT(0x1f);
+        EMIT(0);
+        return;
+      case 4:
+        EMIT(0xf);
+        EMIT(0x1f);
+        EMIT(0x40);
+        EMIT(0);
+        return;
+      case 6:
+        EMIT(0x66);
+      case 5:
+        EMIT(0xf);
+        EMIT(0x1f);
+        EMIT(0x44);
+        EMIT(0);
+        EMIT(0);
+        return;
+      case 7:
+        EMIT(0xf);
+        EMIT(0x1f);
+        EMIT(0x80);
+        EMIT(0);
+        EMIT(0);
+        EMIT(0);
+        EMIT(0);
+        return;
+      default:
+      case 11:
+        EMIT(0x66);
+        bytes--;
+      case 10:
+        EMIT(0x66);
+        bytes--;
+      case 9:
+        EMIT(0x66);
+        bytes--;
+      case 8:
+        EMIT(0xf);
+        EMIT(0x1f);
+        EMIT(0x84);
+        EMIT(0);
+        EMIT(0);
+        EMIT(0);
+        EMIT(0);
+        EMIT(0);
+        bytes -= 8;
+    }
   }
 }
 
diff --git a/src/ia32/assembler-ia32.h b/src/ia32/assembler-ia32.h
index 7195895..060b01d 100644
--- a/src/ia32/assembler-ia32.h
+++ b/src/ia32/assembler-ia32.h
@@ -659,6 +659,7 @@ class Assembler : public AssemblerBase {
   // possible to align the pc offset to a multiple
   // of m. m must be a power of 2.
   void Align(int m);
+  void Nop(int bytes = 1);
   // Aligns code to something that's optimal for a jump target for the platform.
   void CodeTargetAlign();
 
@@ -1084,7 +1085,7 @@ class Assembler : public AssemblerBase {
   // Get the number of bytes available in the buffer.
   inline int available_space() const { return reloc_info_writer.pos() - pc_; }
 
-  static bool IsNop(Address addr) { return *addr == 0x90; }
+  static bool IsNop(Address addr);
 
   PositionsRecorder* positions_recorder() { return &positions_recorder_; }
 
diff --git a/src/ia32/debug-ia32.cc b/src/ia32/debug-ia32.cc
index 2649560..b37b54b 100644
--- a/src/ia32/debug-ia32.cc
+++ b/src/ia32/debug-ia32.cc
@@ -258,9 +258,7 @@ void Debug::GenerateSlot(MacroAssembler* masm) {
   Label check_codesize;
   __ bind(&check_codesize);
   __ RecordDebugBreakSlot();
-  for (int i = 0; i < Assembler::kDebugBreakSlotLength; i++) {
-    __ nop();
-  }
+  __ Nop(Assembler::kDebugBreakSlotLength);
   ASSERT_EQ(Assembler::kDebugBreakSlotLength,
             masm->SizeOfCodeGeneratedSince(&check_codesize));
 }
diff --git a/src/ia32/deoptimizer-ia32.cc b/src/ia32/deoptimizer-ia32.cc
index eeee4f2..98c2400 100644
--- a/src/ia32/deoptimizer-ia32.cc
+++ b/src/ia32/deoptimizer-ia32.cc
@@ -231,8 +231,8 @@ void Deoptimizer::PatchStackCheckCodeAt(Code* unoptimized_code,
   ASSERT(*(call_target_address - 3) == 0x73 &&  // jae
          *(call_target_address - 2) == 0x07 &&  // offset
          *(call_target_address - 1) == 0xe8);   // call
-  *(call_target_address - 3) = 0x90;  // nop
-  *(call_target_address - 2) = 0x90;  // nop
+  *(call_target_address - 3) = 0x66;  // 2 byte nop part 1
+  *(call_target_address - 2) = 0x90;  // 2 byte nop part 2
   Assembler::set_target_address_at(call_target_address,
                                    replacement_code->entry());
 
@@ -250,8 +250,8 @@ void Deoptimizer::RevertStackCheckCodeAt(Code* unoptimized_code,
          Assembler::target_address_at(call_target_address));
   // Replace the nops from patching (Deoptimizer::PatchStackCheckCode) to
   // restore the conditional branch.
-  ASSERT(*(call_target_address - 3) == 0x90 &&  // nop
-         *(call_target_address - 2) == 0x90 &&  // nop
+  ASSERT(*(call_target_address - 3) == 0x66 &&  // 2 byte nop part 1
+         *(call_target_address - 2) == 0x90 &&  // 2 byte nop part 2
          *(call_target_address - 1) == 0xe8);   // call
   *(call_target_address - 3) = 0x73;  // jae
   *(call_target_address - 2) = 0x07;  // offset
diff --git a/src/ia32/disasm-ia32.cc b/src/ia32/disasm-ia32.cc
index 643789f..0d35732 100644
--- a/src/ia32/disasm-ia32.cc
+++ b/src/ia32/disasm-ia32.cc
@@ -992,7 +992,7 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
         break;
 
       case 0x0F:
-        { byte f0byte = *(data+1);
+        { byte f0byte = data[1];
           const char* f0mnem = F0Mnem(f0byte);
           if (f0byte == 0x18) {
             int mod, regop, rm;
@@ -1000,6 +1000,25 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
             const char* suffix[] = {"nta", "1", "2", "3"};
             AppendToBuffer("%s%s ", f0mnem, suffix[regop & 0x03]);
             data += PrintRightOperand(data);
+          } else if (f0byte == 0x1F && data[2] == 0) {
+            AppendToBuffer("nop");  // 3 byte nop.
+            data += 3;
+          } else if (f0byte == 0x1F && data[2] == 0x40 && data[3] == 0) {
+            AppendToBuffer("nop");  // 4 byte nop.
+            data += 4;
+          } else if (f0byte == 0x1F && data[2] == 0x44 && data[3] == 0 &&
+                     data[4] == 0) {
+            AppendToBuffer("nop");  // 5 byte nop.
+            data += 5;
+          } else if (f0byte == 0x1F && data[2] == 0x80 && data[3] == 0 &&
+                     data[4] == 0 && data[5] == 0 && data[6] == 0) {
+            AppendToBuffer("nop");  // 7 byte nop.
+            data += 7;
+          } else if (f0byte == 0x1F && data[2] == 0x84 && data[3] == 0 &&
+                     data[4] == 0 && data[5] == 0 && data[6] == 0 &&
+                     data[7] == 0) {
+            AppendToBuffer("nop");  // 8 byte nop.
+            data += 8;
           } else if (f0byte == 0xA2 || f0byte == 0x31) {
             AppendToBuffer("%s", f0mnem);
             data += 2;
@@ -1135,8 +1154,12 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
         break;
 
       case 0x66:  // prefix
-        data++;
-        if (*data == 0x8B) {
+        while (*data == 0x66) data++;
+        if (*data == 0xf && data[1] == 0x1f) {
+          AppendToBuffer("nop");  // 0x66 prefix
+        } else if (*data == 0x90) {
+          AppendToBuffer("nop");  // 0x66 prefix
+        } else if (*data == 0x8B) {
           data++;
           data += PrintOperands("mov_w", REG_OPER_OP_ORDER, data);
         } else if (*data == 0x89) {
@@ -1273,6 +1296,9 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
                            NameOfXMMRegister(rm),
                            static_cast<int>(imm8));
             data += 2;
+          } else if (*data == 0x90) {
+            data++;
+            AppendToBuffer("nop");  // 2 byte nop.
           } else if (*data == 0xF3) {
             data++;
             int mod, regop, rm;
diff --git a/src/ia32/lithium-codegen-ia32.cc b/src/ia32/lithium-codegen-ia32.cc
index 2369916..052115b 100644
--- a/src/ia32/lithium-codegen-ia32.cc
+++ b/src/ia32/lithium-codegen-ia32.cc
@@ -4458,9 +4458,7 @@ void LCodeGen::EnsureSpaceForLazyDeopt() {
   int patch_size = Deoptimizer::patch_size();
   if (current_pc < last_lazy_deopt_pc_ + patch_size) {
     int padding_size = last_lazy_deopt_pc_ + patch_size - current_pc;
-    while (padding_size-- > 0) {
-      __ nop();
-    }
+    __ Nop(padding_size);
   }
   last_lazy_deopt_pc_ = masm()->pc_offset();
 }
diff --git a/test/cctest/test-assembler-ia32.cc b/test/cctest/test-assembler-ia32.cc
index cdab8f7..815e618 100644
--- a/test/cctest/test-assembler-ia32.cc
+++ b/test/cctest/test-assembler-ia32.cc
@@ -408,4 +408,72 @@ TEST(AssemblerIa3210) {
   __ nop();
 }
+
+TEST(AssemblerMultiByteNop) {
+  InitializeVM();
+  v8::HandleScope scope;
+  v8::internal::byte buffer[1024];
+  Assembler assm(Isolate::Current(), buffer, sizeof(buffer));
+  __ push(ebx);
+  __ push(ecx);
+  __ push(edx);
+  __ push(edi);
+  __ push(esi);
+  __ mov(eax, 1);
+  __ mov(ebx, 2);
+  __ mov(ecx, 3);
+  __ mov(edx, 4);
+  __ mov(edi, 5);
+  __ mov(esi, 6);
+  for (int i = 0; i < 16; i++) {
+    int before = assm.pc_offset();
+    __ Nop(i);
+    CHECK_EQ(assm.pc_offset() - before, i);
+  }
+
+  Label fail;
+  __ cmp(eax, 1);
+  __ j(not_equal, &fail);
+  __ cmp(ebx, 2);
+  __ j(not_equal, &fail);
+  __ cmp(ecx, 3);
+  __ j(not_equal, &fail);
+  __ cmp(edx, 4);
+  __ j(not_equal, &fail);
+  __ cmp(edi, 5);
+  __ j(not_equal, &fail);
+  __ cmp(esi, 6);
+  __ j(not_equal, &fail);
+  __ mov(eax, 42);
+  __ pop(esi);
+  __ pop(edi);
+  __ pop(edx);
+  __ pop(ecx);
+  __ pop(ebx);
+  __ ret(0);
+  __ bind(&fail);
+  __ mov(eax, 13);
+  __ pop(esi);
+  __ pop(edi);
+  __ pop(edx);
+  __ pop(ecx);
+  __ pop(ebx);
+  __ ret(0);
+
+  CodeDesc desc;
+  assm.GetCode(&desc);
+  Code* code = Code::cast(HEAP->CreateCode(
+      desc,
+      Code::ComputeFlags(Code::STUB),
+      Handle<Object>(HEAP->undefined_value()))->ToObjectChecked());
+  CHECK(code->IsCode());
+
+  F0 f = FUNCTION_CAST<F0>(code->entry());
+  int res = f();
+  CHECK_EQ(42, res);
+}
+
+
+
+
 
 #undef __
diff --git a/test/cctest/test-disasm-ia32.cc b/test/cctest/test-disasm-ia32.cc
index 1e38e4e..da09505 100644
--- a/test/cctest/test-disasm-ia32.cc
+++ b/test/cctest/test-disasm-ia32.cc
@@ -449,6 +449,11 @@ TEST(DisasmIa320) {
     }
   }
 
+  // Nop instructions
+  for (int i = 0; i < 16; i++) {
+    __ Nop(i);
+  }
+
   __ ret(0);
 
   CodeDesc desc;
-- 
2.7.4