From 762157d6973e5be3a161dcee32ae4cac17a616ee Mon Sep 17 00:00:00 2001 From: "rodolph.perfetta@gmail.com" Date: Wed, 10 Jul 2013 15:32:39 +0000 Subject: [PATCH] ARM: Implement memcpy using NEON. Add support for a few NEON and ARM SIMD instructions and use them for various memcpy operations. BUG=none TEST=none Review URL: https://chromiumcodereview.appspot.com/17858002 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@15602 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/arm/assembler-arm.cc | 243 ++++++++++++++++++++++++- src/arm/assembler-arm.h | 113 ++++++++++++ src/arm/codegen-arm.cc | 246 ++++++++++++++++++++++++++ src/arm/constants-arm.h | 43 +++-- src/arm/disasm-arm.cc | 232 +++++++++++++++++++++++- src/arm/simulator-arm.cc | 364 +++++++++++++++++++++++++++++++++++--- src/arm/simulator-arm.h | 15 +- src/flag-definitions.h | 2 + src/globals.h | 16 +- src/platform-linux.cc | 33 ++++ src/platform-nullos.cc | 5 + src/platform-posix.cc | 26 ++- src/platform.h | 40 ++++- src/v8globals.h | 12 ++ src/v8utils.h | 104 +++++++++++ test/cctest/test-assembler-arm.cc | 182 +++++++++++++++++++ test/cctest/test-disasm-arm.cc | 33 ++++ 17 files changed, 1647 insertions(+), 62 deletions(-) diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc index e8afc5d..d95946e 100644 --- a/src/arm/assembler-arm.cc +++ b/src/arm/assembler-arm.cc @@ -49,6 +49,7 @@ bool CpuFeatures::initialized_ = false; #endif unsigned CpuFeatures::supported_ = 0; unsigned CpuFeatures::found_by_runtime_probing_only_ = 0; +unsigned CpuFeatures::cache_line_size_ = 64; ExternalReference ExternalReference::cpu_features() { @@ -125,6 +126,9 @@ void CpuFeatures::Probe() { static_cast(1) << VFP3 | static_cast(1) << ARMv7; } + if (FLAG_enable_neon) { + supported_ |= 1u << NEON; + } // For the simulator=arm build, use ARMv7 when FLAG_enable_armv7 is enabled if (FLAG_enable_armv7) { supported_ |= static_cast(1) << ARMv7; @@ -157,6 +161,10 @@ void CpuFeatures::Probe() { static_cast(1) << ARMv7; } + if (!IsSupported(NEON) && FLAG_enable_neon && OS::ArmCpuHasFeature(NEON)) { + found_by_runtime_probing_only_ |= 1u << NEON; + } + if (!IsSupported(ARMv7) && FLAG_enable_armv7 && OS::ArmCpuHasFeature(ARMv7)) { found_by_runtime_probing_only_ |= static_cast(1) << ARMv7; } @@ -171,12 +179,18 @@ void CpuFeatures::Probe() { static_cast(1) << UNALIGNED_ACCESSES; } - if (OS::GetCpuImplementer() == QUALCOMM_IMPLEMENTER && + CpuImplementer implementer = OS::GetCpuImplementer(); + if (implementer == QUALCOMM_IMPLEMENTER && FLAG_enable_movw_movt && OS::ArmCpuHasFeature(ARMv7)) { found_by_runtime_probing_only_ |= static_cast(1) << MOVW_MOVT_IMMEDIATE_LOADS; } + CpuPart part = OS::GetCpuPart(implementer); + if ((part == CORTEX_A9) || (part == CORTEX_A5)) { + cache_line_size_ = 32; + } + if (!IsSupported(VFP32DREGS) && FLAG_enable_32dregs && OS::ArmCpuHasFeature(VFP32DREGS)) { found_by_runtime_probing_only_ |= static_cast(1) << VFP32DREGS; @@ -247,11 +261,12 @@ void CpuFeatures::PrintTarget() { void CpuFeatures::PrintFeatures() { printf( - "ARMv7=%d VFP3=%d VFP32DREGS=%d SUDIV=%d UNALIGNED_ACCESSES=%d " + "ARMv7=%d VFP3=%d VFP32DREGS=%d NEON=%d SUDIV=%d UNALIGNED_ACCESSES=%d " "MOVW_MOVT_IMMEDIATE_LOADS=%d", CpuFeatures::IsSupported(ARMv7), CpuFeatures::IsSupported(VFP3), CpuFeatures::IsSupported(VFP32DREGS), + CpuFeatures::IsSupported(NEON), CpuFeatures::IsSupported(SUDIV), CpuFeatures::IsSupported(UNALIGNED_ACCESSES), CpuFeatures::IsSupported(MOVW_MOVT_IMMEDIATE_LOADS)); @@ -378,6 +393,66 @@ MemOperand::MemOperand(Register rn, Register rm, 
} +NeonMemOperand::NeonMemOperand(Register rn, AddrMode am, int align) { + ASSERT((am == Offset) || (am == PostIndex)); + rn_ = rn; + rm_ = (am == Offset) ? pc : sp; + SetAlignment(align); +} + + +NeonMemOperand::NeonMemOperand(Register rn, Register rm, int align) { + rn_ = rn; + rm_ = rm; + SetAlignment(align); +} + + +void NeonMemOperand::SetAlignment(int align) { + switch (align) { + case 0: + align_ = 0; + break; + case 64: + align_ = 1; + break; + case 128: + align_ = 2; + break; + case 256: + align_ = 3; + break; + default: + UNREACHABLE(); + align_ = 0; + break; + } +} + + +NeonListOperand::NeonListOperand(DoubleRegister base, int registers_count) { + base_ = base; + switch (registers_count) { + case 1: + type_ = nlt_1; + break; + case 2: + type_ = nlt_2; + break; + case 3: + type_ = nlt_3; + break; + case 4: + type_ = nlt_4; + break; + default: + UNREACHABLE(); + type_ = nlt_1; + break; + } +} + + // ----------------------------------------------------------------------------- // Specific instructions, constants, and masks. @@ -1546,6 +1621,107 @@ void Assembler::bfi(Register dst, } +void Assembler::pkhbt(Register dst, + Register src1, + const Operand& src2, + Condition cond ) { + // Instruction details available in ARM DDI 0406C.b, A8.8.125. + // cond(31-28) | 01101000(27-20) | Rn(19-16) | + // Rd(15-12) | imm5(11-7) | 0(6) | 01(5-4) | Rm(3-0) + ASSERT(!dst.is(pc)); + ASSERT(!src1.is(pc)); + ASSERT(!src2.rm().is(pc)); + ASSERT(!src2.rm().is(no_reg)); + ASSERT(src2.rs().is(no_reg)); + ASSERT((src2.shift_imm_ >= 0) && (src2.shift_imm_ <= 31)); + ASSERT(src2.shift_op() == LSL); + emit(cond | 0x68*B20 | src1.code()*B16 | dst.code()*B12 | + src2.shift_imm_*B7 | B4 | src2.rm().code()); +} + + +void Assembler::pkhtb(Register dst, + Register src1, + const Operand& src2, + Condition cond) { + // Instruction details available in ARM DDI 0406C.b, A8.8.125. + // cond(31-28) | 01101000(27-20) | Rn(19-16) | + // Rd(15-12) | imm5(11-7) | 1(6) | 01(5-4) | Rm(3-0) + ASSERT(!dst.is(pc)); + ASSERT(!src1.is(pc)); + ASSERT(!src2.rm().is(pc)); + ASSERT(!src2.rm().is(no_reg)); + ASSERT(src2.rs().is(no_reg)); + ASSERT((src2.shift_imm_ >= 1) && (src2.shift_imm_ <= 32)); + ASSERT(src2.shift_op() == ASR); + int asr = (src2.shift_imm_ == 32) ? 0 : src2.shift_imm_; + emit(cond | 0x68*B20 | src1.code()*B16 | dst.code()*B12 | + asr*B7 | B6 | B4 | src2.rm().code()); +} + + +void Assembler::uxtb(Register dst, + const Operand& src, + Condition cond) { + // Instruction details available in ARM DDI 0406C.b, A8.8.274. + // cond(31-28) | 01101110(27-20) | 1111(19-16) | + // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0) + ASSERT(!dst.is(pc)); + ASSERT(!src.rm().is(pc)); + ASSERT(!src.rm().is(no_reg)); + ASSERT(src.rs().is(no_reg)); + ASSERT((src.shift_imm_ == 0) || + (src.shift_imm_ == 8) || + (src.shift_imm_ == 16) || + (src.shift_imm_ == 24)); + ASSERT(src.shift_op() == ROR); + emit(cond | 0x6E*B20 | 0xF*B16 | dst.code()*B12 | + ((src.shift_imm_ >> 1)&0xC)*B8 | 7*B4 | src.rm().code()); +} + + +void Assembler::uxtab(Register dst, + Register src1, + const Operand& src2, + Condition cond) { + // Instruction details available in ARM DDI 0406C.b, A8.8.271. 
+ // cond(31-28) | 01101110(27-20) | Rn(19-16) | + // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0) + ASSERT(!dst.is(pc)); + ASSERT(!src1.is(pc)); + ASSERT(!src2.rm().is(pc)); + ASSERT(!src2.rm().is(no_reg)); + ASSERT(src2.rs().is(no_reg)); + ASSERT((src2.shift_imm_ == 0) || + (src2.shift_imm_ == 8) || + (src2.shift_imm_ == 16) || + (src2.shift_imm_ == 24)); + ASSERT(src2.shift_op() == ROR); + emit(cond | 0x6E*B20 | src1.code()*B16 | dst.code()*B12 | + ((src2.shift_imm_ >> 1) &0xC)*B8 | 7*B4 | src2.rm().code()); +} + + +void Assembler::uxtb16(Register dst, + const Operand& src, + Condition cond) { + // Instruction details available in ARM DDI 0406C.b, A8.8.275. + // cond(31-28) | 01101100(27-20) | 1111(19-16) | + // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0) + ASSERT(!dst.is(pc)); + ASSERT(!src.rm().is(pc)); + ASSERT(!src.rm().is(no_reg)); + ASSERT(src.rs().is(no_reg)); + ASSERT((src.shift_imm_ == 0) || + (src.shift_imm_ == 8) || + (src.shift_imm_ == 16) || + (src.shift_imm_ == 24)); + ASSERT(src.shift_op() == ROR); + emit(cond | 0x6C*B20 | 0xF*B16 | dst.code()*B12 | + ((src.shift_imm_ >> 1)&0xC)*B8 | 7*B4 | src.rm().code()); +} + + // Status register access instructions. void Assembler::mrs(Register dst, SRegister s, Condition cond) { ASSERT(!dst.is(pc)); @@ -1644,6 +1820,25 @@ void Assembler::strd(Register src1, Register src2, } +// Preload instructions. +void Assembler::pld(const MemOperand& address) { + // Instruction details available in ARM DDI 0406C.b, A8.8.128. + // 1111(31-28) | 0111(27-24) | U(23) | R(22) | 01(21-20) | Rn(19-16) | + // 1111(15-12) | imm5(11-07) | type(6-5) | 0(4)| Rm(3-0) | + ASSERT(address.rm().is(no_reg)); + ASSERT(address.am() == Offset); + int U = B23; + int offset = address.offset(); + if (offset < 0) { + offset = -offset; + U = 0; + } + ASSERT(offset < 4096); + emit(kSpecialCondition | B26 | B24 | U | B22 | B20 | address.rn().code()*B16 | + 0xf*B12 | offset); +} + + // Load/Store multiple instructions. void Assembler::ldm(BlockAddrMode am, Register base, @@ -2707,6 +2902,50 @@ void Assembler::vsqrt(const DwVfpRegister dst, } +// Support for NEON. + +void Assembler::vld1(NeonSize size, + const NeonListOperand& dst, + const NeonMemOperand& src) { + // Instruction details available in ARM DDI 0406C.b, A8.8.320. + // 1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) | + // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0) + ASSERT(CpuFeatures::IsSupported(NEON)); + int vd, d; + dst.base().split_code(&vd, &d); + emit(0xFU*B28 | 4*B24 | d*B22 | 2*B20 | src.rn().code()*B16 | vd*B12 | + dst.type()*B8 | size*B6 | src.align()*B4 | src.rm().code()); +} + + +void Assembler::vst1(NeonSize size, + const NeonListOperand& src, + const NeonMemOperand& dst) { + // Instruction details available in ARM DDI 0406C.b, A8.8.404. + // 1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) | + // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0) + ASSERT(CpuFeatures::IsSupported(NEON)); + int vd, d; + src.base().split_code(&vd, &d); + emit(0xFU*B28 | 4*B24 | d*B22 | dst.rn().code()*B16 | vd*B12 | src.type()*B8 | + size*B6 | dst.align()*B4 | dst.rm().code()); +} + + +void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) { + // Instruction details available in ARM DDI 0406C.b, A8.8.346. 
+ // 1111(31-28) | 001(27-25) | U(24) | 1(23) | D(22) | imm3(21-19) | + // 000(18-16) | Vd(15-12) | 101000(11-6) | M(5) | 1(4) | Vm(3-0) + ASSERT(CpuFeatures::IsSupported(NEON)); + int vd, d; + dst.split_code(&vd, &d); + int vm, m; + src.split_code(&vm, &m); + emit(0xFU*B28 | B25 | (dt & NeonDataTypeUMask) | B23 | d*B22 | + (dt & NeonDataTypeSizeMask)*B19 | vd*B12 | 0xA*B8 | m*B5 | B4 | vm); +} + + // Pseudo instructions. void Assembler::nop(int type) { // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h index 3000860..62dd94c 100644 --- a/src/arm/assembler-arm.h +++ b/src/arm/assembler-arm.h @@ -78,12 +78,15 @@ class CpuFeatures : public AllStatic { (!Serializer::enabled() || !IsFoundByRuntimeProbingOnly(f))); } + static unsigned cache_line_size() { return cache_line_size_; } + private: #ifdef DEBUG static bool initialized_; #endif static unsigned supported_; static unsigned found_by_runtime_probing_only_; + static unsigned cache_line_size_; friend class ExternalReference; DISALLOW_COPY_AND_ASSIGN(CpuFeatures); @@ -301,6 +304,36 @@ struct DwVfpRegister { typedef DwVfpRegister DoubleRegister; +// Quad word NEON register. +struct QwNeonRegister { + static const int kMaxNumRegisters = 16; + + static QwNeonRegister from_code(int code) { + QwNeonRegister r = { code }; + return r; + } + + bool is_valid() const { + return (0 <= code_) && (code_ < kMaxNumRegisters); + } + bool is(QwNeonRegister reg) const { return code_ == reg.code_; } + int code() const { + ASSERT(is_valid()); + return code_; + } + void split_code(int* vm, int* m) const { + ASSERT(is_valid()); + *m = (code_ & 0x10) >> 4; + *vm = code_ & 0x0F; + } + + int code_; +}; + + +typedef QwNeonRegister QuadRegister; + + // Support for the VFP registers s0 to s31 (d0 to d15). // Note that "s(N):s(N+1)" is the same as "d(N/2)". const SwVfpRegister s0 = { 0 }; @@ -370,6 +403,23 @@ const DwVfpRegister d29 = { 29 }; const DwVfpRegister d30 = { 30 }; const DwVfpRegister d31 = { 31 }; +const QwNeonRegister q0 = { 0 }; +const QwNeonRegister q1 = { 1 }; +const QwNeonRegister q2 = { 2 }; +const QwNeonRegister q3 = { 3 }; +const QwNeonRegister q4 = { 4 }; +const QwNeonRegister q5 = { 5 }; +const QwNeonRegister q6 = { 6 }; +const QwNeonRegister q7 = { 7 }; +const QwNeonRegister q8 = { 8 }; +const QwNeonRegister q9 = { 9 }; +const QwNeonRegister q10 = { 10 }; +const QwNeonRegister q11 = { 11 }; +const QwNeonRegister q12 = { 12 }; +const QwNeonRegister q13 = { 13 }; +const QwNeonRegister q14 = { 14 }; +const QwNeonRegister q15 = { 15 }; + // Aliases for double registers. Defined using #define instead of // "static const DwVfpRegister&" because Clang complains otherwise when a // compilation unit that includes this header doesn't use the variables. @@ -562,6 +612,42 @@ class MemOperand BASE_EMBEDDED { friend class Assembler; }; + +// Class NeonMemOperand represents a memory operand in load and +// store NEON instructions +class NeonMemOperand BASE_EMBEDDED { + public: + // [rn {:align}] Offset + // [rn {:align}]! 
PostIndex + explicit NeonMemOperand(Register rn, AddrMode am = Offset, int align = 0); + + // [rn {:align}], rm PostIndex + explicit NeonMemOperand(Register rn, Register rm, int align = 0); + + Register rn() const { return rn_; } + Register rm() const { return rm_; } + int align() const { return align_; } + + private: + void SetAlignment(int align); + + Register rn_; // base + Register rm_; // register increment + int align_; +}; + + +// Class NeonListOperand represents a list of NEON registers +class NeonListOperand BASE_EMBEDDED { + public: + explicit NeonListOperand(DoubleRegister base, int registers_count = 1); + DoubleRegister base() const { return base_; } + NeonListType type() const { return type_; } + private: + DoubleRegister base_; + NeonListType type_; +}; + extern const Instr kMovLrPc; extern const Instr kLdrPCMask; extern const Instr kLdrPCPattern; @@ -866,6 +952,19 @@ class Assembler : public AssemblerBase { void bfi(Register dst, Register src, int lsb, int width, Condition cond = al); + void pkhbt(Register dst, Register src1, const Operand& src2, + Condition cond = al); + + void pkhtb(Register dst, Register src1, const Operand& src2, + Condition cond = al); + + void uxtb(Register dst, const Operand& src, Condition cond = al); + + void uxtab(Register dst, Register src1, const Operand& src2, + Condition cond = al); + + void uxtb16(Register dst, const Operand& src, Condition cond = al); + // Status register access instructions void mrs(Register dst, SRegister s, Condition cond = al); @@ -887,6 +986,9 @@ class Assembler : public AssemblerBase { Register src2, const MemOperand& dst, Condition cond = al); + // Preload instructions + void pld(const MemOperand& address); + // Load/Store multiple instructions void ldm(BlockAddrMode am, Register base, RegList dst, Condition cond = al); void stm(BlockAddrMode am, Register base, RegList src, Condition cond = al); @@ -1097,6 +1199,17 @@ class Assembler : public AssemblerBase { const DwVfpRegister src, const Condition cond = al); + // Support for NEON. + // All these APIs support D0 to D31 and Q0 to Q15. 
+ + void vld1(NeonSize size, + const NeonListOperand& dst, + const NeonMemOperand& src); + void vst1(NeonSize size, + const NeonListOperand& src, + const NeonMemOperand& dst); + void vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src); + // Pseudo instructions // Different nop operations are used by the code generator to detect certain diff --git a/src/arm/codegen-arm.cc b/src/arm/codegen-arm.cc index f411b13..4739541 100644 --- a/src/arm/codegen-arm.cc +++ b/src/arm/codegen-arm.cc @@ -112,6 +112,252 @@ UnaryMathFunction CreateExpFunction() { #endif } +#if defined(V8_HOST_ARCH_ARM) +OS::MemCopyUint8Function CreateMemCopyUint8Function( + OS::MemCopyUint8Function stub) { +#if defined(USE_SIMULATOR) + return stub; +#else + if (Serializer::enabled() || !CpuFeatures::IsSupported(UNALIGNED_ACCESSES)) { + return stub; + } + size_t actual_size; + byte* buffer = static_cast(OS::Allocate(1 * KB, &actual_size, true)); + if (buffer == NULL) return stub; + + MacroAssembler masm(NULL, buffer, static_cast(actual_size)); + + Register dest = r0; + Register src = r1; + Register chars = r2; + Register temp1 = r3; + Label less_4; + + if (CpuFeatures::IsSupported(NEON)) { + Label loop, less_256, less_128, less_64, less_32, _16_or_less, _8_or_less; + Label size_less_than_8; + __ pld(MemOperand(src, 0)); + + __ cmp(chars, Operand(8)); + __ b(lt, &size_less_than_8); + __ cmp(chars, Operand(32)); + __ b(lt, &less_32); + if (CpuFeatures::cache_line_size() == 32) { + __ pld(MemOperand(src, 32)); + } + __ cmp(chars, Operand(64)); + __ b(lt, &less_64); + __ pld(MemOperand(src, 64)); + if (CpuFeatures::cache_line_size() == 32) { + __ pld(MemOperand(src, 96)); + } + __ cmp(chars, Operand(128)); + __ b(lt, &less_128); + __ pld(MemOperand(src, 128)); + if (CpuFeatures::cache_line_size() == 32) { + __ pld(MemOperand(src, 160)); + } + __ pld(MemOperand(src, 192)); + if (CpuFeatures::cache_line_size() == 32) { + __ pld(MemOperand(src, 224)); + } + __ cmp(chars, Operand(256)); + __ b(lt, &less_256); + __ sub(chars, chars, Operand(256)); + + __ bind(&loop); + __ pld(MemOperand(src, 256)); + __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex)); + if (CpuFeatures::cache_line_size() == 32) { + __ pld(MemOperand(src, 256)); + } + __ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex)); + __ sub(chars, chars, Operand(64), SetCC); + __ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex)); + __ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex)); + __ b(ge, &loop); + __ add(chars, chars, Operand(256)); + + __ bind(&less_256); + __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex)); + __ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex)); + __ sub(chars, chars, Operand(128)); + __ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex)); + __ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex)); + __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex)); + __ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex)); + __ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex)); + __ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex)); + __ cmp(chars, Operand(64)); + __ b(lt, &less_64); + + __ bind(&less_128); + __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex)); + __ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex)); + __ sub(chars, chars, Operand(64)); + __ vst1(Neon8, NeonListOperand(d0, 4), 
NeonMemOperand(dest, PostIndex)); + __ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex)); + + __ bind(&less_64); + __ cmp(chars, Operand(32)); + __ b(lt, &less_32); + __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex)); + __ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex)); + __ sub(chars, chars, Operand(32)); + + __ bind(&less_32); + __ cmp(chars, Operand(16)); + __ b(le, &_16_or_less); + __ vld1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(src, PostIndex)); + __ vst1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(dest, PostIndex)); + __ sub(chars, chars, Operand(16)); + + __ bind(&_16_or_less); + __ cmp(chars, Operand(8)); + __ b(le, &_8_or_less); + __ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src, PostIndex)); + __ vst1(Neon8, NeonListOperand(d0), NeonMemOperand(dest, PostIndex)); + __ sub(chars, chars, Operand(8)); + + // Do a last copy which may overlap with the previous copy (up to 8 bytes). + __ bind(&_8_or_less); + __ rsb(chars, chars, Operand(8)); + __ sub(src, src, Operand(chars)); + __ sub(dest, dest, Operand(chars)); + __ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src)); + __ vst1(Neon8, NeonListOperand(d0), NeonMemOperand(dest)); + + __ Ret(); + + __ bind(&size_less_than_8); + + __ bic(temp1, chars, Operand(0x3), SetCC); + __ b(&less_4, eq); + __ ldr(temp1, MemOperand(src, 4, PostIndex)); + __ str(temp1, MemOperand(dest, 4, PostIndex)); + } else { + Register temp2 = ip; + Label loop; + + __ bic(temp2, chars, Operand(0x3), SetCC); + __ b(&less_4, eq); + __ add(temp2, dest, temp2); + + __ bind(&loop); + __ ldr(temp1, MemOperand(src, 4, PostIndex)); + __ str(temp1, MemOperand(dest, 4, PostIndex)); + __ cmp(dest, temp2); + __ b(&loop, ne); + } + + __ bind(&less_4); + __ mov(chars, Operand(chars, LSL, 31), SetCC); + // bit0 => Z (ne), bit1 => C (cs) + __ ldrh(temp1, MemOperand(src, 2, PostIndex), cs); + __ strh(temp1, MemOperand(dest, 2, PostIndex), cs); + __ ldrb(temp1, MemOperand(src), ne); + __ strb(temp1, MemOperand(dest), ne); + __ Ret(); + + CodeDesc desc; + masm.GetCode(&desc); + ASSERT(!RelocInfo::RequiresRelocation(desc)); + + CPU::FlushICache(buffer, actual_size); + OS::ProtectCode(buffer, actual_size); + return FUNCTION_CAST<OS::MemCopyUint8Function>(buffer); +#endif +} + + +// Convert 8 to 16. The number of characters to copy must be at least 8. +OS::MemCopyUint16Uint8Function CreateMemCopyUint16Uint8Function( + OS::MemCopyUint16Uint8Function stub) { +#if defined(USE_SIMULATOR) + return stub; +#else + if (Serializer::enabled() || !CpuFeatures::IsSupported(UNALIGNED_ACCESSES)) { + return stub; + } + size_t actual_size; + byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true)); + if (buffer == NULL) return stub; + + MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size)); + + Register dest = r0; + Register src = r1; + Register chars = r2; + if (CpuFeatures::IsSupported(NEON)) { + Register temp = r3; + Label loop; + + __ bic(temp, chars, Operand(0x7)); + __ sub(chars, chars, Operand(temp)); + __ add(temp, dest, Operand(temp, LSL, 1)); + + __ bind(&loop); + __ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src, PostIndex)); + __ vmovl(NeonU8, q0, d0); + __ vst1(Neon16, NeonListOperand(d0, 2), NeonMemOperand(dest, PostIndex)); + __ cmp(dest, temp); + __ b(&loop, ne); + + // Do a last copy which will overlap with the previous copy (1 to 8 bytes).
+ __ rsb(chars, chars, Operand(8)); + __ sub(src, src, Operand(chars)); + __ sub(dest, dest, Operand(chars, LSL, 1)); + __ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src)); + __ vmovl(NeonU8, q0, d0); + __ vst1(Neon16, NeonListOperand(d0, 2), NeonMemOperand(dest)); + __ Ret(); + } else { + Register temp1 = r3; + Register temp2 = ip; + Register temp3 = lr; + Register temp4 = r4; + Label loop; + Label not_two; + + __ Push(lr, r4); + __ bic(temp2, chars, Operand(0x3)); + __ add(temp2, dest, Operand(temp2, LSL, 1)); + + __ bind(&loop); + __ ldr(temp1, MemOperand(src, 4, PostIndex)); + __ uxtb16(temp3, Operand(temp1, ROR, 0)); + __ uxtb16(temp4, Operand(temp1, ROR, 8)); + __ pkhbt(temp1, temp3, Operand(temp4, LSL, 16)); + __ str(temp1, MemOperand(dest)); + __ pkhtb(temp1, temp4, Operand(temp3, ASR, 16)); + __ str(temp1, MemOperand(dest, 4)); + __ add(dest, dest, Operand(8)); + __ cmp(dest, temp2); + __ b(&loop, ne); + + __ mov(chars, Operand(chars, LSL, 31), SetCC); // bit0 => ne, bit1 => cs + __ b(&not_two, cc); + __ ldrh(temp1, MemOperand(src, 2, PostIndex)); + __ uxtb(temp3, Operand(temp1, ROR, 8)); + __ mov(temp3, Operand(temp3, LSL, 16)); + __ uxtab(temp3, temp3, Operand(temp1, ROR, 0)); + __ str(temp3, MemOperand(dest, 4, PostIndex)); + __ bind(&not_two); + __ ldrb(temp1, MemOperand(src), ne); + __ strh(temp1, MemOperand(dest), ne); + __ Pop(pc, r4); + } + + CodeDesc desc; + masm.GetCode(&desc); + + CPU::FlushICache(buffer, actual_size); + OS::ProtectCode(buffer, actual_size); + + return FUNCTION_CAST<OS::MemCopyUint16Uint8Function>(buffer); +#endif +} +#endif #undef __ diff --git a/src/arm/constants-arm.h b/src/arm/constants-arm.h index e21055f..9bfccf8 100644 --- a/src/arm/constants-arm.h +++ b/src/arm/constants-arm.h @@ -33,22 +33,6 @@ #error ARM EABI support is required. #endif -#if defined(__ARM_ARCH_7A__) || \ - defined(__ARM_ARCH_7R__) || \ - defined(__ARM_ARCH_7__) -# define CAN_USE_ARMV7_INSTRUCTIONS 1 -#ifndef CAN_USE_VFP3_INSTRUCTIONS -# define CAN_USE_VFP3_INSTRUCTIONS -#endif -#endif - -// Simulator should support unaligned access by default. -#if !defined(__arm__) -# ifndef CAN_USE_UNALIGNED_ACCESSES -# define CAN_USE_UNALIGNED_ACCESSES 1 -# endif -#endif - namespace v8 { namespace internal { @@ -331,6 +315,32 @@ enum LFlag { }; + +// NEON data type +enum NeonDataType { + NeonS8 = 0x1, // U = 0, imm3 = 0b001 + NeonS16 = 0x2, // U = 0, imm3 = 0b010 + NeonS32 = 0x4, // U = 0, imm3 = 0b100 + NeonU8 = 1 << 24 | 0x1, // U = 1, imm3 = 0b001 + NeonU16 = 1 << 24 | 0x2, // U = 1, imm3 = 0b010 + NeonU32 = 1 << 24 | 0x4, // U = 1, imm3 = 0b100 + NeonDataTypeSizeMask = 0x7, + NeonDataTypeUMask = 1 << 24 +}; + +enum NeonListType { + nlt_1 = 0x7, + nlt_2 = 0xA, + nlt_3 = 0x6, + nlt_4 = 0x2 +}; + +enum NeonSize { + Neon8 = 0x0, + Neon16 = 0x1, + Neon32 = 0x2, + Neon64 = 0x4 +}; + // ----------------------------------------------------------------------------- // Supervisor Call (svc) specific support. @@ -573,6 +583,7 @@ class Instruction { DECLARE_STATIC_TYPED_ACCESSOR(Condition, ConditionField); inline int TypeValue() const { return Bits(27, 25); } + inline int SpecialValue() const { return Bits(27, 23); } inline int RnValue() const { return Bits(19, 16); } DECLARE_STATIC_ACCESSOR(RnValue); diff --git a/src/arm/disasm-arm.cc b/src/arm/disasm-arm.cc index b0f3ec0..fd986fd 100644 --- a/src/arm/disasm-arm.cc +++ b/src/arm/disasm-arm.cc @@ -113,6 +113,8 @@ class Decoder { // Handle formatting of instructions and their options.
int FormatRegister(Instruction* instr, const char* option); + void FormatNeonList(int Vd, int type); + void FormatNeonMemory(int Rn, int align, int Rm); int FormatOption(Instruction* instr, const char* option); void Format(Instruction* instr, const char* format); void Unknown(Instruction* instr); @@ -133,6 +135,8 @@ class Decoder { void DecodeTypeVFP(Instruction* instr); void DecodeType6CoprocessorIns(Instruction* instr); + void DecodeSpecialCondition(Instruction* instr); + void DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(Instruction* instr); void DecodeVCMP(Instruction* instr); void DecodeVCVTBetweenDoubleAndSingle(Instruction* instr); @@ -419,6 +423,41 @@ int Decoder::FormatVFPinstruction(Instruction* instr, const char* format) { } +void Decoder::FormatNeonList(int Vd, int type) { + if (type == nlt_1) { + out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_, + "{d%d}", Vd); + } else if (type == nlt_2) { + out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_, + "{d%d, d%d}", Vd, Vd + 1); + } else if (type == nlt_3) { + out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_, + "{d%d, d%d, d%d}", Vd, Vd + 1, Vd + 2); + } else if (type == nlt_4) { + out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_, + "{d%d, d%d, d%d, d%d}", Vd, Vd + 1, Vd + 2, Vd + 3); + } +} + + +void Decoder::FormatNeonMemory(int Rn, int align, int Rm) { + out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_, + "[r%d", Rn); + if (align != 0) { + out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_, + ":%d", (1 << align) << 6); + } + if (Rm == 15) { + Print("]"); + } else if (Rm == 13) { + Print("]!"); + } else { + out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_, + "], r%d", Rm); + } +} + + // Print the movw or movt instruction. void Decoder::PrintMovwMovt(Instruction* instr) { int imm = instr->ImmedMovwMovtValue(); @@ -982,15 +1021,107 @@ void Decoder::DecodeType3(Instruction* instr) { break; } case ia_x: { - if (instr->HasW()) { - VERIFY(instr->Bits(5, 4) == 0x1); - if (instr->Bit(22) == 0x1) { - Format(instr, "usat 'rd, #'imm05@16, 'rm'shift_sat"); + if (instr->Bit(4) == 0) { + Format(instr, "'memop'cond'b 'rd, ['rn], +'shift_rm"); + } else { + if (instr->Bit(5) == 0) { + switch (instr->Bits(22, 21)) { + case 0: + if (instr->Bit(20) == 0) { + if (instr->Bit(6) == 0) { + Format(instr, "pkhbt'cond 'rd, 'rn, 'rm, lsl #'imm05@07"); + } else { + if (instr->Bits(11, 7) == 0) { + Format(instr, "pkhtb'cond 'rd, 'rn, 'rm, asr #32"); + } else { + Format(instr, "pkhtb'cond 'rd, 'rn, 'rm, asr #'imm05@07"); + } + } + } else { + UNREACHABLE(); + } + break; + case 1: + UNREACHABLE(); + break; + case 2: + UNREACHABLE(); + break; + case 3: + Format(instr, "usat 'rd, #'imm05@16, 'rm'shift_sat"); + break; + } } else { - UNREACHABLE(); // SSAT. 
+ switch (instr->Bits(22, 21)) { + case 0: + UNREACHABLE(); + break; + case 1: + UNREACHABLE(); + break; + case 2: + if ((instr->Bit(20) == 0) && (instr->Bits(9, 6) == 1)) { + if (instr->Bits(19, 16) == 0xF) { + switch (instr->Bits(11, 10)) { + case 0: + Format(instr, "uxtb16'cond 'rd, 'rm, ror #0"); + break; + case 1: + Format(instr, "uxtb16'cond 'rd, 'rm, ror #8"); + break; + case 2: + Format(instr, "uxtb16'cond 'rd, 'rm, ror #16"); + break; + case 3: + Format(instr, "uxtb16'cond 'rd, 'rm, ror #24"); + break; + } + } else { + UNREACHABLE(); + } + } else { + UNREACHABLE(); + } + break; + case 3: + if ((instr->Bit(20) == 0) && (instr->Bits(9, 6) == 1)) { + if (instr->Bits(19, 16) == 0xF) { + switch (instr->Bits(11, 10)) { + case 0: + Format(instr, "uxtb'cond 'rd, 'rm, ror #0"); + break; + case 1: + Format(instr, "uxtb'cond 'rd, 'rm, ror #8"); + break; + case 2: + Format(instr, "uxtb'cond 'rd, 'rm, ror #16"); + break; + case 3: + Format(instr, "uxtb'cond 'rd, 'rm, ror #24"); + break; + } + } else { + switch (instr->Bits(11, 10)) { + case 0: + Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #0"); + break; + case 1: + Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #8"); + break; + case 2: + Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #16"); + break; + case 3: + Format(instr, "uxtab'cond 'rd, 'rn, 'rm, ror #24"); + break; + } + } + } else { + UNREACHABLE(); + } + break; + } } - } else { - Format(instr, "'memop'cond'b 'rd, ['rn], +'shift_rm"); } break; } @@ -1423,6 +1554,91 @@ void Decoder::DecodeType6CoprocessorIns(Instruction* instr) { } } + +void Decoder::DecodeSpecialCondition(Instruction* instr) { + switch (instr->SpecialValue()) { + case 5: + if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && + (instr->Bit(4) == 1)) { + // vmovl signed + int Vd = (instr->Bit(22) << 4) | instr->VdValue(); + int Vm = (instr->Bit(5) << 4) | instr->VmValue(); + int imm3 = instr->Bits(21, 19); + out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_, + "vmovl.s%d q%d, d%d", imm3*8, Vd, Vm); + } else { + Unknown(instr); + } + break; + case 7: + if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && + (instr->Bit(4) == 1)) { + // vmovl unsigned + int Vd = (instr->Bit(22) << 4) | instr->VdValue(); + int Vm = (instr->Bit(5) << 4) | instr->VmValue(); + int imm3 = instr->Bits(21, 19); + out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_, + "vmovl.u%d q%d, d%d", imm3*8, Vd, Vm); + } else { + Unknown(instr); + } + break; + case 8: + if (instr->Bits(21, 20) == 0) { + // vst1 + int Vd = (instr->Bit(22) << 4) | instr->VdValue(); + int Rn = instr->VnValue(); + int type = instr->Bits(11, 8); + int size = instr->Bits(7, 6); + int align = instr->Bits(5, 4); + int Rm = instr->VmValue(); + out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_, + "vst1.%d ", (1 << size) << 3); + FormatNeonList(Vd, type); + Print(", "); + FormatNeonMemory(Rn, align, Rm); + } else if (instr->Bits(21, 20) == 2) { + // vld1 + int Vd = (instr->Bit(22) << 4) | instr->VdValue(); + int Rn = instr->VnValue(); + int type = instr->Bits(11, 8); + int size = instr->Bits(7, 6); + int align = instr->Bits(5, 4); + int Rm = instr->VmValue(); + out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_, + "vld1.%d ", (1 << size) << 3); + FormatNeonList(Vd, type); + Print(", "); + FormatNeonMemory(Rn, align, Rm); + } else { + Unknown(instr); + } + break; + case 0xA: + case 0xB: + if ((instr->Bits(22, 20) == 5) && (instr->Bits(15, 12) == 0xf)) { + int Rn = instr->Bits(19, 16); + int offset = instr->Bits(11, 0); + 
if (offset == 0) { + out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_, + "pld [r%d]", Rn); + } else if (instr->Bit(23) == 0) { + out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_, + "pld [r%d, #-%d]", Rn, offset); + } else { + out_buffer_pos_ += OS::SNPrintF(out_buffer_ + out_buffer_pos_, + "pld [r%d, #+%d]", Rn, offset); + } + } else { + Unknown(instr); + } + break; + default: + Unknown(instr); + break; + } +} + #undef VERIFIY bool Decoder::IsConstantPoolAt(byte* instr_ptr) { @@ -1449,7 +1665,7 @@ int Decoder::InstructionDecode(byte* instr_ptr) { "%08x ", instr->InstructionBits()); if (instr->ConditionField() == kSpecialCondition) { - Unknown(instr); + DecodeSpecialCondition(instr); return Instruction::kInstrSize; } int instruction_bits = *(reinterpret_cast(instr_ptr)); diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc index b07e7be..00af777 100644 --- a/src/arm/simulator-arm.cc +++ b/src/arm/simulator-arm.cc @@ -919,6 +919,54 @@ void Simulator::set_dw_register(int dreg, const int* dbl) { } +void Simulator::get_d_register(int dreg, uint64_t* value) { + ASSERT((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); + memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value)); +} + + +void Simulator::set_d_register(int dreg, const uint64_t* value) { + ASSERT((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); + memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value)); +} + + +void Simulator::get_d_register(int dreg, uint32_t* value) { + ASSERT((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); + memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2); +} + + +void Simulator::set_d_register(int dreg, const uint32_t* value) { + ASSERT((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); + memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2); +} + + +void Simulator::get_q_register(int qreg, uint64_t* value) { + ASSERT((qreg >= 0) && (qreg < num_q_registers)); + memcpy(value, vfp_registers_ + qreg * 4, sizeof(*value) * 2); +} + + +void Simulator::set_q_register(int qreg, const uint64_t* value) { + ASSERT((qreg >= 0) && (qreg < num_q_registers)); + memcpy(vfp_registers_ + qreg * 4, value, sizeof(*value) * 2); +} + + +void Simulator::get_q_register(int qreg, uint32_t* value) { + ASSERT((qreg >= 0) && (qreg < num_q_registers)); + memcpy(value, vfp_registers_ + qreg * 4, sizeof(*value) * 4); +} + + +void Simulator::set_q_register(int qreg, const uint32_t* value) { + ASSERT((qreg >= 0) && (qreg < num_q_registers)); + memcpy(vfp_registers_ + qreg * 4, value, sizeof(*value) * 4); +} + + // Raw access to the PC register. void Simulator::set_pc(int32_t value) { pc_modified_ = true; @@ -2599,36 +2647,148 @@ void Simulator::DecodeType3(Instruction* instr) { break; } case ia_x: { - if (instr->HasW()) { - ASSERT(instr->Bits(5, 4) == 0x1); - - if (instr->Bit(22) == 0x1) { // USAT. - int32_t sat_pos = instr->Bits(20, 16); - int32_t sat_val = (1 << sat_pos) - 1; - int32_t shift = instr->Bits(11, 7); - int32_t shift_type = instr->Bit(6); - int32_t rm_val = get_register(instr->RmValue()); - if (shift_type == 0) { // LSL - rm_val <<= shift; - } else { // ASR - rm_val >>= shift; + if (instr->Bit(4) == 0) { + // Memop. + } else { + if (instr->Bit(5) == 0) { + switch (instr->Bits(22, 21)) { + case 0: + if (instr->Bit(20) == 0) { + if (instr->Bit(6) == 0) { + // Pkhbt. 
+ uint32_t rn_val = get_register(rn); + uint32_t rm_val = get_register(instr->RmValue()); + int32_t shift = instr->Bits(11, 7); + rm_val <<= shift; + set_register(rd, (rn_val & 0xFFFF) | (rm_val & 0xFFFF0000U)); + } else { + // Pkhtb. + uint32_t rn_val = get_register(rn); + int32_t rm_val = get_register(instr->RmValue()); + int32_t shift = instr->Bits(11, 7); + if (shift == 0) { + shift = 32; + } + rm_val >>= shift; + set_register(rd, (rn_val & 0xFFFF0000U) | (rm_val & 0xFFFF)); + } + } else { + UNIMPLEMENTED(); + } + break; + case 1: + UNIMPLEMENTED(); + break; + case 2: + UNIMPLEMENTED(); + break; + case 3: { + // Usat. + int32_t sat_pos = instr->Bits(20, 16); + int32_t sat_val = (1 << sat_pos) - 1; + int32_t shift = instr->Bits(11, 7); + int32_t shift_type = instr->Bit(6); + int32_t rm_val = get_register(instr->RmValue()); + if (shift_type == 0) { // LSL + rm_val <<= shift; + } else { // ASR + rm_val >>= shift; + } + // If saturation occurs, the Q flag should be set in the CPSR. + // There is no Q flag yet, and no instruction (MRS) to read the + // CPSR directly. + if (rm_val > sat_val) { + rm_val = sat_val; + } else if (rm_val < 0) { + rm_val = 0; + } + set_register(rd, rm_val); + break; + } } - // If saturation occurs, the Q flag should be set in the CPSR. - // There is no Q flag yet, and no instruction (MRS) to read the - // CPSR directly. - if (rm_val > sat_val) { - rm_val = sat_val; - } else if (rm_val < 0) { - rm_val = 0; + } else { + switch (instr->Bits(22, 21)) { + case 0: + UNIMPLEMENTED(); + break; + case 1: + UNIMPLEMENTED(); + break; + case 2: + if ((instr->Bit(20) == 0) && (instr->Bits(9, 6) == 1)) { + if (instr->Bits(19, 16) == 0xF) { + // Uxtb16. + uint32_t rm_val = get_register(instr->RmValue()); + int32_t rotate = instr->Bits(11, 10); + switch (rotate) { + case 0: + break; + case 1: + rm_val = (rm_val >> 8) | (rm_val << 24); + break; + case 2: + rm_val = (rm_val >> 16) | (rm_val << 16); + break; + case 3: + rm_val = (rm_val >> 24) | (rm_val << 8); + break; + } + set_register(rd, + (rm_val & 0xFF) | (rm_val & 0xFF0000)); + } else { + UNIMPLEMENTED(); + } + } else { + UNIMPLEMENTED(); + } + break; + case 3: + if ((instr->Bit(20) == 0) && (instr->Bits(9, 6) == 1)) { + if (instr->Bits(19, 16) == 0xF) { + // Uxtb. + uint32_t rm_val = get_register(instr->RmValue()); + int32_t rotate = instr->Bits(11, 10); + switch (rotate) { + case 0: + break; + case 1: + rm_val = (rm_val >> 8) | (rm_val << 24); + break; + case 2: + rm_val = (rm_val >> 16) | (rm_val << 16); + break; + case 3: + rm_val = (rm_val >> 24) | (rm_val << 8); + break; + } + set_register(rd, (rm_val & 0xFF)); + } else { + // Uxtab. + uint32_t rn_val = get_register(rn); + uint32_t rm_val = get_register(instr->RmValue()); + int32_t rotate = instr->Bits(11, 10); + switch (rotate) { + case 0: + break; + case 1: + rm_val = (rm_val >> 8) | (rm_val << 24); + break; + case 2: + rm_val = (rm_val >> 16) | (rm_val << 16); + break; + case 3: + rm_val = (rm_val >> 24) | (rm_val << 8); + break; + } + set_register(rd, rn_val + (rm_val & 0xFF)); + } + } else { + UNIMPLEMENTED(); + } + break; } - set_register(rd, rm_val); - } else { // SSAT. 
- UNIMPLEMENTED(); } return; - } else { - Format(instr, "'memop'cond'b 'rd, ['rn], +'shift_rm"); - UNIMPLEMENTED(); } break; } @@ -3352,6 +3512,156 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) { } +void Simulator::DecodeSpecialCondition(Instruction* instr) { + switch (instr->SpecialValue()) { + case 5: + if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && + (instr->Bit(4) == 1)) { + // vmovl signed + int Vd = (instr->Bit(22) << 4) | instr->VdValue(); + int Vm = (instr->Bit(5) << 4) | instr->VmValue(); + int imm3 = instr->Bits(21, 19); + if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); + int esize = 8 * imm3; + int elements = 64 / esize; + int8_t from[8]; + get_d_register(Vm, reinterpret_cast(from)); + int16_t to[8]; + int e = 0; + while (e < elements) { + to[e] = from[e]; + e++; + } + set_q_register(Vd, reinterpret_cast(to)); + } else { + UNIMPLEMENTED(); + } + break; + case 7: + if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && + (instr->Bit(4) == 1)) { + // vmovl unsigned + int Vd = (instr->Bit(22) << 4) | instr->VdValue(); + int Vm = (instr->Bit(5) << 4) | instr->VmValue(); + int imm3 = instr->Bits(21, 19); + if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); + int esize = 8 * imm3; + int elements = 64 / esize; + uint8_t from[8]; + get_d_register(Vm, reinterpret_cast(from)); + uint16_t to[8]; + int e = 0; + while (e < elements) { + to[e] = from[e]; + e++; + } + set_q_register(Vd, reinterpret_cast(to)); + } else { + UNIMPLEMENTED(); + } + break; + case 8: + if (instr->Bits(21, 20) == 0) { + // vst1 + int Vd = (instr->Bit(22) << 4) | instr->VdValue(); + int Rn = instr->VnValue(); + int type = instr->Bits(11, 8); + int Rm = instr->VmValue(); + int32_t address = get_register(Rn); + int regs = 0; + switch (type) { + case nlt_1: + regs = 1; + break; + case nlt_2: + regs = 2; + break; + case nlt_3: + regs = 3; + break; + case nlt_4: + regs = 4; + break; + default: + UNIMPLEMENTED(); + break; + } + int r = 0; + while (r < regs) { + uint32_t data[2]; + get_d_register(Vd + r, data); + WriteW(address, data[0], instr); + WriteW(address + 4, data[1], instr); + address += 8; + r++; + } + if (Rm != 15) { + if (Rm == 13) { + set_register(Rn, address); + } else { + set_register(Rn, get_register(Rn) + get_register(Rm)); + } + } + } else if (instr->Bits(21, 20) == 2) { + // vld1 + int Vd = (instr->Bit(22) << 4) | instr->VdValue(); + int Rn = instr->VnValue(); + int type = instr->Bits(11, 8); + int Rm = instr->VmValue(); + int32_t address = get_register(Rn); + int regs = 0; + switch (type) { + case nlt_1: + regs = 1; + break; + case nlt_2: + regs = 2; + break; + case nlt_3: + regs = 3; + break; + case nlt_4: + regs = 4; + break; + default: + UNIMPLEMENTED(); + break; + } + int r = 0; + while (r < regs) { + uint32_t data[2]; + data[0] = ReadW(address, instr); + data[1] = ReadW(address + 4, instr); + set_d_register(Vd + r, data); + address += 8; + r++; + } + if (Rm != 15) { + if (Rm == 13) { + set_register(Rn, address); + } else { + set_register(Rn, get_register(Rn) + get_register(Rm)); + } + } + } else { + UNIMPLEMENTED(); + } + break; + case 0xA: + case 0xB: + if ((instr->Bits(22, 20) == 5) && (instr->Bits(15, 12) == 0xf)) { + // pld: ignore instruction. + } else { + UNIMPLEMENTED(); + } + break; + default: + UNIMPLEMENTED(); + break; + } +} + + // Executes the current instruction. 
void Simulator::InstructionDecode(Instruction* instr) { if (v8::internal::FLAG_check_icache) { @@ -3368,7 +3678,7 @@ void Simulator::InstructionDecode(Instruction* instr) { PrintF(" 0x%08x %s\n", reinterpret_cast(instr), buffer.start()); } if (instr->ConditionField() == kSpecialCondition) { - UNIMPLEMENTED(); + DecodeSpecialCondition(instr); } else if (ConditionallyExecute(instr)) { switch (instr->TypeValue()) { case 0: diff --git a/src/arm/simulator-arm.h b/src/arm/simulator-arm.h index 45ae999..2a458f9 100644 --- a/src/arm/simulator-arm.h +++ b/src/arm/simulator-arm.h @@ -144,7 +144,10 @@ class Simulator { d8, d9, d10, d11, d12, d13, d14, d15, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31, - num_d_registers = 32 + num_d_registers = 32, + q0 = 0, q1, q2, q3, q4, q5, q6, q7, + q8, q9, q10, q11, q12, q13, q14, q15, + num_q_registers = 16 }; explicit Simulator(Isolate* isolate); @@ -163,6 +166,15 @@ class Simulator { void set_dw_register(int dreg, const int* dbl); // Support for VFP. + void get_d_register(int dreg, uint64_t* value); + void set_d_register(int dreg, const uint64_t* value); + void get_d_register(int dreg, uint32_t* value); + void set_d_register(int dreg, const uint32_t* value); + void get_q_register(int qreg, uint64_t* value); + void set_q_register(int qreg, const uint64_t* value); + void get_q_register(int qreg, uint32_t* value); + void set_q_register(int qreg, const uint32_t* value); + void set_s_register(int reg, unsigned int value); unsigned int get_s_register(int reg) const; @@ -328,6 +340,7 @@ class Simulator { // Support for VFP. void DecodeTypeVFP(Instruction* instr); void DecodeType6CoprocessorIns(Instruction* instr); + void DecodeSpecialCondition(Instruction* instr); void DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(Instruction* instr); void DecodeVCMP(Instruction* instr); diff --git a/src/flag-definitions.h b/src/flag-definitions.h index a0f907d..00d313f 100644 --- a/src/flag-definitions.h +++ b/src/flag-definitions.h @@ -348,6 +348,8 @@ DEFINE_bool(enable_vfp3, ENABLE_VFP3_DEFAULT, "enable use of VFP3 instructions if available") DEFINE_bool(enable_armv7, ENABLE_ARMV7_DEFAULT, "enable use of ARMv7 instructions if available (ARM only)") +DEFINE_bool(enable_neon, true, + "enable use of NEON instructions if available (ARM only)") DEFINE_bool(enable_sudiv, true, "enable use of SDIV and UDIV instructions if available (ARM only)") DEFINE_bool(enable_movw_movt, false, diff --git a/src/globals.h b/src/globals.h index baacf52..f00e676 100644 --- a/src/globals.h +++ b/src/globals.h @@ -89,12 +89,6 @@ namespace internal { #elif defined(__ARMEL__) #define V8_HOST_ARCH_ARM 1 #define V8_HOST_ARCH_32_BIT 1 -// Some CPU-OS combinations allow unaligned access on ARM. We assume -// that unaligned accesses are not allowed unless the build system -// defines the CAN_USE_UNALIGNED_ACCESSES macro to be non-zero. -#if CAN_USE_UNALIGNED_ACCESSES -#define V8_HOST_CAN_READ_UNALIGNED 1 -#endif #elif defined(__MIPSEL__) #define V8_HOST_ARCH_MIPS 1 #define V8_HOST_ARCH_32_BIT 1 @@ -102,6 +96,16 @@ namespace internal { #error Host architecture was not detected as supported by v8 #endif +#if defined(__ARM_ARCH_7A__) || \ + defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7__) +# define CAN_USE_ARMV7_INSTRUCTIONS 1 +# ifndef CAN_USE_VFP3_INSTRUCTIONS +# define CAN_USE_VFP3_INSTRUCTIONS +# endif +#endif + + // Target architecture detection. This may be set externally. 
If not, detect // in the same way as the host architecture, that is, target the native // environment as presented by the compiler. diff --git a/src/platform-linux.cc b/src/platform-linux.cc index 2c6a36c..007b1e7 100644 --- a/src/platform-linux.cc +++ b/src/platform-linux.cc @@ -146,6 +146,9 @@ bool OS::ArmCpuHasFeature(CpuFeature feature) { case VFP3: search_string = "vfpv3"; break; + case NEON: + search_string = "neon"; + break; case ARMv7: search_string = "ARMv7"; break; @@ -200,6 +203,36 @@ CpuImplementer OS::GetCpuImplementer() { } +CpuPart OS::GetCpuPart(CpuImplementer implementer) { + static bool use_cached_value = false; + static CpuPart cached_value = CPU_UNKNOWN; + if (use_cached_value) { + return cached_value; + } + if (implementer == ARM_IMPLEMENTER) { + if (CPUInfoContainsString("CPU part\t: 0xc0f")) { + cached_value = CORTEX_A15; + } else if (CPUInfoContainsString("CPU part\t: 0xc0c")) { + cached_value = CORTEX_A12; + } else if (CPUInfoContainsString("CPU part\t: 0xc09")) { + cached_value = CORTEX_A9; + } else if (CPUInfoContainsString("CPU part\t: 0xc08")) { + cached_value = CORTEX_A8; + } else if (CPUInfoContainsString("CPU part\t: 0xc07")) { + cached_value = CORTEX_A7; + } else if (CPUInfoContainsString("CPU part\t: 0xc05")) { + cached_value = CORTEX_A5; + } else { + cached_value = CPU_UNKNOWN; + } + } else { + cached_value = CPU_UNKNOWN; + } + use_cached_value = true; + return cached_value; +} + + bool OS::ArmUsingHardFloat() { // GCC versions 4.6 and above define __ARM_PCS or __ARM_PCS_VFP to specify // the Floating Point ABI used (PCS stands for Procedure Call Standard). diff --git a/src/platform-nullos.cc b/src/platform-nullos.cc index 1b481f4..dd5a3dd 100644 --- a/src/platform-nullos.cc +++ b/src/platform-nullos.cc @@ -220,6 +220,11 @@ CpuImplementer OS::GetCpuImplementer() { } +CpuPart OS::GetCpuPart(CpuImplementer implementer) { + UNIMPLEMENTED(); +} + + bool OS::ArmCpuHasFeature(CpuFeature feature) { UNIMPLEMENTED(); } diff --git a/src/platform-posix.cc b/src/platform-posix.cc index 6e83b04..9ed02cf 100644 --- a/src/platform-posix.cc +++ b/src/platform-posix.cc @@ -341,7 +341,26 @@ void OS::MemMove(void* dest, const void* src, size_t size) { (*memmove_function)(dest, src, size); } -#endif // V8_TARGET_ARCH_IA32 +#elif defined(V8_HOST_ARCH_ARM) +void OS::MemCopyUint16Uint8Wrapper(uint16_t* dest, + const uint8_t* src, + size_t chars) { + uint16_t *limit = dest + chars; + while (dest < limit) { + *dest++ = static_cast(*src++); + } +} + + +OS::MemCopyUint8Function OS::memcopy_uint8_function = &OS::MemCopyUint8Wrapper; +OS::MemCopyUint16Uint8Function OS::memcopy_uint16_uint8_function = + &OS::MemCopyUint16Uint8Wrapper; +// Defined in codegen-arm.cc. 
+OS::MemCopyUint8Function CreateMemCopyUint8Function( + OS::MemCopyUint8Function stub); +OS::MemCopyUint16Uint8Function CreateMemCopyUint16Uint8Function( + OS::MemCopyUint16Uint8Function stub); +#endif void POSIXPostSetUp() { @@ -350,6 +369,11 @@ void POSIXPostSetUp() { if (generated_memmove != NULL) { memmove_function = generated_memmove; } +#elif defined(V8_HOST_ARCH_ARM) + OS::memcopy_uint8_function = + CreateMemCopyUint8Function(&OS::MemCopyUint8Wrapper); + OS::memcopy_uint16_uint8_function = + CreateMemCopyUint16Uint8Function(&OS::MemCopyUint16Uint8Wrapper); #endif init_fast_sin_function(); init_fast_cos_function(); diff --git a/src/platform.h b/src/platform.h index 24d21cb..211be39 100644 --- a/src/platform.h +++ b/src/platform.h @@ -315,6 +315,9 @@ class OS { // Support runtime detection of Cpu implementer static CpuImplementer GetCpuImplementer(); + // Support runtime detection of Cpu implementer + static CpuPart GetCpuPart(CpuImplementer implementer); + // Support runtime detection of VFP3 on ARM CPUs. static bool ArmCpuHasFeature(CpuFeature feature); @@ -343,7 +346,42 @@ class OS { static void MemCopy(void* dest, const void* src, size_t size) { MemMove(dest, src, size); } -#else // V8_TARGET_ARCH_IA32 +#elif defined(V8_HOST_ARCH_ARM) + typedef void (*MemCopyUint8Function)(uint8_t* dest, + const uint8_t* src, + size_t size); + static MemCopyUint8Function memcopy_uint8_function; + static void MemCopyUint8Wrapper(uint8_t* dest, + const uint8_t* src, + size_t chars) { + memcpy(dest, src, chars); + } + // For values < 16, the assembler function is slower than the inlined C code. + static const int kMinComplexMemCopy = 16; + static void MemCopy(void* dest, const void* src, size_t size) { + (*memcopy_uint8_function)(reinterpret_cast(dest), + reinterpret_cast(src), + size); + } + static void MemMove(void* dest, const void* src, size_t size) { + memmove(dest, src, size); + } + + typedef void (*MemCopyUint16Uint8Function)(uint16_t* dest, + const uint8_t* src, + size_t size); + static MemCopyUint16Uint8Function memcopy_uint16_uint8_function; + static void MemCopyUint16Uint8Wrapper(uint16_t* dest, + const uint8_t* src, + size_t chars); + // For values < 12, the assembler function is slower than the inlined C code. + static const int kMinComplexConvertMemCopy = 12; + static void MemCopyUint16Uint8(uint16_t* dest, + const uint8_t* src, + size_t size) { + (*memcopy_uint16_uint8_function)(dest, src, size); + } +#else // Copy memory area to disjoint memory area. static void MemCopy(void* dest, const void* src, size_t size) { memcpy(dest, src, size); diff --git a/src/v8globals.h b/src/v8globals.h index 4932da9..c3f1f01 100644 --- a/src/v8globals.h +++ b/src/v8globals.h @@ -418,6 +418,17 @@ enum CpuImplementer { }; +enum CpuPart { + CPU_UNKNOWN, + CORTEX_A15, + CORTEX_A12, + CORTEX_A9, + CORTEX_A8, + CORTEX_A7, + CORTEX_A5 +}; + + // Feature flags bit positions. They are mostly based on the CPUID spec. // (We assign CPUID itself to one of the currently reserved bits -- // feel free to change this if needed.) 
@@ -434,6 +445,7 @@ enum CpuFeature { SSE4_1 = 32 + 19, // x86 UNALIGNED_ACCESSES = 4, // ARM MOVW_MOVT_IMMEDIATE_LOADS = 5, // ARM VFP32DREGS = 6, // ARM + NEON = 7, // ARM SAHF = 0, // x86 FPU = 1}; // MIPS diff --git a/src/v8utils.h b/src/v8utils.h index ff9f8f2..fd3f4a5 100644 --- a/src/v8utils.h +++ b/src/v8utils.h @@ -317,6 +317,11 @@ template <typename sinkchar, typename sourcechar> INLINE(static void CopyCharsUnsigned(sinkchar* dest, const sourcechar* src, int chars)); +#if defined(V8_HOST_ARCH_ARM) +INLINE(void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars)); +INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint8_t* src, int chars)); +INLINE(void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars)); +#endif // Copy from ASCII/16bit chars to ASCII/16bit chars. template <typename sinkchar, typename sourcechar> @@ -375,6 +380,105 @@ void CopyCharsUnsigned(sinkchar* dest, const sourcechar* src, int chars) { } + +#if defined(V8_HOST_ARCH_ARM) +void CopyCharsUnsigned(uint8_t* dest, const uint8_t* src, int chars) { + switch (static_cast<unsigned>(chars)) { + case 0: + break; + case 1: + *dest = *src; + break; + case 2: + memcpy(dest, src, 2); + break; + case 3: + memcpy(dest, src, 3); + break; + case 4: + memcpy(dest, src, 4); + break; + case 5: + memcpy(dest, src, 5); + break; + case 6: + memcpy(dest, src, 6); + break; + case 7: + memcpy(dest, src, 7); + break; + case 8: + memcpy(dest, src, 8); + break; + case 9: + memcpy(dest, src, 9); + break; + case 10: + memcpy(dest, src, 10); + break; + case 11: + memcpy(dest, src, 11); + break; + case 12: + memcpy(dest, src, 12); + break; + case 13: + memcpy(dest, src, 13); + break; + case 14: + memcpy(dest, src, 14); + break; + case 15: + memcpy(dest, src, 15); + break; + default: + OS::MemCopy(dest, src, chars); + break; + } +} + + +void CopyCharsUnsigned(uint16_t* dest, const uint8_t* src, int chars) { + if (chars >= OS::kMinComplexConvertMemCopy) { + OS::MemCopyUint16Uint8(dest, src, chars); + } else { + OS::MemCopyUint16Uint8Wrapper(dest, src, chars); + } +} + + +void CopyCharsUnsigned(uint16_t* dest, const uint16_t* src, int chars) { + switch (static_cast<unsigned>(chars)) { + case 0: + break; + case 1: + *dest = *src; + break; + case 2: + memcpy(dest, src, 4); + break; + case 3: + memcpy(dest, src, 6); + break; + case 4: + memcpy(dest, src, 8); + break; + case 5: + memcpy(dest, src, 10); + break; + case 6: + memcpy(dest, src, 12); + break; + case 7: + memcpy(dest, src, 14); + break; + default: + OS::MemCopy(dest, src, chars * sizeof(*dest)); + break; + } +} +#endif + + + class StringBuilder : public SimpleStringBuilder { public: explicit StringBuilder(int size) : SimpleStringBuilder(size) { } diff --git a/test/cctest/test-assembler-arm.cc b/test/cctest/test-assembler-arm.cc index cb44ddc..c79e740 100644 --- a/test/cctest/test-assembler-arm.cc +++ b/test/cctest/test-assembler-arm.cc @@ -1227,4 +1227,186 @@ TEST(14) { CHECK_EQ(kArmNanLower32, BitCast(t.div_result) & 0xffffffffu); } + +TEST(15) { + // Test the Neon instructions.
+ CcTest::InitializeVM(); + Isolate* isolate = Isolate::Current(); + HandleScope scope(isolate); + + typedef struct { + uint32_t src0; + uint32_t src1; + uint32_t src2; + uint32_t src3; + uint32_t src4; + uint32_t src5; + uint32_t src6; + uint32_t src7; + uint32_t dst0; + uint32_t dst1; + uint32_t dst2; + uint32_t dst3; + uint32_t dst4; + uint32_t dst5; + uint32_t dst6; + uint32_t dst7; + uint32_t srcA0; + uint32_t srcA1; + uint32_t dstA0; + uint32_t dstA1; + uint32_t dstA2; + uint32_t dstA3; + } T; + T t; + + // Create a function that accepts &t, and loads, manipulates, and stores + // the doubles and floats. + Assembler assm(isolate, NULL, 0); + + + if (CpuFeatures::IsSupported(NEON)) { + CpuFeatureScope scope(&assm, NEON); + + __ stm(db_w, sp, r4.bit() | lr.bit()); + // Move 32 bytes with neon. + __ add(r4, r0, Operand(OFFSET_OF(T, src0))); + __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(r4)); + __ add(r4, r0, Operand(OFFSET_OF(T, dst0))); + __ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(r4)); + + // Expand 8 bytes into 8 words(16 bits). + __ add(r4, r0, Operand(OFFSET_OF(T, srcA0))); + __ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(r4)); + __ vmovl(NeonU8, q0, d0); + __ add(r4, r0, Operand(OFFSET_OF(T, dstA0))); + __ vst1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(r4)); + + __ ldm(ia_w, sp, r4.bit() | pc.bit()); + + CodeDesc desc; + assm.GetCode(&desc); + Object* code = isolate->heap()->CreateCode( + desc, + Code::ComputeFlags(Code::STUB), + Handle())->ToObjectChecked(); + CHECK(code->IsCode()); +#ifdef DEBUG + Code::cast(code)->Print(); +#endif + F3 f = FUNCTION_CAST(Code::cast(code)->entry()); + t.src0 = 0x01020304; + t.src1 = 0x11121314; + t.src2 = 0x21222324; + t.src3 = 0x31323334; + t.src4 = 0x41424344; + t.src5 = 0x51525354; + t.src6 = 0x61626364; + t.src7 = 0x71727374; + t.dst0 = 0; + t.dst1 = 0; + t.dst2 = 0; + t.dst3 = 0; + t.dst4 = 0; + t.dst5 = 0; + t.dst6 = 0; + t.dst7 = 0; + t.srcA0 = 0x41424344; + t.srcA1 = 0x81828384; + t.dstA0 = 0; + t.dstA1 = 0; + t.dstA2 = 0; + t.dstA3 = 0; + Object* dummy = CALL_GENERATED_CODE(f, &t, 0, 0, 0, 0); + USE(dummy); + CHECK_EQ(0x01020304, t.dst0); + CHECK_EQ(0x11121314, t.dst1); + CHECK_EQ(0x21222324, t.dst2); + CHECK_EQ(0x31323334, t.dst3); + CHECK_EQ(0x41424344, t.dst4); + CHECK_EQ(0x51525354, t.dst5); + CHECK_EQ(0x61626364, t.dst6); + CHECK_EQ(0x71727374, t.dst7); + CHECK_EQ(0x00430044, t.dstA0); + CHECK_EQ(0x00410042, t.dstA1); + CHECK_EQ(0x00830084, t.dstA2); + CHECK_EQ(0x00810082, t.dstA3); + } +} + + +TEST(16) { + // Test the pkh, uxtb, uxtab and uxtb16 instructions. + CcTest::InitializeVM(); + Isolate* isolate = Isolate::Current(); + HandleScope scope(isolate); + + typedef struct { + uint32_t src0; + uint32_t src1; + uint32_t src2; + uint32_t dst0; + uint32_t dst1; + uint32_t dst2; + uint32_t dst3; + uint32_t dst4; + } T; + T t; + + // Create a function that accepts &t, and loads, manipulates, and stores + // the doubles and floats. 
+ Assembler assm(isolate, NULL, 0); + + __ stm(db_w, sp, r4.bit() | lr.bit()); + + __ mov(r4, Operand(r0)); + __ ldr(r0, MemOperand(r4, OFFSET_OF(T, src0))); + __ ldr(r1, MemOperand(r4, OFFSET_OF(T, src1))); + + __ pkhbt(r2, r0, Operand(r1, LSL, 8)); + __ str(r2, MemOperand(r4, OFFSET_OF(T, dst0))); + + __ pkhtb(r2, r0, Operand(r1, ASR, 8)); + __ str(r2, MemOperand(r4, OFFSET_OF(T, dst1))); + + __ uxtb16(r2, Operand(r0, ROR, 8)); + __ str(r2, MemOperand(r4, OFFSET_OF(T, dst2))); + + __ uxtb(r2, Operand(r0, ROR, 8)); + __ str(r2, MemOperand(r4, OFFSET_OF(T, dst3))); + + __ ldr(r0, MemOperand(r4, OFFSET_OF(T, src2))); + __ uxtab(r2, r0, Operand(r1, ROR, 8)); + __ str(r2, MemOperand(r4, OFFSET_OF(T, dst4))); + + __ ldm(ia_w, sp, r4.bit() | pc.bit()); + + CodeDesc desc; + assm.GetCode(&desc); + Object* code = isolate->heap()->CreateCode( + desc, + Code::ComputeFlags(Code::STUB), + Handle())->ToObjectChecked(); + CHECK(code->IsCode()); +#ifdef DEBUG + Code::cast(code)->Print(); +#endif + F3 f = FUNCTION_CAST(Code::cast(code)->entry()); + t.src0 = 0x01020304; + t.src1 = 0x11121314; + t.src2 = 0x11121300; + t.dst0 = 0; + t.dst1 = 0; + t.dst2 = 0; + t.dst3 = 0; + t.dst4 = 0; + Object* dummy = CALL_GENERATED_CODE(f, &t, 0, 0, 0, 0); + USE(dummy); + CHECK_EQ(0x12130304, t.dst0); + CHECK_EQ(0x01021213, t.dst1); + CHECK_EQ(0x00010003, t.dst2); + CHECK_EQ(0x00000003, t.dst3); + CHECK_EQ(0x11121313, t.dst4); +} + #undef __ diff --git a/test/cctest/test-disasm-arm.cc b/test/cctest/test-disasm-arm.cc index 85b472d..9d6623e 100644 --- a/test/cctest/test-disasm-arm.cc +++ b/test/cctest/test-disasm-arm.cc @@ -405,6 +405,17 @@ TEST(Type3) { "e6ff3f94 usat r3, #31, r4, lsl #31"); COMPARE(usat(r8, 0, Operand(r5, ASR, 17)), "e6e088d5 usat r8, #0, r5, asr #17"); + + COMPARE(pkhbt(r3, r4, Operand(r5, LSL, 17)), + "e6843895 pkhbt r3, r4, r5, lsl #17"); + COMPARE(pkhtb(r3, r4, Operand(r5, ASR, 17)), + "e68438d5 pkhtb r3, r4, r5, asr #17"); + COMPARE(uxtb(r3, Operand(r4, ROR, 8)), + "e6ef3474 uxtb r3, r4, ror #8"); + COMPARE(uxtab(r3, r4, Operand(r5, ROR, 8)), + "e6e43475 uxtab r3, r4, r5, ror #8"); + COMPARE(uxtb16(r3, Operand(r4, ROR, 8)), + "e6cf3474 uxtb16 r3, r4, ror #8"); } VERIFY_RUN(); @@ -662,6 +673,23 @@ TEST(Vfp) { } +TEST(Neon) { + SET_UP(); + + if (CpuFeatures::IsSupported(NEON)) { + CpuFeatureScope scope(&assm, NEON); + COMPARE(vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(r1)), + "f421420f vld1.8 {d4, d5, d6, d7}, [r1]"); + COMPARE(vst1(Neon16, NeonListOperand(d17, 4), NeonMemOperand(r9)), + "f449124f vst1.16 {d17, d18, d19, d20}, [r9]"); + COMPARE(vmovl(NeonU8, q4, d2), + "f3884a12 vmovl.u8 q4, d2"); + } + + VERIFY_RUN(); +} + + TEST(LoadStore) { SET_UP(); @@ -858,6 +886,11 @@ TEST(LoadStore) { "e1eba7ff strd r10, [fp, #+127]!"); COMPARE(strd(ip, sp, MemOperand(sp, -127, PreIndex)), "e16dc7ff strd ip, [sp, #-127]!"); + + COMPARE(pld(MemOperand(r1, 0)), + "f5d1f000 pld [r1]"); + COMPARE(pld(MemOperand(r2, 128)), + "f5d2f080 pld [r2, #+128]"); } VERIFY_RUN(); -- 2.7.4