From 437fef500002a96d78773a08824738ad5c037980 Mon Sep 17 00:00:00 2001 From: "rodolph.perfetta@arm.com" Date: Thu, 7 Aug 2014 10:46:40 +0000 Subject: [PATCH] ARM64: Support arbitrary offset in load/store pair. TF calls can generate code exceeding the instruction range. BUG= R=bmeurer@chromium.org Review URL: https://codereview.chromium.org/440303004 git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@22969 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/arm64/assembler-arm64.cc | 6 ++ src/arm64/assembler-arm64.h | 8 +- src/arm64/macro-assembler-arm64-inl.h | 37 ++----- src/arm64/macro-assembler-arm64.cc | 33 ++++++ src/arm64/macro-assembler-arm64.h | 22 ++-- test/cctest/test-assembler-arm64.cc | 184 ++++++++++++++++++++++++++++++++++ 6 files changed, 250 insertions(+), 40 deletions(-) diff --git a/src/arm64/assembler-arm64.cc b/src/arm64/assembler-arm64.cc index dc2d587..7f86e14 100644 --- a/src/arm64/assembler-arm64.cc +++ b/src/arm64/assembler-arm64.cc @@ -2503,6 +2503,12 @@ bool Assembler::IsImmLSScaled(ptrdiff_t offset, LSDataSize size) { } +bool Assembler::IsImmLSPair(ptrdiff_t offset, LSDataSize size) { + bool offset_is_size_multiple = (((offset >> size) << size) == offset); + return offset_is_size_multiple && is_int7(offset >> size); +} + + // Test if a given value can be encoded in the immediate field of a logical // instruction. 
// If it can be encoded, the function returns true, and values pointed to by n, diff --git a/src/arm64/assembler-arm64.h b/src/arm64/assembler-arm64.h index e16ea87..d0effa7 100644 --- a/src/arm64/assembler-arm64.h +++ b/src/arm64/assembler-arm64.h @@ -1945,6 +1945,10 @@ class Assembler : public AssemblerBase { static bool IsImmLSUnscaled(ptrdiff_t offset); static bool IsImmLSScaled(ptrdiff_t offset, LSDataSize size); + void LoadStorePair(const CPURegister& rt, const CPURegister& rt2, + const MemOperand& addr, LoadStorePairOp op); + static bool IsImmLSPair(ptrdiff_t offset, LSDataSize size); + void Logical(const Register& rd, const Register& rn, const Operand& operand, @@ -2027,10 +2031,6 @@ class Assembler : public AssemblerBase { const Operand& operand, FlagsUpdate S, Instr op); - void LoadStorePair(const CPURegister& rt, - const CPURegister& rt2, - const MemOperand& addr, - LoadStorePairOp op); void LoadStorePairNonTemporal(const CPURegister& rt, const CPURegister& rt2, const MemOperand& addr, diff --git a/src/arm64/macro-assembler-arm64-inl.h b/src/arm64/macro-assembler-arm64-inl.h index cf5062d..f7c7248 100644 --- a/src/arm64/macro-assembler-arm64-inl.h +++ b/src/arm64/macro-assembler-arm64-inl.h @@ -299,6 +299,16 @@ LS_MACRO_LIST(DEFINE_FUNCTION) #undef DEFINE_FUNCTION +#define DEFINE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \ + void MacroAssembler::FN(const REGTYPE REG, const REGTYPE REG2, \ + const MemOperand& addr) { \ + DCHECK(allow_macro_instructions_); \ + LoadStorePairMacro(REG, REG2, addr, OP); \ + } +LSPAIR_MACRO_LIST(DEFINE_FUNCTION) +#undef DEFINE_FUNCTION + + void MacroAssembler::Asr(const Register& rd, const Register& rn, unsigned shift) { @@ -861,25 +871,6 @@ void MacroAssembler::Ldnp(const CPURegister& rt, } -void MacroAssembler::Ldp(const CPURegister& rt, - const CPURegister& rt2, - const MemOperand& src) { - DCHECK(allow_macro_instructions_); - DCHECK(!AreAliased(rt, rt2)); - ldp(rt, rt2, src); -} - - -void MacroAssembler::Ldpsw(const Register& 
rt, - const Register& rt2, - const MemOperand& src) { - DCHECK(allow_macro_instructions_); - DCHECK(!rt.IsZero()); - DCHECK(!rt2.IsZero()); - ldpsw(rt, rt2, src); -} - - void MacroAssembler::Ldr(const CPURegister& rt, const Immediate& imm) { DCHECK(allow_macro_instructions_); ldr(rt, imm); @@ -1136,14 +1127,6 @@ void MacroAssembler::Stnp(const CPURegister& rt, } -void MacroAssembler::Stp(const CPURegister& rt, - const CPURegister& rt2, - const MemOperand& dst) { - DCHECK(allow_macro_instructions_); - stp(rt, rt2, dst); -} - - void MacroAssembler::Sxtb(const Register& rd, const Register& rn) { DCHECK(allow_macro_instructions_); DCHECK(!rd.IsZero()); diff --git a/src/arm64/macro-assembler-arm64.cc b/src/arm64/macro-assembler-arm64.cc index 98a970e..658497b 100644 --- a/src/arm64/macro-assembler-arm64.cc +++ b/src/arm64/macro-assembler-arm64.cc @@ -588,6 +588,39 @@ void MacroAssembler::LoadStoreMacro(const CPURegister& rt, } } +void MacroAssembler::LoadStorePairMacro(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& addr, + LoadStorePairOp op) { + // TODO(all): Should we support register offset for load-store-pair? + DCHECK(!addr.IsRegisterOffset()); + + int64_t offset = addr.offset(); + LSDataSize size = CalcLSPairDataSize(op); + + // Check if the offset fits in the immediate field of the appropriate + // instruction. If not, emit two instructions to perform the operation. + if (IsImmLSPair(offset, size)) { + // Encodable in one load/store pair instruction. 
+ LoadStorePair(rt, rt2, addr, op); + } else { + Register base = addr.base(); + if (addr.IsImmediateOffset()) { + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireSameSizeAs(base); + Add(temp, base, offset); + LoadStorePair(rt, rt2, MemOperand(temp), op); + } else if (addr.IsPostIndex()) { + LoadStorePair(rt, rt2, MemOperand(base), op); + Add(base, base, offset); + } else { + DCHECK(addr.IsPreIndex()); + Add(base, base, offset); + LoadStorePair(rt, rt2, MemOperand(base), op); + } + } +} + void MacroAssembler::Load(const Register& rt, const MemOperand& addr, diff --git a/src/arm64/macro-assembler-arm64.h b/src/arm64/macro-assembler-arm64.h index efb6bcf..47ffff1 100644 --- a/src/arm64/macro-assembler-arm64.h +++ b/src/arm64/macro-assembler-arm64.h @@ -43,6 +43,11 @@ namespace internal { V(Str, CPURegister&, rt, StoreOpFor(rt)) \ V(Ldrsw, Register&, rt, LDRSW_x) +#define LSPAIR_MACRO_LIST(V) \ + V(Ldp, CPURegister&, rt, rt2, LoadPairOpFor(rt, rt2)) \ + V(Stp, CPURegister&, rt, rt2, StorePairOpFor(rt, rt2)) \ + V(Ldpsw, CPURegister&, rt, rt2, LDPSW_x) + // ---------------------------------------------------------------------------- // Static helper functions @@ -261,6 +266,14 @@ class MacroAssembler : public Assembler { const MemOperand& addr, LoadStoreOp op); +#define DECLARE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \ + inline void FN(const REGTYPE REG, const REGTYPE REG2, const MemOperand& addr); + LSPAIR_MACRO_LIST(DECLARE_FUNCTION) +#undef DECLARE_FUNCTION + + void LoadStorePairMacro(const CPURegister& rt, const CPURegister& rt2, + const MemOperand& addr, LoadStorePairOp op); + // V8-specific load/store helpers. 
void Load(const Register& rt, const MemOperand& addr, Representation r); void Store(const Register& rt, const MemOperand& addr, Representation r); @@ -418,12 +431,6 @@ class MacroAssembler : public Assembler { inline void Ldnp(const CPURegister& rt, const CPURegister& rt2, const MemOperand& src); - inline void Ldp(const CPURegister& rt, - const CPURegister& rt2, - const MemOperand& src); - inline void Ldpsw(const Register& rt, - const Register& rt2, - const MemOperand& src); // Load a literal from the inline constant pool. inline void Ldr(const CPURegister& rt, const Immediate& imm); // Helper function for double immediate. @@ -483,9 +490,6 @@ class MacroAssembler : public Assembler { inline void Stnp(const CPURegister& rt, const CPURegister& rt2, const MemOperand& dst); - inline void Stp(const CPURegister& rt, - const CPURegister& rt2, - const MemOperand& dst); inline void Sxtb(const Register& rd, const Register& rn); inline void Sxth(const Register& rd, const Register& rn); inline void Sxtw(const Register& rd, const Register& rn); diff --git a/test/cctest/test-assembler-arm64.cc b/test/cctest/test-assembler-arm64.cc index 8183920..3d05487 100644 --- a/test/cctest/test-assembler-arm64.cc +++ b/test/cctest/test-assembler-arm64.cc @@ -2911,6 +2911,64 @@ TEST(ldp_stp_offset) { } +TEST(ldp_stp_offset_wide) { + INIT_V8(); + SETUP(); + + uint64_t src[3] = {0x0011223344556677, 0x8899aabbccddeeff, + 0xffeeddccbbaa9988}; + uint64_t dst[7] = {0, 0, 0, 0, 0, 0, 0}; + uintptr_t src_base = reinterpret_cast<uintptr_t>(src); + uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); + // Move base too far from the array to force multiple instructions + // to be emitted. 
+ const int64_t base_offset = 1024; + + START(); + __ Mov(x20, src_base - base_offset); + __ Mov(x21, dst_base - base_offset); + __ Mov(x18, src_base + base_offset + 24); + __ Mov(x19, dst_base + base_offset + 56); + __ Ldp(w0, w1, MemOperand(x20, base_offset)); + __ Ldp(w2, w3, MemOperand(x20, base_offset + 4)); + __ Ldp(x4, x5, MemOperand(x20, base_offset + 8)); + __ Ldp(w6, w7, MemOperand(x18, -12 - base_offset)); + __ Ldp(x8, x9, MemOperand(x18, -16 - base_offset)); + __ Stp(w0, w1, MemOperand(x21, base_offset)); + __ Stp(w2, w3, MemOperand(x21, base_offset + 8)); + __ Stp(x4, x5, MemOperand(x21, base_offset + 16)); + __ Stp(w6, w7, MemOperand(x19, -24 - base_offset)); + __ Stp(x8, x9, MemOperand(x19, -16 - base_offset)); + END(); + + RUN(); + + CHECK_EQUAL_64(0x44556677, x0); + CHECK_EQUAL_64(0x00112233, x1); + CHECK_EQUAL_64(0x0011223344556677UL, dst[0]); + CHECK_EQUAL_64(0x00112233, x2); + CHECK_EQUAL_64(0xccddeeff, x3); + CHECK_EQUAL_64(0xccddeeff00112233UL, dst[1]); + CHECK_EQUAL_64(0x8899aabbccddeeffUL, x4); + CHECK_EQUAL_64(0x8899aabbccddeeffUL, dst[2]); + CHECK_EQUAL_64(0xffeeddccbbaa9988UL, x5); + CHECK_EQUAL_64(0xffeeddccbbaa9988UL, dst[3]); + CHECK_EQUAL_64(0x8899aabb, x6); + CHECK_EQUAL_64(0xbbaa9988, x7); + CHECK_EQUAL_64(0xbbaa99888899aabbUL, dst[4]); + CHECK_EQUAL_64(0x8899aabbccddeeffUL, x8); + CHECK_EQUAL_64(0x8899aabbccddeeffUL, dst[5]); + CHECK_EQUAL_64(0xffeeddccbbaa9988UL, x9); + CHECK_EQUAL_64(0xffeeddccbbaa9988UL, dst[6]); + CHECK_EQUAL_64(src_base - base_offset, x20); + CHECK_EQUAL_64(dst_base - base_offset, x21); + CHECK_EQUAL_64(src_base + base_offset + 24, x18); + CHECK_EQUAL_64(dst_base + base_offset + 56, x19); + + TEARDOWN(); +} + + TEST(ldnp_stnp_offset) { INIT_V8(); SETUP(); @@ -3021,6 +3079,69 @@ TEST(ldp_stp_preindex) { } +TEST(ldp_stp_preindex_wide) { + INIT_V8(); + SETUP(); + + uint64_t src[3] = {0x0011223344556677, 0x8899aabbccddeeff, + 0xffeeddccbbaa9988}; + uint64_t dst[5] = {0, 0, 0, 0, 0}; + uintptr_t src_base = 
reinterpret_cast<uintptr_t>(src); + uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); + // Move base too far from the array to force multiple instructions + // to be emitted. + const int64_t base_offset = 1024; + + START(); + __ Mov(x24, src_base - base_offset); + __ Mov(x25, dst_base + base_offset); + __ Mov(x18, dst_base + base_offset + 16); + __ Ldp(w0, w1, MemOperand(x24, base_offset + 4, PreIndex)); + __ Mov(x19, x24); + __ Mov(x24, src_base - base_offset + 4); + __ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PreIndex)); + __ Stp(w2, w3, MemOperand(x25, 4 - base_offset, PreIndex)); + __ Mov(x20, x25); + __ Mov(x25, dst_base + base_offset + 4); + __ Mov(x24, src_base - base_offset); + __ Stp(w0, w1, MemOperand(x25, -4 - base_offset, PreIndex)); + __ Ldp(x4, x5, MemOperand(x24, base_offset + 8, PreIndex)); + __ Mov(x21, x24); + __ Mov(x24, src_base - base_offset + 8); + __ Ldp(x6, x7, MemOperand(x24, base_offset - 8, PreIndex)); + __ Stp(x7, x6, MemOperand(x18, 8 - base_offset, PreIndex)); + __ Mov(x22, x18); + __ Mov(x18, dst_base + base_offset + 16 + 8); + __ Stp(x5, x4, MemOperand(x18, -8 - base_offset, PreIndex)); + END(); + + RUN(); + + CHECK_EQUAL_64(0x00112233, x0); + CHECK_EQUAL_64(0xccddeeff, x1); + CHECK_EQUAL_64(0x44556677, x2); + CHECK_EQUAL_64(0x00112233, x3); + CHECK_EQUAL_64(0xccddeeff00112233UL, dst[0]); + CHECK_EQUAL_64(0x0000000000112233UL, dst[1]); + CHECK_EQUAL_64(0x8899aabbccddeeffUL, x4); + CHECK_EQUAL_64(0xffeeddccbbaa9988UL, x5); + CHECK_EQUAL_64(0x0011223344556677UL, x6); + CHECK_EQUAL_64(0x8899aabbccddeeffUL, x7); + CHECK_EQUAL_64(0xffeeddccbbaa9988UL, dst[2]); + CHECK_EQUAL_64(0x8899aabbccddeeffUL, dst[3]); + CHECK_EQUAL_64(0x0011223344556677UL, dst[4]); + CHECK_EQUAL_64(src_base, x24); + CHECK_EQUAL_64(dst_base, x25); + CHECK_EQUAL_64(dst_base + 16, x18); + CHECK_EQUAL_64(src_base + 4, x19); + CHECK_EQUAL_64(dst_base + 4, x20); + CHECK_EQUAL_64(src_base + 8, x21); + CHECK_EQUAL_64(dst_base + 24, x22); + + TEARDOWN(); +} + + TEST(ldp_stp_postindex) { 
INIT_V8(); SETUP(); @@ -3076,6 +3197,69 @@ TEST(ldp_stp_postindex) { } +TEST(ldp_stp_postindex_wide) { + INIT_V8(); + SETUP(); + + uint64_t src[4] = {0x0011223344556677, 0x8899aabbccddeeff, 0xffeeddccbbaa9988, + 0x7766554433221100}; + uint64_t dst[5] = {0, 0, 0, 0, 0}; + uintptr_t src_base = reinterpret_cast<uintptr_t>(src); + uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); + // Move base too far from the array to force multiple instructions + // to be emitted. + const int64_t base_offset = 1024; + + START(); + __ Mov(x24, src_base); + __ Mov(x25, dst_base); + __ Mov(x18, dst_base + 16); + __ Ldp(w0, w1, MemOperand(x24, base_offset + 4, PostIndex)); + __ Mov(x19, x24); + __ Sub(x24, x24, base_offset); + __ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PostIndex)); + __ Stp(w2, w3, MemOperand(x25, 4 - base_offset, PostIndex)); + __ Mov(x20, x25); + __ Sub(x24, x24, base_offset); + __ Add(x25, x25, base_offset); + __ Stp(w0, w1, MemOperand(x25, -4 - base_offset, PostIndex)); + __ Ldp(x4, x5, MemOperand(x24, base_offset + 8, PostIndex)); + __ Mov(x21, x24); + __ Sub(x24, x24, base_offset); + __ Ldp(x6, x7, MemOperand(x24, base_offset - 8, PostIndex)); + __ Stp(x7, x6, MemOperand(x18, 8 - base_offset, PostIndex)); + __ Mov(x22, x18); + __ Add(x18, x18, base_offset); + __ Stp(x5, x4, MemOperand(x18, -8 - base_offset, PostIndex)); + END(); + + RUN(); + + CHECK_EQUAL_64(0x44556677, x0); + CHECK_EQUAL_64(0x00112233, x1); + CHECK_EQUAL_64(0x00112233, x2); + CHECK_EQUAL_64(0xccddeeff, x3); + CHECK_EQUAL_64(0x4455667700112233UL, dst[0]); + CHECK_EQUAL_64(0x0000000000112233UL, dst[1]); + CHECK_EQUAL_64(0x0011223344556677UL, x4); + CHECK_EQUAL_64(0x8899aabbccddeeffUL, x5); + CHECK_EQUAL_64(0x8899aabbccddeeffUL, x6); + CHECK_EQUAL_64(0xffeeddccbbaa9988UL, x7); + CHECK_EQUAL_64(0xffeeddccbbaa9988UL, dst[2]); + CHECK_EQUAL_64(0x8899aabbccddeeffUL, dst[3]); + CHECK_EQUAL_64(0x0011223344556677UL, dst[4]); + CHECK_EQUAL_64(src_base + base_offset, x24); + CHECK_EQUAL_64(dst_base - base_offset, x25); 
+ CHECK_EQUAL_64(dst_base - base_offset + 16, x18); + CHECK_EQUAL_64(src_base + base_offset + 4, x19); + CHECK_EQUAL_64(dst_base - base_offset + 4, x20); + CHECK_EQUAL_64(src_base + base_offset + 8, x21); + CHECK_EQUAL_64(dst_base - base_offset + 24, x22); + + TEARDOWN(); +} + + TEST(ldp_sign_extend) { INIT_V8(); SETUP(); -- 2.7.4