/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2004-2007
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Vladimir Vukicevic <vladimir@pobox.com>
 *   Jacob Bramley <Jacob.Bramley@arm.com>
 *   Tero Koskinen <tero.koskinen@digia.com>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
#ifdef UNDER_CE
#include <cmnintrin.h>
#endif

#include "nanojit.h"

#if defined(FEATURE_NANOJIT) && defined(NANOJIT_ARM)
const char* regNames[] = {"r0","r1","r2","r3","r4","r5","r6","r7","r8","r9","r10","fp","ip","sp","lr","pc",
                          "d0","d1","d2","d3","d4","d5","d6","d7","s0"};
const char* condNames[] = {"eq","ne","cs","cc","mi","pl","vs","vc","hi","ls","ge","lt","gt","le",""/*al*/,"nv"};
const char* shiftNames[] = { "lsl", "lsl", "lsr", "lsr", "asr", "asr", "ror", "ror" };

const Register Assembler::argRegs[] = { R0, R1, R2, R3 };
const Register Assembler::retRegs[] = { R0, R1 };
const Register Assembler::savedRegs[] = { R4, R5, R6, R7, R8, R9, R10 };

// --------------------------------
// ARM-specific utility functions.
// --------------------------------
// Return true if enc is a valid Operand 2 encoding and thus can be used as-is
// in an ARM arithmetic operation that accepts such an encoding.
//
// This utility does not know (or determine) the actual value that the encoded
// value represents, and thus cannot be used to ensure the correct operation of
// encOp2Imm, but it does ensure that the encoded value can be used to encode a
// valid ARM instruction. decOp2Imm can be used if you also need to check that
// a literal is correctly encoded (and thus that encOp2Imm is working
// correctly).
Assembler::isOp2Imm(uint32_t enc)
    return ((enc & 0xfff) == enc);

// Decodes operand 2 immediate values (for debug output and assertions).
Assembler::decOp2Imm(uint32_t enc)
    NanoAssert(isOp2Imm(enc));

    uint32_t imm8 = enc & 0xff;
    uint32_t rot = 32 - ((enc >> 7) & 0x1e);

    return imm8 << (rot & 0x1f);
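
// Worked example (illustrative; not from the original source): for
// enc = 0x8ff, imm8 is 0xff and the rotate field is 8, i.e. a rotate
// right by 16 bits. decOp2Imm computes rot = 32 - 16 = 16 and returns
// 0xff << 16 = 0x00ff0000, which is exactly 0xff rotated right by 16.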

// Calculate the number of leading zeroes in data.
static inline uint32_t
CountLeadingZeroesSlow(uint32_t data)
    // Other platforms must fall back to a C routine. This won't be as
    // efficient as the CLZ instruction, but it is functional.
    uint32_t try_shift;

    uint32_t leading_zeroes = 0;

    // This loop does a bisection search rather than the obvious rotation loop.
    // This should be faster, though it will still be no match for CLZ.
    for (try_shift = 16; try_shift != 0; try_shift /= 2) {
        uint32_t shift = leading_zeroes + try_shift;
        if (((data << shift) >> shift) == data) {
            leading_zeroes = shift;
        }
    }

    return leading_zeroes;
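
// Illustrative trace (not from the original source): for data = 0x00ffffff,
// the loop tries shifts of 16, 8, 4, 2 and 1; only the probe at shift = 8
// preserves data when shifted out and back, so leading_zeroes settles at 8.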

Assembler::CountLeadingZeroes(uint32_t data)
    uint32_t leading_zeroes;

#if defined(__ARMCC__)
    // ARMCC can do this with an intrinsic.
    leading_zeroes = __clz(data);
#elif defined(__GNUC__)
    // GCC can use inline assembler to insert a CLZ instruction.
    if (ARM_ARCH_AT_LEAST(5)) {
#if defined(ANDROID) && (NJ_COMPILER_ARM_ARCH < 7)
        // With the Android gcc compiler, the clz instruction is not supported
        // for targets below armv7, despite being legal on armv5+, so fall
        // back to the C routine.
        leading_zeroes = CountLeadingZeroesSlow(data);
#elif (NJ_COMPILER_ARM_ARCH < 5)
        // Targeting armv5t allows a toolchain with an armv4t target to still
        // build with clz, and for clz to be used when appropriate at runtime.
        __asm__ (
            "   .arch armv5t\n"
            "   clz %0, %1\n"
            : "=r" (leading_zeroes)
            : "r" (data)
        );
#else
        __asm__ (
            "   clz %0, %1\n"
            : "=r" (leading_zeroes)
            : "r" (data)
        );
#endif
    } else {
        leading_zeroes = CountLeadingZeroesSlow(data);
    }
#elif defined(UNDER_CE)
    // WinCE can do this with an intrinsic.
    leading_zeroes = _CountLeadingZeros(data);
#else
    leading_zeroes = CountLeadingZeroesSlow(data);
#endif

    // Assert that the operation worked!
    NanoAssert(((0xffffffff >> leading_zeroes) & data) == data);

    return leading_zeroes;

// The ARM instruction set allows some flexibility in the second operand of
// most arithmetic operations. When operand 2 is an immediate value, it takes
// the form of an 8-bit value rotated right by an even value in the range 0-30.
//
// Some values that can be encoded with this scheme (such as 0xf000000f) are
// probably fairly rare in practice and require extra code to detect, so this
// function implements a fast CLZ-based heuristic to detect any value that can
// be encoded using just a shift, and not a full rotation. For example,
// 0xff000000 and 0x000000ff are both detected, but 0xf000000f is not.
//
// This function will return true to indicate that the encoding was successful,
// or false to indicate that the literal could not be encoded as an operand 2
// immediate. If successful, the encoded value will be written to *enc.
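//
// Illustrative examples (not from the original source): 0x00ff0000 is 0xff
// shifted left by 16, so this heuristic accepts it and encodes it as
// imm8 = 0xff with a rotate-right of 16 (encoding 0x8ff). 0xf000000f is a
// valid operand 2 (0xff rotated right by 4, encoding 0x2ff), but it needs a
// true rotation rather than a shift, so this heuristic rejects it and the
// caller must load the value some other way (such as with LDR).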
Assembler::encOp2Imm(uint32_t literal, uint32_t * enc)
    // The number of leading zeroes in the literal. This is used to calculate
    // the rotation component of the encoding.
    uint32_t leading_zeroes;

    // Components of the operand 2 encoding.
    uint32_t rot;
    uint32_t imm8;

    // Check the literal to see if it is a simple 8-bit value. I suspect that
    // most literals are in fact small values, so doing this check early should
    // give a decent speed-up.
    if (literal < 256) {
        *enc = literal;
        return true;
    }

    // Determine the number of leading zeroes in the literal. This is used to
    // calculate the required rotation.
    leading_zeroes = CountLeadingZeroes(literal);

    // We've already done a check to see if the literal is an 8-bit value, so
    // leading_zeroes must be less than (and not equal to) (32-8)=24. However,
    // if it is greater than 24, this algorithm will break, so debug code
    // should use an assertion here to check that we have a value that we
    // can encode.
    NanoAssert(leading_zeroes < 24);

    // Assuming that we have a field of no more than 8 bits for a valid
    // literal, we can calculate the required rotation by subtracting
    // leading_zeroes from (32-8):
    //
    //      0: Known to be zero.
    //      1: Known to be one.
    //      X: Either zero or one.
    //      .: Zero in a valid operand 2 literal.
    //
    //      Literal:     [ 1XXXXXXX ........ ........ ........ ]
    //      leading_zeroes = 0
    //      Therefore rot (left) = 24.
    //      Encoded 8-bit literal:                  [ 1XXXXXXX ]
    //
    //      Literal:     [ ........ ..1XXXXX XX...... ........ ]
    //      leading_zeroes = 10
    //      Therefore rot (left) = 14.
    //      Encoded 8-bit literal:                  [ 1XXXXXXX ]
    //
    // Note, however, that we can only encode even shifts, and so
    // "rot=24-leading_zeroes" is not sufficient by itself. By ignoring
    // zero-bits in odd bit positions, we can ensure that we get a valid
    // encoding.
    //
    //      Literal:     [ 01XXXXXX ........ ........ ........ ]
    //      leading_zeroes = 1
    //      Therefore rot (left) = round_up(23) = 24.
    //      Encoded 8-bit literal:                  [ 01XXXXXX ]
    rot = 24 - (leading_zeroes & ~1);

    // The imm8 component of the operand 2 encoding can be calculated from the
    // rot value.
    imm8 = literal >> rot;

    // The validity of the literal can be checked by reversing the
    // calculation. It is much easier to decode the immediate than it is to
    // encode it in the first place.
    if (literal != (imm8 << rot)) {
        // The encoding is not valid, so report the failure. Calling code
        // should use some other method of loading the value (such as LDR).
        return false;
    }

    // The operand is valid, so encode it.
    // Note that the ARM encoding is actually described by a rotate to the
    // _right_, so rot must be negated here. Calculating a left shift (rather
    // than calculating a right rotation) simplifies the above code.
    *enc = ((-rot << 7) & 0xf00) | imm8;

    // Assert that the operand was properly encoded.
    NanoAssert(decOp2Imm(*enc) == literal);

    return true;

// Encode "rd = rn + imm" using an appropriate instruction sequence.
// Set stat to 1 to update the status flags. Otherwise, set it to 0 or omit it.
// (The declaration in NativeARM.h defines the default value of stat as 0.)
//
// It is not valid to call this function if:
//   (rd == IP) AND (rn == IP) AND !encOp2Imm(imm) AND !encOp2Imm(-imm)
// Where: if (encOp2Imm(imm)), imm can be encoded as an ARM operand 2 using the
//        encOp2Imm method.
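//
// Illustrative examples (not from the original source):
//   asm_add_imm(R0, R1, 0xff0) emits a single "ADD r0, r1, #0xff0", because
//   0xff0 is a valid operand 2 immediate; asm_add_imm(R0, R1, -4) becomes
//   "SUB r0, r1, #4"; a constant such as 0x12345 fits neither form, so the
//   fallback loads it into IP first:
//       LDR ip, =0x12345
//       ADD r0, r1, ip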
Assembler::asm_add_imm(Register rd, Register rn, int32_t imm, int stat /* =0 */)
    // Operand 2 encoding of the immediate.
    uint32_t op2imm;

    NanoAssert(IsGpReg(rd));
    NanoAssert(IsGpReg(rn));
    NanoAssert((stat & 1) == stat);

    // As a special case to simplify code elsewhere, emit nothing where we
    // don't want to update the flags (stat == 0), the second operand is 0 and
    // (rd == rn). Such instructions are effectively NOPs.
    if ((imm == 0) && (stat == 0) && (rd == rn)) {
        return;
    }

    // Try to encode the value directly as an operand 2 immediate value, then
    // fall back to loading the value into a register.
    if (encOp2Imm(imm, &op2imm)) {
        ADDis(rd, rn, op2imm, stat);
    } else if (encOp2Imm(-imm, &op2imm)) {
        // We could not encode the value for ADD, so try to encode it for SUB.
        // Note that this is valid even if stat is set, _unless_ imm is 0, but
        // that case is caught above.
        NanoAssert(imm != 0);
        SUBis(rd, rn, op2imm, stat);
    } else {
        // We couldn't encode the value directly, so use an intermediate
        // register to encode the value. We will use IP to do this unless rn is
        // IP; in that case we can reuse rd. This allows every case other than
        // "ADD IP, IP, =#imm".
        Register rm = (rn == IP) ? (rd) : (IP);
        NanoAssert(rn != rm);

        ADDs(rd, rn, rm, stat);
        asm_ld_imm(rm, imm);
    }

// Encode "rd = rn - imm" using an appropriate instruction sequence.
// Set stat to 1 to update the status flags. Otherwise, set it to 0 or omit it.
// (The declaration in NativeARM.h defines the default value of stat as 0.)
//
// It is not valid to call this function if:
//   (rd == IP) AND (rn == IP) AND !encOp2Imm(imm) AND !encOp2Imm(-imm)
// Where: if (encOp2Imm(imm)), imm can be encoded as an ARM operand 2 using the
//        encOp2Imm method.
Assembler::asm_sub_imm(Register rd, Register rn, int32_t imm, int stat /* =0 */)
    // Operand 2 encoding of the immediate.
    uint32_t op2imm;

    NanoAssert(IsGpReg(rd));
    NanoAssert(IsGpReg(rn));
    NanoAssert((stat & 1) == stat);

    // As a special case to simplify code elsewhere, emit nothing where we
    // don't want to update the flags (stat == 0), the second operand is 0 and
    // (rd == rn). Such instructions are effectively NOPs.
    if ((imm == 0) && (stat == 0) && (rd == rn)) {
        return;
    }

    // Try to encode the value directly as an operand 2 immediate value, then
    // fall back to loading the value into a register.
    if (encOp2Imm(imm, &op2imm)) {
        SUBis(rd, rn, op2imm, stat);
    } else if (encOp2Imm(-imm, &op2imm)) {
        // We could not encode the value for SUB, so try to encode it for ADD.
        // Note that this is valid even if stat is set, _unless_ imm is 0, but
        // that case is caught above.
        NanoAssert(imm != 0);
        ADDis(rd, rn, op2imm, stat);
    } else {
        // We couldn't encode the value directly, so use an intermediate
        // register to encode the value. We will use IP to do this unless rn is
        // IP; in that case we can reuse rd. This allows every case other than
        // "SUB IP, IP, =#imm".
        Register rm = (rn == IP) ? (rd) : (IP);
        NanoAssert(rn != rm);

        SUBs(rd, rn, rm, stat);
        asm_ld_imm(rm, imm);
    }

// Encode "rd = rn & imm" using an appropriate instruction sequence.
// Set stat to 1 to update the status flags. Otherwise, set it to 0 or omit it.
// (The declaration in NativeARM.h defines the default value of stat as 0.)
//
// It is not valid to call this function if:
//   (rd == IP) AND (rn == IP) AND !encOp2Imm(imm) AND !encOp2Imm(~imm)
// Where: if (encOp2Imm(imm)), imm can be encoded as an ARM operand 2 using the
//        encOp2Imm method.
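//
// Illustrative example (not from the original source): imm = 0xffffff00 is
// not a valid operand 2 immediate, but its inverse ~imm = 0xff is, so
// "rd = rn & 0xffffff00" becomes the single instruction "BIC rd, rn, #0xff",
// which clears the low byte.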
Assembler::asm_and_imm(Register rd, Register rn, int32_t imm, int stat /* =0 */)
    // Operand 2 encoding of the immediate.
    uint32_t op2imm;

    NanoAssert(IsGpReg(rd));
    NanoAssert(IsGpReg(rn));
    NanoAssert((stat & 1) == stat);

    // Try to encode the value directly as an operand 2 immediate value, then
    // fall back to loading the value into a register.
    if (encOp2Imm(imm, &op2imm)) {
        ANDis(rd, rn, op2imm, stat);
    } else if (encOp2Imm(~imm, &op2imm)) {
        // Use BIC with the inverted immediate.
        BICis(rd, rn, op2imm, stat);
    } else {
        // We couldn't encode the value directly, so use an intermediate
        // register to encode the value. We will use IP to do this unless rn is
        // IP; in that case we can reuse rd. This allows every case other than
        // "AND IP, IP, =#imm".
        Register rm = (rn == IP) ? (rd) : (IP);
        NanoAssert(rn != rm);

        ANDs(rd, rn, rm, stat);
        asm_ld_imm(rm, imm);
    }

// Encode "rd = rn | imm" using an appropriate instruction sequence.
// Set stat to 1 to update the status flags. Otherwise, set it to 0 or omit it.
// (The declaration in NativeARM.h defines the default value of stat as 0.)
//
// It is not valid to call this function if:
//   (rd == IP) AND (rn == IP) AND !encOp2Imm(imm)
// Where: if (encOp2Imm(imm)), imm can be encoded as an ARM operand 2 using the
//        encOp2Imm method.
Assembler::asm_orr_imm(Register rd, Register rn, int32_t imm, int stat /* =0 */)
    // Operand 2 encoding of the immediate.
    uint32_t op2imm;

    NanoAssert(IsGpReg(rd));
    NanoAssert(IsGpReg(rn));
    NanoAssert((stat & 1) == stat);

    // Try to encode the value directly as an operand 2 immediate value, then
    // fall back to loading the value into a register.
    if (encOp2Imm(imm, &op2imm)) {
        ORRis(rd, rn, op2imm, stat);
    } else {
        // We couldn't encode the value directly, so use an intermediate
        // register to encode the value. We will use IP to do this unless rn is
        // IP; in that case we can reuse rd. This allows every case other than
        // "ORR IP, IP, =#imm".
        Register rm = (rn == IP) ? (rd) : (IP);
        NanoAssert(rn != rm);

        ORRs(rd, rn, rm, stat);
        asm_ld_imm(rm, imm);
    }

// Encode "rd = rn ^ imm" using an appropriate instruction sequence.
// Set stat to 1 to update the status flags. Otherwise, set it to 0 or omit it.
// (The declaration in NativeARM.h defines the default value of stat as 0.)
//
// It is not valid to call this function if:
//   (rd == IP) AND (rn == IP) AND !encOp2Imm(imm)
// Where: if (encOp2Imm(imm)), imm can be encoded as an ARM operand 2 using the
//        encOp2Imm method.
Assembler::asm_eor_imm(Register rd, Register rn, int32_t imm, int stat /* =0 */)
    // Operand 2 encoding of the immediate.
    uint32_t op2imm;

    NanoAssert(IsGpReg(rd));
    NanoAssert(IsGpReg(rn));
    NanoAssert((stat & 1) == stat);

    // Try to encode the value directly as an operand 2 immediate value, then
    // fall back to loading the value into a register.
    if (encOp2Imm(imm, &op2imm)) {
        EORis(rd, rn, op2imm, stat);
    } else {
        // We couldn't encode the value directly, so use an intermediate
        // register to encode the value. We will use IP to do this unless rn is
        // IP; in that case we can reuse rd. This allows every case other than
        // "EOR IP, IP, =#imm".
        Register rm = (rn == IP) ? (rd) : (IP);
        NanoAssert(rn != rm);

        EORs(rd, rn, rm, stat);
        asm_ld_imm(rm, imm);
    }

// --------------------------------
// Assembler functions.
// --------------------------------

Assembler::nInit(AvmCore*)
    nHints[LIR_calli]  = rmask(retRegs[0]);
    nHints[LIR_hcalli] = rmask(retRegs[1]);
    nHints[LIR_paramp] = PREFER_SPECIAL;

void Assembler::nBeginAssembly()

Assembler::genPrologue()
    // NJ_RESV_OFFSET is space at the top of the stack for us
    // to use for parameter passing (8 bytes at the moment).
    uint32_t stackNeeded = max_out_args + STACK_GRANULARITY * _activation.stackSlotsNeeded();
    uint32_t savingCount = 2;

    uint32_t savingMask = rmask(FP) | rmask(LR);

    // For alignment purposes, we've pushed the return address and FP.
    uint32_t stackPushed = STACK_GRANULARITY * savingCount;
    uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
    int32_t amt = aligned - stackPushed;
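
    // Illustrative arithmetic (not from the original source): with three
    // 4-byte stack slots, no outgoing arguments and NJ_ALIGN_STACK = 8,
    // stackNeeded = 12 and stackPushed = 8 (the saved FP and LR), so
    // aligned = alignUp(20, 8) = 24 and amt = 16 bytes of frame to reserve.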

    // Make room on the stack for what we are doing.
    asm_sub_imm(SP, SP, amt);

    verbose_only( asm_output("## %p:",(void*)_nIns); )
    verbose_only( asm_output("## patch entry"); )
    NIns *patchEntry = _nIns;

    PUSH_mask(savingMask);

    return patchEntry;

Assembler::nFragExit(LIns* guard)
    SideExit* exit = guard->record()->exit;
    Fragment* frag = exit->target;

    bool target_is_known = frag && frag->fragEntry;

    if (target_is_known) {
        // The target exists, so we can simply emit a branch to its location.
        JMP_far(frag->fragEntry);
    } else {
        // The target doesn't exist yet, so emit a jump to the epilogue. If the
        // target is created later on, the jump will be patched.

        GuardRecord *gr = guard->record();

        _epilogue = genEpilogue();

        // Jump to the epilogue. This may get patched later, but JMP_far always
        // emits two instructions even when only one is required, so patching
        // will work correctly.
        JMP_far(_epilogue);

        // In the future you may want to move this further down so that we can
        // overwrite the r0 guard record load during a patch to a different
        // fragment with some assumed input-register state. Not today though.

        // NB: this is a workaround for the fact that, by patching a
        // fragment-exit jump, we could be changing the *meaning* of the R0
        // register we're passing to the jump target. If we jump to the
        // epilogue, ideally R0 means "return value when exiting fragment".
        // If we patch this to jump to another fragment, however, R0 means
        // "incoming 0th parameter". This is just a quirk of the ARM ABI. So
        // we compromise by passing "return value" to the epilogue in IP,
        // not R0, and have the epilogue MOV(R0, IP) first thing.

        asm_ld_imm(IP, int(gr));
    }

    if (_config.arm_show_stats) {
        // Load R1 with Fragment *fromFrag; the target fragment
        // will make use of this when calling fragenter().
        int fromfrag = int((Fragment*)_thisfrag);
        asm_ld_imm(argRegs[1], fromfrag);
    }

    // Profiling for the exit.
    if (_logc->lcbits & LC_FragProfile) {
        asm_inc_m32( &guard->record()->profCount );
    }

    // Pop the stack frame.
    MOV(SP, FP);

Assembler::genEpilogue()
    RegisterMask savingMask;

    if (ARM_ARCH_AT_LEAST(5)) {
        // On ARMv5+, loading directly to the PC correctly handles interworking.
        savingMask = rmask(FP) | rmask(PC);
    } else {
        // On ARMv4T, interworking is not handled properly; therefore, we pop
        // into LR and use BX LR to avoid the problem.
        savingMask = rmask(FP) | rmask(LR);
    }

    POP_mask(savingMask); // regs

    // NB: this is the latter half of the dual-nature patchable exit branch
    // workaround noted above in nFragExit. IP has the "return value"
    // incoming; we need to move it to R0.
    MOV(R0, IP);

    return _nIns;

/*
 * asm_arg will encode the specified argument according to the current ABI, and
 * will update r and stkd as appropriate so that the next argument can be
 * encoded.
 *
 * Linux has used ARM's EABI for some time. Windows CE uses the legacy ABI.
 *
 * Under EABI:
 * - doubles are 64-bit aligned both in registers and on the stack.
 *   If the next available argument register is R1, it is skipped
 *   and the double is placed in R2:R3. If R0:R1 or R2:R3 are not
 *   available, the double is placed on the stack, 64-bit aligned.
 * - 32-bit arguments are placed in registers and 32-bit aligned.
 *
 * Under EABI with hardware floating-point procedure-call variant:
 * - Same as EABI, but doubles are passed in D0..D7 registers.
 *
 * Under legacy ABI:
 * - doubles are placed in subsequent arg registers; if the next
 *   available register is r3, the low order word goes into r3
 *   and the high order goes on the stack.
 * - 32-bit arguments are placed in the next available arg register.
 * - both doubles and 32-bit arguments are placed on stack with 32-bit
 *   alignment.
 */
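
// Illustrative example (not from the original source): for a call
// f(int a, double b, int c) under (softfp) EABI, a goes in R0, R1 is skipped
// so that b sits 64-bit aligned in R2:R3, and c overflows to the stack at
// [SP, #0]. Under the legacy ABI, b would instead occupy R1:R2 and c would
// go in R3.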
Assembler::asm_arg(ArgType ty, LIns* arg, ParameterRegisters& params)
    // The stack pointer must always be at least aligned to 4 bytes.
    NanoAssert((params.stkd & 3) == 0);

    if (ty == ARGTYPE_D) {
        // This task is fairly complex and so is delegated to asm_arg_64.
        asm_arg_64(arg, params);
    } else {
        NanoAssert(ty == ARGTYPE_I || ty == ARGTYPE_UI);
        // Pre-assign registers R0-R3 for arguments (if they fit).
        if (params.r < R4) {
            asm_regarg(ty, arg, params.r);
            params.r = Register(params.r + 1);
        } else {
            asm_stkarg(arg, params.stkd);
            params.stkd += 4;
        }
    }

// Encode a 64-bit floating-point argument using the appropriate ABI.
// This function operates in the same way as asm_arg, except that it will only
// handle arguments where (ArgType)ty == ARGTYPE_D.
#ifdef NJ_ARM_EABI_HARD_FLOAT
Assembler::asm_arg_64(LIns* arg, ParameterRegisters& params)
    NanoAssert(IsFpReg(params.float_r));
    if (params.float_r <= D7) {
        findSpecificRegFor(arg, params.float_r);
        params.float_r = Register(params.float_r + 1);
    } else {
        NanoAssertMsg(0, "Only 8 floating point arguments supported");
    }
#else
Assembler::asm_arg_64(LIns* arg, ParameterRegisters& params)
    // The stack pointer must always be at least aligned to 4 bytes.
    NanoAssert((params.stkd & 3) == 0);
    // The only use for this function when we are using soft floating-point
    // is for LIR_ii2d.
    NanoAssert(ARM_VFP || arg->isop(LIR_ii2d));

#ifdef NJ_ARM_EABI
    // EABI requires that 64-bit arguments are aligned on even-numbered
    // registers, as R0:R1 or R2:R3. If the register base is at an
    // odd-numbered register, advance it. Note that this will push r past
    // R3 if r is R3 to start with, and will force the argument to go on
    // the stack.
    if ((params.r == R1) || (params.r == R3)) {
        params.r = Register(params.r + 1);
    }
#endif

    if (params.r < R3) {
        Register ra = params.r;
        Register rb = Register(params.r + 1);
        params.r = Register(rb + 1);

#ifdef NJ_ARM_EABI
        // EABI requires that 64-bit arguments are aligned on even-numbered
        // registers, as R0:R1 or R2:R3.
        NanoAssert( ((ra == R0) && (rb == R1)) || ((ra == R2) && (rb == R3)) );
#endif

        // Put the argument in ra and rb. If the argument is in a VFP register,
        // use FMRRD to move it to ra and rb. Otherwise, let asm_regarg deal
        // with the argument as if it were two 32-bit arguments.
        if (ARM_VFP) {
            Register dm = findRegFor(arg, FpRegs);
            FMRRD(ra, rb, dm);
        } else {
            asm_regarg(ARGTYPE_I, arg->oprnd1(), ra);
            asm_regarg(ARGTYPE_I, arg->oprnd2(), rb);
        }
    } else if (params.r == R3) {
        // We only have one register left, but the legacy ABI requires that we
        // put 32 bits of the argument in the register (R3) and the remaining
        // 32 bits on the stack.
        Register ra = params.r; // R3

        // We're splitting the argument between registers and the stack. This
        // must be the first time that the stack is used, so stkd must be at 0.
        NanoAssert(params.stkd == 0);

        if (ARM_VFP) {
            Register dm = findRegFor(arg, FpRegs);
            // TODO: We could optimize this to store directly from
            // the VFP register to memory using "FMRRD ra, fp_reg[31:0]" and
            // "STR fp_reg[63:32], [SP, #stkd]".

            // Load from the floating-point register as usual, but use IP
            // as a swap register.
            asm_str(IP, SP, 0);
            FMRRD(ra, IP, dm);
        } else {
            // Without VFP, we can simply use asm_regarg and asm_stkarg to
            // encode the two 32-bit words, as we don't need to load from a
            // VFP register.
            asm_regarg(ARGTYPE_I, arg->oprnd1(), ra);
            asm_stkarg(arg->oprnd2(), 0);
        }
        params.stkd += 4;
    } else {
        // The argument won't fit in registers, so pass it on to asm_stkarg.

#ifdef NJ_ARM_EABI
        // EABI requires that 64-bit arguments are 64-bit aligned.
        if ((params.stkd & 7) != 0) {
            // stkd will always be aligned to at least 4 bytes; this was
            // asserted on entry to this function.
            params.stkd += 4;
        }
#endif

        if (ARM_VFP) {
            asm_stkarg(arg, params.stkd);
        } else {
            asm_stkarg(arg->oprnd1(), params.stkd);
            asm_stkarg(arg->oprnd2(), params.stkd+4);
        }
        params.stkd += 8;
    }
#endif // NJ_ARM_EABI_HARD_FLOAT

Assembler::asm_regarg(ArgType ty, LIns* p, Register rd)
    // Note that we don't have to prepareResultReg here because it is already
    // done by the caller, and the target register is passed as 'rd'.
    // Similarly, we don't have to freeResourcesOf(p).

    if (ty == ARGTYPE_I || ty == ARGTYPE_UI) {
        // Put the argument in register rd.
        if (p->isImmI()) {
            asm_ld_imm(rd, p->immI());
        } else if (p->isInReg()) {
            MOV(rd, p->getReg());
        } else {
            // Re-use the target register if the source is no longer
            // required. This saves a MOV instruction.
            findSpecificRegForUnallocated(p, rd);
        }
    } else {
        NanoAssert(ty == ARGTYPE_D);
        // Floating-point arguments are handled as two integer arguments.
    }

Assembler::asm_stkarg(LIns* arg, int stkd)
    // The ABI doesn't allow accesses below the SP.
    NanoAssert(stkd >= 0);
    // The argument resides somewhere in registers, so we simply need to
    // push it onto the stack.
    if (!ARM_VFP || !arg->isD()) {
        Register rt = findRegFor(arg, GpRegs);
        asm_str(rt, SP, stkd);
    } else {
        // According to the comments in asm_arg_64, LIR_ii2d
        // can have a 64-bit argument even if VFP is disabled. However,
        // asm_arg_64 will split the argument and issue two 32-bit
        // arguments to asm_stkarg, so we can ignore that case here.
        NanoAssert(arg->isD());

        Register dt = findRegFor(arg, FpRegs);

        // EABI requires that 64-bit arguments are 64-bit aligned.
        NanoAssert((stkd % 8) == 0);

        FSTD(dt, SP, stkd);
    }

Assembler::asm_call(LIns* ins)
    if (ARM_VFP && ins->isop(LIR_calld)) {
        /* Because ARM actually returns the result in (R0,R1), and not in a
         * floating point register, the code to move the result into a correct
         * register is below. We do nothing here.
         *
         * The reason being that if we did something here, the final code
         * sequence we'd get would be something like:
         *     MOV {R0-R3},params        [from below]
         *     BL function               [from below]
         *     MOV {R0-R3},spilled data  [from evictScratchRegsExcept()]
         *     MOV Dx,{R0,R1}            [from here]
         * which is clearly broken.
         *
         * This is not a problem for non-floating point calls, because the
         * restoring of spilled data into R0 is done via a call to
         * prepareResultReg(R0) in the other branch of this if-then-else,
         * meaning that evictScratchRegsExcept() will not modify R0. However,
         * prepareResultReg is not aware of the concept of using a register
         * pair (R0,R1) for the result of a single operation, so it can only
         * be used here with the ultimate VFP register, and not with R0/R1,
         * which potentially allows for R0/R1 to get corrupted as described.
         */
#ifdef NJ_ARM_EABI_HARD_FLOAT
        /* With the ARM hardware floating point ABI, D0 is used to return the
         * double from the function. We need to prepare it like we do for R0
         * in the else branch.
         */
        prepareResultReg(ins, rmask(D0));
        freeResourcesOf(ins);
#endif
    } else if (!ins->isop(LIR_callv)) {
        prepareResultReg(ins, rmask(retRegs[0]));
        // Immediately free the resources as we need to re-use the register for
        // the arguments.
        freeResourcesOf(ins);
    }

    // Do this after we've handled the call result, so we don't
    // force the call result to be spilled unnecessarily.
    evictScratchRegsExcept(0);

    const CallInfo* ci = ins->callInfo();
    ArgType argTypes[MAXARGS];
    uint32_t argc = ci->getArgTypes(argTypes);
    bool indirect = ci->isIndirect();

    // If we aren't using VFP, assert that the LIR operation is an integer
    // function call.
    NanoAssert(ARM_VFP || ins->isop(LIR_callv) || ins->isop(LIR_calli));

    // If we're using VFP, but not the hardware floating-point ABI, and
    // the return type is a double, it'll come back in R0/R1.
    // We need to either place it in the result fp reg, or store it.
    // See the comments above for more details as to why this is necessary here
    // for floating point calls, but not for integer calls.
    if (!ARM_EABI_HARD && ARM_VFP && ins->isExtant()) {
        // If the result is a floating-point value, treat the result
        // specially, as described previously.
        if (ci->returnType() == ARGTYPE_D) {
            NanoAssert(ins->isop(LIR_calld));

            if (ins->isInReg()) {
                Register dd = ins->getReg();
                // Copy the result to the (VFP) result register.
                FMDRR(dd, R0, R1);
            } else {
                int d = findMemFor(ins);
                // Immediately free the resources so the arguments can re-use
                // the register.
                freeResourcesOf(ins);

                // The result doesn't have a register allocated, so store the
                // result (in R0,R1) directly to its stack slot.
                asm_str(R0, FP, d+0);
                asm_str(R1, FP, d+4);
            }
        }
    }

    verbose_only(if (_logc->lcbits & LC_Native)
        outputf("    %p:", _nIns);
    )

    if (!indirect) {
        BranchWithLink((NIns*)ci->_address);
    } else {
        // Indirect call: we assign the address arg to LR.
        if (ARM_ARCH_AT_LEAST(5)) {
            // Workaround for the msft device emulator bug (blx lr emulated as
            // a no-op): branch through IP instead of LR.
            BLX(IP);
            MOV(IP, LR);
        }
        asm_regarg(ARGTYPE_I, ins->arg(--argc), LR);
    }

    // Encode the arguments, starting at R0 and with an empty argument stack (0).
    // With the hardware fp ABI, floating point arguments start from D0.
    ParameterRegisters params = init_params(0, R0, D0);

    // Iterate through the argument list and encode each argument according to
    // the ABI.
    // Note that we loop through the arguments backwards as LIR specifies them
    // in reverse order.
    uint32_t i = argc;
    while (i--) {
        asm_arg(argTypes[i], ins->arg(i), params);
    }

    if (params.stkd > max_out_args) {
        max_out_args = params.stkd;
    }

Assembler::nRegisterAllocFromSet(RegisterMask set)
    NanoAssert(set != 0);

    // The CountLeadingZeroes function will use the CLZ instruction where
    // available. In other cases, it will fall back to a (slower) C
    // implementation.
    Register r = (Register)(31-CountLeadingZeroes(set));
    _allocator.free &= ~rmask(r);

    NanoAssert(IsGpReg(r) || IsFpReg(r));
    NanoAssert((rmask(r) & set) == rmask(r));

    return r;

Assembler::nRegisterResetAll(RegAlloc& a)
    // Add scratch registers to our free list for the allocator.
    a.free =
        rmask(R0) | rmask(R1) | rmask(R2) | rmask(R3) | rmask(R4) |
        rmask(R5) | rmask(R6) | rmask(R7) | rmask(R8) | rmask(R9) |
        rmask(R10) | rmask(LR);
    if (ARM_VFP) {
        a.free |=
            rmask(D0) | rmask(D1) | rmask(D2) | rmask(D3) |
            rmask(D4) | rmask(D5) | rmask(D6) | rmask(D7);
    }

static inline ConditionCode
get_cc(NIns* ins)
    return ConditionCode((*ins >> 28) & 0xF);

static inline bool
branch_is_B(NIns* branch)
    return (*branch & 0x0E000000) == 0x0A000000;

static inline bool
branch_is_LDR_PC(NIns* branch)
    return (*branch & 0x0F7FF000) == 0x051FF000;

// Is this an instruction of the form ldr/str reg, [fp, #-imm]?
static inline bool
is_ldstr_reg_fp_minus_imm(/*OUT*/uint32_t* isLoad, /*OUT*/uint32_t* rX,
                          /*OUT*/uint32_t* immX, NIns i1)
    if ((i1 & 0xFFEF0000) != 0xE50B0000)
        return false;
    *isLoad = (i1 >> 20) & 1;
    *rX   = (i1 >> 12) & 0xF;
    *immX = i1 & 0xFFF;
    return true;

// Is this an instruction of the form ldmdb/stmdb fp, regset?
static inline bool
is_ldstmdb_fp(/*OUT*/uint32_t* isLoad, /*OUT*/uint32_t* regSet, NIns i1)
    if ((i1 & 0xFFEF0000) != 0xE90B0000)
        return false;
    *isLoad = (i1 >> 20) & 1;
    *regSet = i1 & 0xFFFF;
    return true;

// Make an instruction of the form ldmdb/stmdb fp, regset.
static inline NIns
mk_ldstmdb_fp(uint32_t isLoad, uint32_t regSet)
    return 0xE90B0000 | (regSet & 0xFFFF) | ((isLoad & 1) << 20);

// Compute the number of 1 bits in the lowest 16 bits of regSet.
static inline uint32_t
size_of_regSet(uint32_t regSet)
    uint32_t x = regSet;
    x = (x & 0x5555) + ((x >> 1) & 0x5555);
    x = (x & 0x3333) + ((x >> 2) & 0x3333);
    x = (x & 0x0F0F) + ((x >> 4) & 0x0F0F);
    x = (x & 0x00FF) + ((x >> 8) & 0x00FF);
    return x;
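
// Illustrative trace (not from the original source): for regSet = 0x00F0
// (r4-r7), the successive steps give 0x00A0, 0x0040, 0x0004 and finally
// 0x0004, i.e. four registers in the set.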

// See if two ARM instructions, i1 and i2, can be combined into one.
static bool
do_peep_2_1(/*OUT*/NIns* merged, NIns i1, NIns i2)
    uint32_t rX, rY, immX, immY, isLoadX, isLoadY, regSet;
    /*   ld/str rX, [fp, #-8]
         ld/str rY, [fp, #-4]
         ==>
         ld/stmdb fp, {rX, rY}
         when
         X < Y and X != fp and Y != fp and X != 15 and Y != 15
    */
    if (is_ldstr_reg_fp_minus_imm(&isLoadX, &rX, &immX, i1) &&
        is_ldstr_reg_fp_minus_imm(&isLoadY, &rY, &immY, i2) &&
        immX == 8 && immY == 4 && rX < rY &&
        isLoadX == isLoadY &&
        rX != FP && rY != FP &&
        rX != 15 && rY != 15) {
        *merged = mk_ldstmdb_fp(isLoadX, (1 << rX) | (1 << rY));
        return true;
    }
    /*   ld/str rX, [fp, #-N]
         ld/stmdb fp, regset
         ==>
         ld/stmdb fp, union(regset, {rX})
         when
         X < all elements of regset
         N == 4 * (1 + card(regset))
         X != fp and X != 15
    */
    if (is_ldstr_reg_fp_minus_imm(&isLoadX, &rX, &immX, i1) &&
        is_ldstmdb_fp(&isLoadY, &regSet, i2) &&
        (regSet & ((1 << (rX + 1)) - 1)) == 0 &&
        immX == 4 * (1 + size_of_regSet(regSet)) &&
        isLoadX == isLoadY &&
        rX != FP && rX != 15) {
        *merged = mk_ldstmdb_fp(isLoadX, regSet | (1 << rX));
        return true;
    }
    return false;
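
// Illustrative example (not from the original source): the adjacent pair
//     str r4, [fp, #-8]
//     str r5, [fp, #-4]
// matches the first pattern and merges into "stmdb fp, {r4, r5}". If that
// result is later preceded by "str r3, [fp, #-12]", the second pattern fires
// (N = 12 = 4 * (1 + 2)) and the stores become "stmdb fp, {r3, r4, r5}".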

// Determine whether or not it's safe to look at _nIns[1].
// Necessary condition for safe peepholing with do_peep_2_1.
static bool
does_next_instruction_exist(NIns* _nIns, NIns* codeStart, NIns* codeEnd,
                            NIns* exitStart, NIns* exitEnd)
    return (exitStart <= _nIns && _nIns+1 < exitEnd) ||
           (codeStart <= _nIns && _nIns+1 < codeEnd);

Assembler::nPatchBranch(NIns* branch, NIns* target)
    // Patch the jump in a loop.

    // There are two feasible cases here, the first of which has 2 sub-cases:
    //
    // (1) We are patching a patchable unconditional jump emitted by
    //     JMP_far. All possible encodings we may be looking at involve
    //     2 words, though we *may* have to change from 1 word to 2 or
    //     vice versa.
    //
    //     1a: B ±32MB ; BKPT
    //     1b: LDR PC [PC, #-4] ; $imm
    //
    // (2) We are patching a patchable conditional jump emitted by
    //     B_cond_chk. Short conditional jumps are non-patchable, so we
    //     won't have one here; we will only ever have an instruction of the
    //     following form:
    //
    //     LDRcc PC [PC, #lit] ...
    //
    //     We don't actually know whether the lit-address is in the
    //     constant pool or in-line of the instruction stream, following
    //     the insn (with a jump over it), and we don't need to. For our
    //     purposes here, cases 2, 3 and 4 all look the same.
    //
    // For purposes of handling our patching task, we group cases 1b and 2
    // together, and handle case 1a on its own as it might require expanding
    // from a short-jump to a long-jump.
    //
    // We do not handle contracting from a long-jump to a short-jump, though
    // this is a possible future optimisation for case 1b. For now it seems
    // not worth the trouble.

    if (branch_is_B(branch)) {
        // A short B branch; it must be unconditional.
        NanoAssert(get_cc(branch) == AL);

        int32_t offset = PC_OFFSET_FROM(target, branch);
        if (isS24(offset>>2)) {
            // We can preserve the existing form, just rewrite its offset.
            NIns cond = *branch & 0xF0000000;
            *branch = (NIns)( cond | (0xA<<24) | ((offset>>2) & 0xFFFFFF) );
        } else {
            // We need to expand the existing branch to a long jump.
            // Make sure the next instruction is a dummy BKPT.
            NanoAssert(*(branch+1) == BKPT_insn);

            // Set the branch instruction to LDRcc pc, [pc, #-4].
            NIns cond = *branch & 0xF0000000;
            *branch++ = (NIns)( cond | (0x51<<20) | (PC<<16) | (PC<<12) | (4));
            *branch++ = (NIns)target;
        }
    } else {
        // Not a B branch, so it must be an LDR, with any kind of condition.
        NanoAssert(branch_is_LDR_PC(branch));

        NIns *addr = branch+2;
        int offset = (*branch & 0xFFF) / sizeof(NIns);
        if (*branch & (1<<23)) {
            addr += offset;
        } else {
            addr -= offset;
        }

        // Just redirect the jump target, leave the insn alone.
        *addr = (NIns) target;
    }

Assembler::nHint(LIns* ins)
    NanoAssert(ins->isop(LIR_paramp));
    RegisterMask prefer = 0;
    if (ins->paramKind() == 0)
        if (ins->paramArg() < 4)
            prefer = rmask(argRegs[ins->paramArg()]);
    return prefer;

Assembler::asm_qjoin(LIns *ins)
    int d = findMemFor(ins);

    LIns* lo = ins->oprnd1();
    LIns* hi = ins->oprnd2();

    Register rlo;
    Register rhi;
    findRegFor2(GpRegs, lo, rlo, GpRegs, hi, rhi);

    asm_str(rhi, FP, d+4);
    asm_str(rlo, FP, d);

    freeResourcesOf(ins);

Assembler::asm_store32(LOpcode op, LIns *value, int dr, LIns *base)
    Register ra, rb;
    getBaseReg2(GpRegs, value, ra, GpRegs, base, rb, dr);

    switch (op) {
        case LIR_sti:
            if (isU12(-dr) || isU12(dr)) {
                STR(ra, rb, dr);
            } else {
                STR(ra, IP, 0);
                asm_add_imm(IP, rb, dr);
            }
            break;
        case LIR_sti2c:
            if (isU12(-dr) || isU12(dr)) {
                STRB(ra, rb, dr);
            } else {
                STRB(ra, IP, 0);
                asm_add_imm(IP, rb, dr);
            }
            break;
        case LIR_sti2s:
            // Similar to the sti/stb case, but the max offset is smaller.
            if (isU8(-dr) || isU8(dr)) {
                STRH(ra, rb, dr);
            } else {
                STRH(ra, IP, 0);
                asm_add_imm(IP, rb, dr);
            }
            break;
        default:
            NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
            break;
    }

static bool
canRematALU(LIns *ins)
    // Return true if we can generate code for this instruction that neither
    // sets CCs, clobbers an input register, nor requires allocating a register.
    switch (ins->opcode()) {
    case LIR_addi:
    case LIR_subi:
    case LIR_andi:
    case LIR_ori:
    case LIR_xori:
        return ins->oprnd1()->isInReg() && ins->oprnd2()->isImmI();
    default:
        return false;
    }

Assembler::canRemat(LIns* ins)
    return ins->isImmI() || ins->isop(LIR_allocp) || canRematALU(ins);

Assembler::asm_restore(LIns* i, Register r)
    // The following registers should never be restored:
    NanoAssert(r != PC);
    NanoAssert(r != IP);
    NanoAssert(r != SP);

    if (i->isop(LIR_allocp)) {
        int d = findMemFor(i);
        asm_add_imm(r, FP, d);
    } else if (i->isImmI()) {
        asm_ld_imm(r, i->immI());
    } else if (canRematALU(i)) {
        Register rn = i->oprnd1()->getReg();
        int32_t imm = i->oprnd2()->immI();
        switch (i->opcode()) {
        case LIR_addi: asm_add_imm(r, rn, imm, /*stat=*/ 0); break;
        case LIR_subi: asm_sub_imm(r, rn, imm, /*stat=*/ 0); break;
        case LIR_andi: asm_and_imm(r, rn, imm, /*stat=*/ 0); break;
        case LIR_ori:  asm_orr_imm(r, rn, imm, /*stat=*/ 0); break;
        case LIR_xori: asm_eor_imm(r, rn, imm, /*stat=*/ 0); break;
        default:       NanoAssert(0);                        break;
        }
    } else {
        // We can't easily load immediate values directly into FP registers, so
        // ensure that memory is allocated for the constant and load it from
        // memory.
        int d = findMemFor(i);
        if (ARM_VFP && IsFpReg(r)) {
            if (isU8(d/4) || isU8(-d/4)) {
                FLDD(r, FP, d);
            } else {
                FLDD(r, IP, d%1024);
                asm_add_imm(IP, FP, d-(d%1024));
            }
        } else {
            NIns merged;
            LDR(r, FP, d);
            // See if we can merge this load into an immediately following
            // one, by creating or extending an LDM instruction.
            if (/* is it safe to poke _nIns[1] ? */
                does_next_instruction_exist(_nIns, codeStart, codeEnd,
                                            exitStart, exitEnd)
                && /* can we merge _nIns[0] into _nIns[1] ? */
                do_peep_2_1(&merged, _nIns[0], _nIns[1])) {
                _nIns[1] = merged;
                _nIns++;
                verbose_only( asm_output("merge next into LDMDB"); )
            }
        }
    }

Assembler::asm_spill(Register rr, int d, bool quad)
    // The following registers should never be spilled:
    NanoAssert(rr != PC);
    NanoAssert(rr != IP);
    NanoAssert(rr != SP);
    if (ARM_VFP && IsFpReg(rr)) {
        if (isU8(d/4) || isU8(-d/4)) {
            FSTD(rr, FP, d);
        } else {
            FSTD(rr, IP, d%1024);
            asm_add_imm(IP, FP, d-(d%1024));
        }
    } else {
        NIns merged;
        // asm_str always succeeds, but returns '1' to indicate that it emitted
        // a simple, easy-to-merge STR.
        if (asm_str(rr, FP, d)) {
            // See if we can merge this store into an immediately following
            // one, by creating or extending a STM instruction.
            if (/* is it safe to poke _nIns[1] ? */
                does_next_instruction_exist(_nIns, codeStart, codeEnd,
                                            exitStart, exitEnd)
                && /* can we merge _nIns[0] into _nIns[1] ? */
                do_peep_2_1(&merged, _nIns[0], _nIns[1])) {
                _nIns[1] = merged;
                _nIns++;
                verbose_only( asm_output("merge next into STMDB"); )
            }
        }
    }

Assembler::asm_load64(LIns* ins)
    NanoAssert(ins->isD());

    if (ARM_VFP) {
        Register dd;
        LIns* base = ins->oprnd1();
        Register rn = findRegFor(base, GpRegs);
        int offset = ins->disp();

        if (ins->isInReg()) {
            dd = prepareResultReg(ins, FpRegs & ~rmask(D0));
        } else {
            // If the result isn't already in a register, use the VFP scratch
            // register for the result and store it directly into memory.
            NanoAssert(ins->isInAr());
            int d = arDisp(ins);
            dd = D0;
            // VFP can only do loads and stores with a range of ±1020, so we
            // might need to do some arithmetic to extend its range.
            if (isU8(d/4) || isU8(-d/4)) {
                FSTD(dd, FP, d);
            } else {
                FSTD(dd, IP, d%1024);
                asm_add_imm(IP, FP, d-(d%1024));
            }
        }

        switch (ins->opcode()) {
            case LIR_ldd:
                if (isU8(offset/4) || isU8(-offset/4)) {
                    FLDD(dd, rn, offset);
                } else {
                    FLDD(dd, IP, offset%1024);
                    asm_add_imm(IP, rn, offset-(offset%1024));
                }
                break;
            case LIR_ldf2d:
                FCVTDS(dd, S0);
                if (isU8(offset/4) || isU8(-offset/4)) {
                    FLDS(S0, rn, offset);
                } else {
                    FLDS(S0, IP, offset%1024);
                    asm_add_imm(IP, rn, offset-(offset%1024));
                }
                break;
            default:
                NanoAssertMsg(0, "LIR opcode unsupported by asm_load64.");
                break;
        }
    } else {
        NanoAssert(ins->isInAr());
        int d = arDisp(ins);

        LIns* base = ins->oprnd1();
        Register rn = findRegFor(base, GpRegs);
        int offset = ins->disp();

        switch (ins->opcode()) {
            case LIR_ldd:
                asm_mmq(FP, d, rn, offset);
                break;
            case LIR_ldf2d:
                NanoAssertMsg(0, "LIR_ldf2d is not yet implemented for soft-float.");
                break;
            default:
                NanoAssertMsg(0, "LIR opcode unsupported by asm_load64.");
                break;
        }
    }

    freeResourcesOf(ins);

Assembler::asm_store64(LOpcode op, LIns* value, int dr, LIns* base)
    NanoAssert(value->isD());

    if (ARM_VFP) {
        Register dd = findRegFor(value, FpRegs & ~rmask(D0));
        Register rn = findRegFor(base, GpRegs);

        switch (op) {
            case LIR_std:
                // VFP can only do stores with a range of ±1020, so we might
                // need to do some arithmetic to extend its range.
                if (isU8(dr/4) || isU8(-dr/4)) {
                    FSTD(dd, rn, dr);
                } else {
                    FSTD(dd, IP, dr%1024);
                    asm_add_imm(IP, rn, dr-(dr%1024));
                }
                break;
            case LIR_std2f:
                // VFP can only do stores with a range of ±1020, so we might
                // need to do some arithmetic to extend its range.
                if (isU8(dr/4) || isU8(-dr/4)) {
                    FSTS(S0, rn, dr);
                } else {
                    FSTS(S0, IP, dr%1024);
                    asm_add_imm(IP, rn, dr-(dr%1024));
                }
                FCVTSD(S0, dd);
                break;
            default:
                NanoAssertMsg(0, "LIR opcode unsupported by asm_store64.");
                break;
        }
    } else {
        int d = findMemFor(value);
        Register rn = findRegFor(base, GpRegs);

        switch (op) {
            case LIR_std:
                // Doubles in soft-float never get registers allocated, so this
                // is always a simple two-word memcpy.
                // *(uint64_t*)(rb+dr) = *(uint64_t*)(FP+da)
                asm_mmq(rn, dr, FP, d);
                break;
            case LIR_std2f:
                NanoAssertMsg(0, "TODO: Soft-float implementation of LIR_std2f.");
                break;
            default:
                NanoAssertMsg(0, "LIR opcode unsupported by asm_store64.");
                break;
        }
    }

// Load the float64 specified by immDhi:immDlo into VFP register dd.
Assembler::asm_immd_nochk(Register dd, int32_t immDlo, int32_t immDhi)
    // We're not going to use a slot, because it might be too far
    // away. Instead, we're going to stick a branch in the stream to
    // jump over the constants, and then load from a short PC-relative
    // offset.

    // The stream should look like:
    //     branch A
    //     immDlo
    //     immDhi
    // A:  FLDD PC-16

    FLDD(dd, PC, -16);

    *(--_nIns) = (NIns) immDhi;
    *(--_nIns) = (NIns) immDlo;

    B_nochk(_nIns+2);

Assembler::asm_immd(LIns* ins)
    // If the value isn't in a register, it's simplest to use integer
    // instructions to put the value in its stack slot. Otherwise, use a VFP
    // load to get the value from a literal pool.
    if (ARM_VFP && ins->isInReg()) {
        Register dd = prepareResultReg(ins, FpRegs);
        underrunProtect(4*4);
        asm_immd_nochk(dd, ins->immDlo(), ins->immDhi());
    } else {
        NanoAssert(ins->isInAr());
        int d = arDisp(ins);
        asm_str(IP, FP, d+4);
        asm_ld_imm(IP, ins->immDhi());
        asm_str(IP, FP, d);
        asm_ld_imm(IP, ins->immDlo());
    }

    freeResourcesOf(ins);

Assembler::asm_nongp_copy(Register r, Register s)
    if (ARM_VFP && IsFpReg(r) && IsFpReg(s)) {
        FCPYD(r, s);
    } else {
        // We can't move a double-precision FP register into a 32-bit GP
        // register, so assert that no calling code is trying to do that.
        NanoAssert(0);
    }

/*
 * Copy 64 bits: (rd+dd) <- (rs+ds).
 */
Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
    // The value is either a 64-bit struct or maybe a float that isn't live in
    // an FPU reg. Either way, don't put it in an FPU reg just to load & store
    // it.
    //
    // This operation becomes a simple 64-bit memcpy.
    //
    // In order to make the operation optimal, we will require two GP
    // registers. We can't allocate a register here because the caller may have
    // called deprecated_freeRsrcOf, and allocating a register here may cause
    // something else to spill onto the stack which has just been conveniently
    // freed by deprecated_freeRsrcOf (resulting in stack corruption).
    //
    // Falling back to a single-register implementation of asm_mmq is better
    // than adjusting the callers' behaviour (to allow us to allocate another
    // register here) because spilling a register will end up being slower than
    // just using the same register twice anyway.
    //
    // Thus, if there is a free register which we can borrow, we will emit the
    // following code:
    //      LDR rr, [rs, #ds]
    //      LDR ip, [rs, #(ds+4)]
    //      STR rr, [rd, #dd]
    //      STR ip, [rd, #(dd+4)]
    // (Where rr is the borrowed register.)
    //
    // If there is no free register, don't spill an existing allocation. Just
    // do the following:
    //      LDR ip, [rs, #ds]
    //      STR ip, [rd, #dd]
    //      LDR ip, [rs, #(ds+4)]
    //      STR ip, [rd, #(dd+4)]
    //
    // Note that if rs+4 or rd+4 is outside the LDR or STR range, extra
    // instructions will be emitted as required to make the code work.

    // Ensure that the PC is not used as either base register. The instruction
    // generation macros call underrunProtect, and a side effect of this is
    // that we may be pushed onto another page, so the PC is not a reliable
    // base register.
    NanoAssert(rs != PC);
    NanoAssert(rd != PC);

    // We use IP as a swap register, so check that it isn't used for something
    // else by the caller.
    NanoAssert(rs != IP);
    NanoAssert(rd != IP);

    // Find the list of free registers from the allocator's free list and the
    // GpRegs mask. This excludes any floating-point registers that may be on
    // the free list.
    RegisterMask free = _allocator.free & AllowableFlagRegs;

    // Ensure that ds and dd are within the +/-4095 offset range of STR and
    // LDR. If either is out of range, adjust and modify rd or rs so that the
    // load works correctly.
    // The modification here is performed after the LDR/STR block (because code
    // is emitted backwards), so this one is the reverse operation.

    int32_t dd_adj = 0;
    int32_t ds_adj = 0;

    if ((dd+4) >= 0x1000) {
        dd_adj = ((dd+4) & ~0xfff);
    } else if (dd <= -0x1000) {
        dd_adj = -((-dd) & ~0xfff);
    }
    if ((ds+4) >= 0x1000) {
        ds_adj = ((ds+4) & ~0xfff);
    } else if (ds <= -0x1000) {
        ds_adj = -((-ds) & ~0xfff);
    }
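
    // Illustrative arithmetic (not from the original source): for dd = 0x1234,
    // dd_adj = (0x1238 & ~0xfff) = 0x1000, so the emitted code adds 0x1000 to
    // rd, accesses [rd, #0x234] and [rd, #0x238], and then subtracts 0x1000
    // from rd again afterwards.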

    // These will emit no code if d*_adj is 0.
    asm_sub_imm(rd, rd, dd_adj);
    asm_sub_imm(rs, rs, ds_adj);

    ds -= ds_adj;
    dd -= dd_adj;

    if (free) {
        // There is at least one register on the free list, so grab one for
        // temporary use. There is no need to allocate it explicitly because
        // we won't need it after this function returns.
        //
        // The CountLeadingZeroes utility can be used to quickly find a set bit
        // in the free mask.
        Register rr = (Register)(31-CountLeadingZeroes(free));

        // Note: Not every register in GpRegs is usable here. However, these
        // registers will never appear on the free list.
        NanoAssert((free & rmask(PC)) == 0);
        NanoAssert((free & rmask(LR)) == 0);
        NanoAssert((free & rmask(SP)) == 0);
        NanoAssert((free & rmask(IP)) == 0);
        NanoAssert((free & rmask(FP)) == 0);

        // Emit the actual instruction sequence.
        STR(IP, rd, dd+4);
        STR(rr, rd, dd);
        LDR(IP, rs, ds+4);
        LDR(rr, rs, ds);
    } else {
        // There are no free registers, so fall back to using IP twice.
        STR(IP, rd, dd+4);
        LDR(IP, rs, ds+4);
        STR(IP, rd, dd);
        LDR(IP, rs, ds);
    }

    // Re-adjust the base registers. (These will emit no code if d*_adj is 0.)
    asm_add_imm(rd, rd, dd_adj);
    asm_add_imm(rs, rs, ds_adj);

// Increment the 32-bit profiling counter at pCtr, without
// changing any registers.
void Assembler::asm_inc_m32(uint32_t* pCtr)
    // We need to temporarily free up two registers to do this, so
    // just push r0 and r1 on the stack. This assumes that the area
    // at r13 - 8 .. r13 - 1 isn't being used for anything else at
    // this point. This is guaranteed to us by the EABI, although I'm
    // not sure of the situation with the legacy ABI.
    //
    // Plan: emit the following bit of code. It's not efficient, but
    // this is for profiling debug builds only, and is self-contained,
    // except for the above comment re stack use.
    //
    //   E92D0003   push {r0,r1}
    //   E59F0000   ldr  r0, [r15]    ; pCtr
    //   EA000000   b    .+8          ; jump over imm
    //   12345678   .word 0x12345678  ; pCtr
    //   E5901000   ldr  r1, [r0]
    //   E2811001   add  r1, r1, #1
    //   E5801000   str  r1, [r0]
    //   E8BD0003   pop  {r0,r1}

    // We need to keep the 4 words beginning at "ldr r0, [r15]"
    // together. Simplest to underrunProtect the whole thing.
    underrunProtect(8*4);
    IMM32(0xE8BD0003);       //  pop  {r0,r1}
    IMM32(0xE5801000);       //  str  r1, [r0]
    IMM32(0xE2811001);       //  add  r1, r1, #1
    IMM32(0xE5901000);       //  ldr  r1, [r0]
    IMM32((uint32_t)pCtr);   //  .word pCtr
    IMM32(0xEA000000);       //  b    .+8
    IMM32(0xE59F0000);       //  ldr  r0, [r15]
    IMM32(0xE92D0003);       //  push {r0,r1}

Assembler::nativePageReset()

Assembler::nativePageSetup()
    NanoAssert(!_inExit);
    codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes), NJ_MAX_CPOOL_OFFSET);

    // constpool starts at top of page and goes down,
    // code starts at bottom of page and moves up

Assembler::underrunProtect(int bytes)
    NanoAssertMsg(bytes<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small");
    NanoAssert(_nSlot != 0 && int(_nIns)-int(_nSlot) <= 4096);
    uintptr_t top = uintptr_t(_nSlot);
    uintptr_t pc = uintptr_t(_nIns);
    if (pc - bytes < top) {
        verbose_only(verbose_outputf("        %p:", _nIns);)
        NIns* target = _nIns;
        // This may be in a normal code chunk or an exit code chunk.
        codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes), NJ_MAX_CPOOL_OFFSET);

        _nSlot = codeStart;

        // _nSlot points to the first empty position in the new code block.
        // _nIns points just past the last empty position.
        // Assume B_nochk won't ever try to write to _nSlot. See B_cond_chk macro.
        B_nochk(target);
    }

Assembler::JMP_far(NIns* addr)
    // Even if a simple branch is all that is required, this function must emit
    // two words so that the branch can be arbitrarily patched later on.
    underrunProtect(8);

    intptr_t offs = PC_OFFSET_FROM(addr,_nIns-2);

    if (isS24(offs>>2)) {
        // Emit a BKPT to ensure that we reserve enough space for a full 32-bit
        // branch patch later on. The BKPT should never be executed.
        *(--_nIns) = BKPT_insn;

        *(--_nIns) = (NIns)( COND_AL | (0xA<<24) | ((offs>>2) & 0xFFFFFF) );

        asm_output("b %p", (void*)addr);
    } else {
        // Insert the target address as a constant in the instruction stream.
        *(--_nIns) = (NIns)((addr));
        // ldr pc, [pc, #-4] // load the address into pc, reading it from [pc-4]
        // (i.e., the next instruction).
        *(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | (4));

        asm_output("ldr pc, =%p", (void*)addr);
    }

// Perform a branch with link, and ARM/Thumb exchange if necessary. The actual
// BLX instruction is only available from ARMv5 onwards, but as we don't
// support anything older than that, this function will not attempt to output
// pre-ARMv5 sequences.
//
// Note: This function is not designed to be used with branches which will be
// patched later, though it will work if the patcher knows how to patch the
// generated instruction sequence.
Assembler::BranchWithLink(NIns* addr)
    // Most branches emitted by TM are loaded through a register, so always
    // reserve enough space for the LDR sequence. This should give us a slight
    // net gain over reserving the exact amount required for shorter branches.
    // This _must_ be called before PC_OFFSET_FROM as it can move _nIns!
    underrunProtect(8+LD32_size);

    // Calculate the offset from the instruction that is about to be
    // written (at _nIns-1) to the target.
    intptr_t offs = PC_OFFSET_FROM(addr,_nIns-1);

    // ARMv5 and above can use BLX <imm> for branches within ±32MB of the
    // PC and BLX Rm for long branches.
    if (isS24(offs>>2)) {
        // The value we need to stick in the instruction; masked,
        // because it will be sign-extended back to 32 bits.
        intptr_t offs2 = (offs>>2) & 0xffffff;
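
        // Illustrative arithmetic (not from the original source): a branch to
        // the instruction eight bytes before it has offs = -16, so
        // (offs >> 2) = -4 and offs2 = 0xfffffc; the CPU sign-extends the
        // 24-bit field and shifts it left by 2 to recover -16.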

        if (((intptr_t)addr & 1) == 0) {
            // The target is ARM, so just emit a BL.
            *(--_nIns) = (NIns)( (COND_AL) | (0xB<<24) | (offs2) );
            asm_output("bl %p", (void*)addr);
            return;
        } else if (ARM_ARCH_AT_LEAST(5)) {
            // The target is Thumb, so emit a BLX (ARMv5+).
            // The (pre-shifted) value of the "H" bit in the BLX encoding.
            uint32_t H = (offs & 0x2) << 23;

            *(--_nIns) = (NIns)( (0xF << 28) | (0x5<<25) | (H) | (offs2) );
            asm_output("blx %p", (void*)addr);
            return;
        }

        /* If we get here, it means we are on ARMv4T, and the target is Thumb,
           in which case we want to emit a branch with a register. */
    }

    if (ARM_ARCH_AT_LEAST(5)) {
        // Load the target address into IP and branch to that. We've already
        // done underrunProtect, so we can skip that here.
        BLX(IP, false);
    }

    asm_ld_imm(IP, (int32_t)addr, false);

// This is identical to BranchWithLink(NIns*) but emits a branch to an address
// held in a register rather than a literal address.
Assembler::BLX(Register addr, bool chk /* = true */)
    // We need to emit an ARMv5+ instruction, so assert that we have a suitable
    // processor. Note that we don't support ARMv4(T), but this serves as a
    // useful sanity check.
    NanoAssert(ARM_ARCH_AT_LEAST(5));

    NanoAssert(IsGpReg(addr));

    // There is a bug in the WinCE device emulator which stops "BLX LR" from
    // working as expected. Assert that we never do that!
    NanoAssert(addr != LR);

    if (chk) underrunProtect(4);

    *(--_nIns) = (NIns)( (COND_AL) | (0x12<<20) | (0xFFF<<8) | (0x3<<4) | (addr) );
    asm_output("blx %s", gpn(addr));

// Emit the code required to load a memory address into a register as follows:
//     d = *(b+off)
// underrunProtect calls from this function can be disabled by setting chk to
// false. However, this function can use more than LD32_size bytes of space if
// the offset is out of the range of a LDR instruction; the maximum space this
// function requires for underrunProtect is 4+LD32_size.
Assembler::asm_ldr_chk(Register d, Register b, int32_t off, bool chk)
    if (ARM_VFP && IsFpReg(d)) {
        FLDD_chk(d,b,off,chk);
        return;
    }

    NanoAssert(IsGpReg(d));
    NanoAssert(IsGpReg(b));

    // We can't use underrunProtect if the base register is the PC because
    // underrunProtect might move the PC if there isn't enough space on the
    // current page.
    NanoAssert((b != PC) || (!chk));

    if (isU12(off)) {
        if (chk) underrunProtect(4);
        *(--_nIns) = (NIns)( COND_AL | (0x59<<20) | (b<<16) | (d<<12) | off );
    } else if (isU12(-off)) {
        if (chk) underrunProtect(4);
        *(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (b<<16) | (d<<12) | -off );
    } else {
        // The offset is over 4096 (and outside the range of LDR), so we need
        // to add a level of indirection to get the address into IP.
        //
        // Because of that, we can't do a PC-relative load unless it fits within
        // the single-instruction forms above.
        NanoAssert(b != PC);
        NanoAssert(b != IP);

        if (chk) underrunProtect(4+LD32_size);

        *(--_nIns) = (NIns)( COND_AL | (0x79<<20) | (b<<16) | (d<<12) | IP );
        asm_ld_imm(IP, off, false);
    }

    asm_output("ldr %s, [%s, #%d]",gpn(d),gpn(b),(off));

// Emit a store, using a register base and an arbitrary immediate offset. This
// behaves like a STR instruction, but doesn't care about the offset range, and
// emits one of the following instruction sequences:
//
//      STR rt, [rr, #offset]
//
//      asm_add_imm ip, rr, #(offset & ~0xfff)
//      STR rt, [ip, #(offset & 0xfff)]
//
//      # This one's fairly horrible, but should be rare.
//      asm_add_imm rr, rr, #(offset & ~0xfff)
//      STR rt, [rr, #(offset & 0xfff)]
//      asm_sub_imm rr, rr, #(offset & ~0xfff)
//
// SUB-based variants (for negative offsets) are also supported.
//
// The return value is 1 if a simple STR could be emitted, or 0 if the required
// sequence was more complex.
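//
// Illustrative example (not from the original source): asm_str(R0, R1, 0x1234)
// cannot use a single STR, since 0x1234 exceeds the 12-bit offset range, so it
// emits the equivalent of:
//     ADD ip, r1, #0x1000
//     STR r0, [ip, #0x234]
// and returns 0.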

Assembler::asm_str(Register rt, Register rr, int32_t offset)
    // We can't do PC-relative stores, and we can't store the PC value, because
    // we use macros (such as STR) which call underrunProtect, and this can
    // push _nIns to a new page, thus making any PC value impractical to
    // predict.
    NanoAssert(rr != PC);
    NanoAssert(rt != PC);
    if (offset >= 0) {
        // The offset is positive, so use ADD (and variants).
        if (isU12(offset)) {
            STR(rt, rr, offset);
            return 1;
        }

        if (rt != IP) {
            STR(rt, IP, offset & 0xfff);
            asm_add_imm(IP, rr, offset & ~0xfff);
        } else {
            int32_t adj = offset & ~0xfff;
            asm_sub_imm(rr, rr, adj);
            STR(rt, rr, offset-adj);
            asm_add_imm(rr, rr, adj);
        }
    } else {
        // The offset is negative, so use SUB (and variants).
        if (isU12(-offset)) {
            STR(rt, rr, offset);
            return 1;
        }

        if (rt != IP) {
            STR(rt, IP, -((-offset) & 0xfff));
            asm_sub_imm(IP, rr, (-offset) & ~0xfff);
        } else {
            int32_t adj = ((-offset) & ~0xfff);
            asm_add_imm(rr, rr, adj);
            STR(rt, rr, offset+adj);
            asm_sub_imm(rr, rr, adj);
        }
    }

    return 0;
1977 // Emit the code required to load an immediate value (imm) into general-purpose
1978 // register d. Optimal (MOV-based) mechanisms are used if the immediate can be
1979 // encoded using ARM's operand 2 encoding. Otherwise, a slot is used on the
1980 // literal pool and LDR is used to load the value.
1982 // chk can be explicitly set to false in order to disable underrunProtect calls
1983 // from this function; this allows the caller to perform the check manually.
1984 // This function guarantees not to use more than LD32_size bytes of space.
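// Illustrative examples of the three strategies (values chosen for clarity):
//   imm = 0x00ff0000  ->  MOV d, #0x00ff0000   (0xff ROR 16 is a valid operand 2)
//   imm = 0xffff00ff  ->  MVN d, #0x0000ff00   (~imm is operand-2 encodable)
//   imm = 0x12345678  ->  MOVW/MOVT pair on ARMv7+, otherwise a literal-pool LDR.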
1986 Assembler::asm_ld_imm(Register d, int32_t imm, bool chk /* = true */)
1990 NanoAssert(IsGpReg(d));
1992 // Attempt to encode the immediate using the second operand of MOV or MVN.
1993 // This is the simplest solution and generates the shortest and fastest
1994 // code, but can only encode a limited set of values.
1996 if (encOp2Imm(imm, &op2imm)) {
1997 // Use MOV to encode the literal.
1998 MOVis(d, op2imm, 0);
2002 if (encOp2Imm(~imm, &op2imm)) {
2003 // Use MVN to encode the inverted literal.
2004 MVNis(d, op2imm, 0);
2008 // Try to use simple MOV, MVN or MOV(W|T) instructions to load the
2009 // immediate. If this isn't possible, load it from memory.
2010 // - We cannot use MOV(W|T) on cores older than the introduction of
2011 // Thumb-2 or if the target register is the PC.
2013 // (Note that we assume MOV(W|T) is available when arm_arch is ARMv7 or
2014 // later; the only earlier ARM core that provided Thumb-2 is the ARMv6T2
2015 // ARM1156, a real-time core that nanojit is unlikely ever to target.)
2016 if (ARM_ARCH_AT_LEAST(7) && (d != PC)) {
2017 // ARMv6T2 and above have MOVW and MOVT.
2018 uint32_t high_h = (uint32_t)imm >> 16;
2019 uint32_t low_h = imm & 0xffff;
2022 // Load the high half-word (if necessary).
2023 MOVTi_chk(d, high_h, chk);
2025 // Load the low half-word. This also zeroes the high half-word, and
2026 // thus must execute _before_ MOVT, and is necessary even if low_h is 0
2027 // because MOVT will not change the existing low half-word.
2028 MOVWi_chk(d, low_h, chk);
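// For example, imm = 0x12345678 executes as (assuming d is not PC):
//     MOVW d, #0x5678      ; d = 0x00005678, high half-word cleared
//     MOVT d, #0x1234      ; d = 0x12345678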
2033 // We couldn't encode the literal in the instruction stream, so load it from memory.
2036 // Because the literal pool is on the same page as the generated code, it
2037 // will almost always be within the ±4096 range of a LDR. However, this may
2038 // not be the case if _nSlot is at the start of the page and _nIns is at
2039 // the end because the PC is 8 bytes ahead of _nIns. This is unlikely to
2040 // happen, but if it does occur we can simply waste a word or two of literal space.
2043 // We must do the underrunProtect before PC_OFFSET_FROM as underrunProtect
2044 // can move the PC if there isn't enough space on the current page!
2046 underrunProtect(LD32_size);
2049 int offset = PC_OFFSET_FROM(_nSlot, _nIns-1);
2050 // If the offset is out of range, waste literal space until it is in range.
2051 while (offset <= -4096) {
2053 offset += sizeof(_nSlot);
2055 NanoAssert((isU12(-offset) || isU12(offset)) && (offset <= -8));
2057 // Write the literal.
2059 asm_output("## imm= 0x%x", imm);
2061 // Load the literal.
2062 LDR_nochk(d,PC,offset);
2063 NanoAssert(uintptr_t(_nIns) + 8 + offset == uintptr_t(_nSlot-1));
2064 NanoAssert(*((int32_t*)_nSlot-1) == imm);
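// Worked example of the offset arithmetic: if the LDR lands at address A
// and the literal at address L, the hardware reads PC as A+8, so the
// encoded offset is L-(A+8). With A = 0x4000 and L = 0x3ff0 this gives
// ldr d, [pc, #-24], satisfying the (offset <= -8) assertion above.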
2067 // Branch to target address _t with condition _c, doing underrun
2068 // checks (_chk == 1) or skipping them (_chk == 0).
2070 // Set the target address (_t) to 0 if the target is not yet known and the
2071 // branch will be patched up later.
2073 // If the jump is to a known address (with _t != 0) and it fits in a relative
2074 // jump (±32MB), emit that.
2075 // If the jump is unconditional, emit the dest address inline in
2076 // the instruction stream and load it into pc.
2077 // If the jump has a condition, but no one has mucked with _nIns and our _nSlot
2078 // pointer is valid, stick the constant in the slot and emit a conditional load into pc.
2080 // Otherwise, emit the conditional load into pc from a nearby constant,
2081 // and emit a jump over it in case the condition fails.
2083 // NB: B_nochk depends on this not calling samepage() when _c == AL
2085 Assembler::B_cond_chk(ConditionCode _c, NIns* _t, bool _chk)
2087 int32_t offs = PC_OFFSET_FROM(_t,_nIns-1);
2088 //nj_dprintf("B_cond_chk target: 0x%08x offset: %d @0x%08x\n", _t, offs, _nIns-1);
2090 // optimistically check if this will fit in 24 bits
2091 if (_chk && isS24(offs>>2) && (_t != 0)) {
2093 // recalculate the offset, because underrunProtect may have
2094 // moved _nIns to a new page
2095 offs = PC_OFFSET_FROM(_t,_nIns-1);
2098 // Emit one of the following patterns:
2100 // --- Short branch. This can never be emitted if the branch target is not known.
2104 // --- Long unconditional branch.
2108 // --- Long conditional branch. Note that conditional branches will never
2109 // be patched, so the nPatchBranch function doesn't need to know where
2110 // the literal pool is located.
2112 // ; #lit is in the literal pool at _nSlot
2114 // --- Long conditional branch (if the slot isn't on the same page as the instruction).
2116 // B skip ; Jump over the literal data.
2120 if (isS24(offs>>2) && (_t != 0)) {
2121 // The underrunProtect for this was done above (if required by _chk).
2122 *(--_nIns) = (NIns)( ((_c)<<28) | (0xA<<24) | (((offs)>>2) & 0xFFFFFF) );
2123 asm_output("b%s %p", _c == AL ? "" : condNames[_c], (void*)(_t));
2124 } else if (_c == AL) {
2125 if(_chk) underrunProtect(8);
2126 *(--_nIns) = (NIns)(_t);
2127 *(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | 0x4 );
2128 asm_output("b%s %p", _c == AL ? "" : condNames[_c], (void*)(_t));
2129 } else if (PC_OFFSET_FROM(_nSlot, _nIns-1) > -0x1000) {
2130 if(_chk) underrunProtect(8);
2131 *(_nSlot++) = (NIns)(_t);
2132 offs = PC_OFFSET_FROM(_nSlot-1,_nIns-1);
2133 NanoAssert(offs < 0);
2134 *(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | ((-offs) & 0xFFF) );
2135 asm_output("ldr%s %s, [%s, #-%d]", condNames[_c], gpn(PC), gpn(PC), -offs);
2136 NanoAssert(uintptr_t(_nIns)+8+offs == uintptr_t(_nSlot-1));
2138 if(_chk) underrunProtect(12);
2139 // Emit a pointer to the target as a literal in the instruction stream.
2140 *(--_nIns) = (NIns)(_t);
2141 // Emit a branch to skip over the literal. The PC value is 8 bytes
2142 // ahead of the executing instruction, so to branch two instructions
2143 // forward this must branch 8-8=0 bytes.
2144 *(--_nIns) = (NIns)( COND_AL | (0xA<<24) | 0x0 );
2145 // Emit the conditional branch.
2146 *(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | 0x0 );
2147 asm_output("b%s %p", _c == AL ? "" : condNames[_c], (void*)(_t));
2156 Assembler::asm_i2d(LIns* ins)
2158 Register dd = prepareResultReg(ins, FpRegs & ~rmask(D0));
2159 Register rt = findRegFor(ins->oprnd1(), GpRegs);
2165 freeResourcesOf(ins);
2169 Assembler::asm_ui2d(LIns* ins)
2171 Register dd = prepareResultReg(ins, FpRegs & ~rmask(D0));
2172 Register rt = findRegFor(ins->oprnd1(), GpRegs);
2178 freeResourcesOf(ins);
2181 void Assembler::asm_d2i(LIns* ins)
2184 if (ins->isInReg()) {
2185 Register rt = ins->getReg();
2188 // There's no active result register, so store the result directly into
2189 // memory to avoid the FP->GP transfer cost on Cortex-A8.
2190 int32_t d = arDisp(ins);
2191 // VFP can only do stores with a range of ±1020, so we might need to do
2192 // some arithmetic to extend its range.
2193 if (isU8(d/4) || isU8(-d/4)) {
2196 FSTS(S0, IP, d%1024);
2197 asm_add_imm(IP, FP, d-(d%1024));
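// e.g. d = 0x500 exceeds the ±1020 range, so this branch executes as:
//     asm_add_imm  ip, fp, #0x400
//     FSTS         s0, [ip, #0x100]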
2201 Register dm = findRegFor(ins->oprnd1(), FpRegs & ~rmask(D0));
2205 freeResourcesOf(ins);
2209 Assembler::asm_fneg(LIns* ins)
2211 LIns* lhs = ins->oprnd1();
2213 Register dd = prepareResultReg(ins, FpRegs);
2214 // If the argument doesn't have a register assigned, re-use dd.
2215 Register dm = lhs->isInReg() ? lhs->getReg() : dd;
2219 freeResourcesOf(ins);
2221 NanoAssert(!lhs->isInReg());
2222 findSpecificRegForUnallocated(lhs, dd);
2227 Assembler::asm_fop(LIns* ins)
2229 LIns* lhs = ins->oprnd1();
2230 LIns* rhs = ins->oprnd2();
2232 Register dd = prepareResultReg(ins, FpRegs);
2233 // Try to re-use the result register for one of the arguments.
2234 Register dn = lhs->isInReg() ? lhs->getReg() : dd;
2235 Register dm = rhs->isInReg() ? rhs->getReg() : dd;
2236 if ((dn == dm) && (lhs != rhs)) {
2237 // We can't re-use the result register for both arguments, so force one
2238 // into its own register.
2239 dm = findRegFor(rhs, FpRegs & ~rmask(dd));
2240 NanoAssert(rhs->isInReg());
2243 // TODO: Special cases for simple constants.
2245 switch(ins->opcode()) {
2246 case LIR_addd: FADDD(dd,dn,dm); break;
2247 case LIR_subd: FSUBD(dd,dn,dm); break;
2248 case LIR_muld: FMULD(dd,dn,dm); break;
2249 case LIR_divd: FDIVD(dd,dn,dm); break;
2250 default: NanoAssert(0); break;
2253 freeResourcesOf(ins);
2255 // If we re-used the result register, mark it as active.
2257 NanoAssert(!lhs->isInReg());
2258 findSpecificRegForUnallocated(lhs, dd);
2259 } else if (dm == dd) {
2260 NanoAssert(!rhs->isInReg());
2261 findSpecificRegForUnallocated(rhs, dd);
2263 NanoAssert(lhs->isInReg());
2264 NanoAssert(rhs->isInReg());
2269 Assembler::asm_cmpd(LIns* ins)
2271 LIns* lhs = ins->oprnd1();
2272 LIns* rhs = ins->oprnd2();
2273 LOpcode op = ins->opcode();
2275 NanoAssert(ARM_VFP);
2276 NanoAssert(isCmpDOpcode(op));
2277 NanoAssert(lhs->isD() && rhs->isD());
2280 findRegFor2(FpRegs, lhs, ra, FpRegs, rhs, rb);
2282 int e_bit = (op != LIR_eqd);
2284 // Do the comparison and get the result flags loaded into the ARM status register.
2285 // TODO: For asm_condd, we should put the results directly into an ARM
2286 // machine register, then use bit operations to get the result.
2288 FCMPD(ra, rb, e_bit);
2291 /* Call this with targ set to 0 if the target is not yet known and the branch
2292 * will be patched up later.
2295 Assembler::asm_branch(bool branchOnFalse, LIns* cond, NIns* targ)
2297 LOpcode condop = cond->opcode();
2298 NanoAssert(cond->isCmp());
2299 NanoAssert(ARM_VFP || !isCmpDOpcode(condop));
2301 // The old "never" condition code has special meaning on newer ARM cores,
2302 // so use "always" as a sensible default code.
2303 ConditionCode cc = AL;
2305 // Detect whether or not this is a floating-point comparison.
2308 // Select the appropriate ARM condition code to match the LIR instruction.
2311 // Floating-point conditions. Note that the VFP LT/LE conditions
2312 // require use of the unsigned condition codes, even though
2313 // floating-point comparisons are always signed.
2314 case LIR_eqd: cc = EQ; fp_cond = true; break;
2315 case LIR_ltd: cc = LO; fp_cond = true; break;
2316 case LIR_led: cc = LS; fp_cond = true; break;
2317 case LIR_ged: cc = GE; fp_cond = true; break;
2318 case LIR_gtd: cc = GT; fp_cond = true; break;
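// Why unsigned codes for VFP: FMSTAT copies the FPSCR flags, and an
// unordered compare (a NaN operand) sets C and V while clearing N and Z.
// LO/LS are therefore false for NaN inputs, whereas the signed LT/LE
// (which test N and V) would incorrectly report true.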
2320 // Standard signed and unsigned integer comparisons.
2321 case LIR_eqi: cc = EQ; fp_cond = false; break;
2322 case LIR_lti: cc = LT; fp_cond = false; break;
2323 case LIR_lei: cc = LE; fp_cond = false; break;
2324 case LIR_gti: cc = GT; fp_cond = false; break;
2325 case LIR_gei: cc = GE; fp_cond = false; break;
2326 case LIR_ltui: cc = LO; fp_cond = false; break;
2327 case LIR_leui: cc = LS; fp_cond = false; break;
2328 case LIR_gtui: cc = HI; fp_cond = false; break;
2329 case LIR_geui: cc = HS; fp_cond = false; break;
2331 // Default case for invalid or unexpected LIR instructions.
2332 default: cc = AL; fp_cond = false; break;
2335 // Invert the condition if required.
2337 cc = OppositeCond(cc);
2339 // Ensure that we got a sensible condition code.
2340 NanoAssert((cc != AL) && (cc != NV));
2342 // Ensure that we don't hit floating-point LIR codes if VFP is disabled.
2343 NanoAssert(ARM_VFP || !fp_cond);
2345 // Emit a suitable branch instruction.
2348 // Store the address of the branch instruction so that we can return it.
2349 // asm_[f]cmp will move _nIns so we must do this now.
2357 NIns* Assembler::asm_branch_ov(LOpcode op, NIns* target)
2359 // Because MUL can't set the V flag, we use SMULL and CMP to set the Z flag
2360 // to detect overflow on multiply. Thus, if we have a LIR_mulxovi, we must
2361 // be conditional on !Z, not V.
2362 ConditionCode cc = ( (op == LIR_mulxovi) || (op == LIR_muljovi) ? NE : VS );
2364 // Emit a suitable branch instruction.
2370 Assembler::asm_cmp(LIns *cond)
2372 LIns* lhs = cond->oprnd1();
2373 LIns* rhs = cond->oprnd2();
2375 // Forward floating-point comparisons directly to asm_cmpd to simplify
2376 // logic in other methods which need to issue an implicit comparison, but
2377 // don't care about the details of the comparison itself.
2379 NanoAssert(rhs->isD());
2384 NanoAssert(lhs->isI() && rhs->isI());
2386 // ready to issue the compare
2387 if (rhs->isImmI()) {
2388 int c = rhs->immI();
2389 Register r = findRegFor(lhs, GpRegs);
2393 findRegFor2(GpRegs, lhs, ra, GpRegs, rhs, rb);
2399 Assembler::asm_cmpi(Register r, int32_t imm)
2403 ALUi(AL, cmn, 1, 0, r, -imm);
2405 underrunProtect(4 + LD32_size);
2407 asm_ld_imm(IP, imm);
2411 ALUi(AL, cmp, 1, 0, r, imm);
2413 underrunProtect(4 + LD32_size);
2415 asm_ld_imm(IP, imm);
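// e.g. asm_cmpi(r0, -1) takes the CMN path above: CMN r0, #1 sets the
// flags of r0 + 1 (i.e. r0 - (-1)) without needing a scratch register.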
2421 Assembler::asm_condd(LIns* ins)
2423 Register rd = prepareResultReg(ins, GpRegs);
2425 // TODO: Modify cmpd to allow the FP flags to move directly to an ARM
2426 // machine register, then use simple bit operations here rather than
2427 // conditional moves.
2429 switch (ins->opcode()) {
2430 case LIR_eqd: SETEQ(rd); break;
2431 case LIR_ltd: SETLO(rd); break; // Note: VFP LT/LE operations require
2432 case LIR_led: SETLS(rd); break; // unsigned LO/LS condition codes!
2433 case LIR_ged: SETGE(rd); break;
2434 case LIR_gtd: SETGT(rd); break;
2435 default: NanoAssert(0); break;
2438 freeResourcesOf(ins);
2444 Assembler::asm_cond(LIns* ins)
2446 Register rd = prepareResultReg(ins, GpRegs);
2447 LOpcode op = ins->opcode();
2451 case LIR_eqi: SETEQ(rd); break;
2452 case LIR_lti: SETLT(rd); break;
2453 case LIR_lei: SETLE(rd); break;
2454 case LIR_gti: SETGT(rd); break;
2455 case LIR_gei: SETGE(rd); break;
2456 case LIR_ltui: SETLO(rd); break;
2457 case LIR_leui: SETLS(rd); break;
2458 case LIR_gtui: SETHI(rd); break;
2459 case LIR_geui: SETHS(rd); break;
2460 default: NanoAssert(0); break;
2463 freeResourcesOf(ins);
2469 Assembler::asm_arith(LIns* ins)
2471 LOpcode op = ins->opcode();
2472 LIns* lhs = ins->oprnd1();
2473 LIns* rhs = ins->oprnd2();
2475 // We always need the result register and the first operand register, so
2476 // find them up-front. (If the second operand is constant it is encoded
// directly as an Operand 2 immediate and needs no register.)
2478 Register rd = prepareResultReg(ins, GpRegs);
2480 // Try to re-use the result register for operand 1.
2481 Register rn = lhs->isInReg() ? lhs->getReg() : rd;
2483 // If the rhs is constant, we can use the instruction-specific code to
2484 // determine if the value can be encoded in an ARM instruction. If the
2485 // value cannot be encoded, it will be loaded into a register.
2487 // Note that the MUL instruction can never take an immediate argument so
2488 // even if the argument is constant, we must allocate a register for it.
2489 if (rhs->isImmI() && (op != LIR_muli) && (op != LIR_mulxovi) && (op != LIR_muljovi))
2491 int32_t immI = rhs->immI();
2495 case LIR_addi: asm_add_imm(rd, rn, immI); break;
2497 case LIR_addxovi: asm_add_imm(rd, rn, immI, 1); break;
2498 case LIR_subi: asm_sub_imm(rd, rn, immI); break;
2500 case LIR_subxovi: asm_sub_imm(rd, rn, immI, 1); break;
2501 case LIR_andi: asm_and_imm(rd, rn, immI); break;
2502 case LIR_ori: asm_orr_imm(rd, rn, immI); break;
2503 case LIR_xori: asm_eor_imm(rd, rn, immI); break;
2504 case LIR_lshi: LSLi(rd, rn, immI); break;
2505 case LIR_rshi: ASRi(rd, rn, immI); break;
2506 case LIR_rshui: LSRi(rd, rn, immI); break;
2509 NanoAssertMsg(0, "Unsupported");
2513 freeResourcesOf(ins);
2515 // Mark the re-used register as active.
2516 NanoAssert(!lhs->isInReg());
2517 findSpecificRegForUnallocated(lhs, rd);
2522 // The rhs is either already in a register or cannot be encoded as an
2523 // Operand 2 constant for this operation.
2525 Register rm = rhs->isInReg() ? rhs->getReg() : rd;
2526 if ((rm == rn) && (lhs != rhs)) {
2527 // We can't re-use the result register for both arguments, so force one
2528 // into its own register. We favour re-use for operand 2 (rm) here as
2529 // it is more likely to take a fast path for LIR_mul on ARMv5.
2530 rn = findRegFor(lhs, GpRegs & ~rmask(rd));
2531 NanoAssert(lhs->isInReg());
2536 case LIR_addi: ADDs(rd, rn, rm, 0); break;
2538 case LIR_addxovi: ADDs(rd, rn, rm, 1); break;
2539 case LIR_subi: SUBs(rd, rn, rm, 0); break;
2541 case LIR_subxovi: SUBs(rd, rn, rm, 1); break;
2542 case LIR_andi: ANDs(rd, rn, rm, 0); break;
2543 case LIR_ori: ORRs(rd, rn, rm, 0); break;
2544 case LIR_xori: EORs(rd, rn, rm, 0); break;
2547 if (!ARM_ARCH_AT_LEAST(6) && (rd == rn)) {
2548 // ARMv4 and ARMv5 cannot handle a MUL where rd == rn, so
2549 // explicitly assign a new register to rn.
2550 NanoAssert(!lhs->isInReg());
2551 rn = findRegFor(lhs, GpRegs & ~rmask(rd) & ~rmask(rm));
2560 if (!ARM_ARCH_AT_LEAST(6) && (rd == rn)) {
2561 // ARMv5 (and earlier) cannot handle a MUL where rd == rn, so
2562 // if that is the case, explicitly assign a new register to rn.
2563 NanoAssert(!lhs->isInReg());
2564 rn = findRegFor(lhs, GpRegs & ~rmask(rd) & ~rmask(rm));
2569 // ARM cannot automatically detect overflow from a MUL operation,
2570 // so we have to perform some other arithmetic:
2571 // SMULL rr, ip, ra, rb
2572 // CMP ip, rr, ASR #31
2573 // An explanation can be found in bug 521161. This sets Z if we did
2574 // _not_ overflow, and clears it if we did.
2575 ALUr_shi(AL, cmp, 1, SBZ, IP, rd, ASR_imm, 31);
2576 SMULL(rd, IP, rn, rm);
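// Sketch of why this works: SMULL leaves the 64-bit product as IP:rd.
// If the result fits in 32 bits, IP must equal rd ASR #31 (all zeroes or
// all ones, matching rd's sign bit), so CMP sets Z exactly when no
// overflow occurred.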
2579 // The shift operations need a mask to match the JavaScript
2580 // specification because the ARM architecture allows a greater shift
2581 // range than JavaScript.
2595 NanoAssertMsg(0, "Unsupported");
2599 freeResourcesOf(ins);
2600 // If we re-used the result register, mark it as active.
2602 NanoAssert(!lhs->isInReg());
2603 findSpecificRegForUnallocated(lhs, rd);
2604 } else if (rm == rd) {
2605 NanoAssert(!rhs->isInReg());
2606 findSpecificRegForUnallocated(rhs, rd);
2608 NanoAssert(lhs->isInReg());
2609 NanoAssert(rhs->isInReg());
2614 Assembler::asm_neg_not(LIns* ins)
2616 LIns* lhs = ins->oprnd1();
2617 Register rr = prepareResultReg(ins, GpRegs);
2619 // If 'lhs' isn't in a register, we can give it the result register.
2620 Register ra = lhs->isInReg() ? lhs->getReg() : rr;
2622 if (ins->isop(LIR_noti)) {
2625 NanoAssert(ins->isop(LIR_negi));
2629 freeResourcesOf(ins);
2630 if (!lhs->isInReg()) {
2631 NanoAssert(ra == rr);
2632 // Update the register state to indicate that we've claimed ra for lhs.
2633 findSpecificRegForUnallocated(lhs, ra);
2638 Assembler::asm_load32(LIns* ins)
2640 LOpcode op = ins->opcode();
2641 LIns* base = ins->oprnd1();
2642 int d = ins->disp();
2644 Register rt = prepareResultReg(ins, GpRegs);
2645 // Try to re-use the result register for the base pointer.
2646 Register rn = base->isInReg() ? base->getReg() : rt;
2648 // TODO: The x86 back-end has a special case where the base address is
2649 // given by LIR_addp. The same technique may be useful here to take
2650 // advantage of ARM's register+register addressing mode.
2654 if (isU12(-d) || isU12(d)) {
2657 LDRB(rt, IP, d%4096);
2658 asm_add_imm(IP, rn, d-(d%4096));
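// e.g. d = 0x1234 is outside LDRB's ±4095 range, so this executes as:
//     asm_add_imm  ip, rn, #0x1000
//     LDRB         rt, [ip, #0x234]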
2662 // Some ARM machines require 2-byte alignment here.
2663 // Similar to the lduc2ui case, but the max offset is smaller.
2664 if (isU8(-d) || isU8(d)) {
2667 LDRH(rt, IP, d%256);
2668 asm_add_imm(IP, rn, d-(d%256));
2672 // Some ARM machines require 4-byte alignment here.
2673 if (isU12(-d) || isU12(d)) {
2676 LDR(rt, IP, d%4096);
2677 asm_add_imm(IP, rn, d-(d%4096));
2681 // Like LIR_lduc2ui, but sign-extend.
2682 // Some ARM machines require 2-byte alignment here.
2683 if (isU8(-d) || isU8(d)) {
2686 LDRSB(rt, IP, d%256);
2687 asm_add_imm(IP, rn, d-(d%256));
2691 // Like LIR_ldus2ui, but sign-extend.
2692 if (isU8(-d) || isU8(d)) {
2695 LDRSH(rt, IP, d%256);
2696 asm_add_imm(IP, rn, d-(d%256));
2700 NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
2704 freeResourcesOf(ins);
2707 NanoAssert(!base->isInReg());
2708 findSpecificRegForUnallocated(base, rn);
2713 Assembler::asm_cmov(LIns* ins)
2715 LIns* condval = ins->oprnd1();
2716 LIns* iftrue = ins->oprnd2();
2717 LIns* iffalse = ins->oprnd3();
2718 RegisterMask allow = ins->isD() ? FpRegs : GpRegs;
2721 NanoAssert(condval->isCmp());
2722 NanoAssert((ins->isop(LIR_cmovi) && iftrue->isI() && iffalse->isI()) ||
2723 (ins->isop(LIR_cmovd) && iftrue->isD() && iffalse->isD()));
2725 Register rd = prepareResultReg(ins, allow);
2727 // Try to re-use the result register for one of the arguments.
2728 Register rt = iftrue->isInReg() ? iftrue->getReg() : rd;
2729 Register rf = iffalse->isInReg() ? iffalse->getReg() : rd;
2730 // Note that iftrue and iffalse may actually be the same, though it
2731 // shouldn't happen with the LIR optimizers turned on.
2732 if ((rt == rf) && (iftrue != iffalse)) {
2733 // We can't re-use the result register for both arguments, so force one
2734 // into its own register.
2735 rf = findRegFor(iffalse, allow & ~rmask(rd));
2736 NanoAssert(iffalse->isInReg());
2739 switch(condval->opcode()) {
2740 default: NanoAssert(0);
2741 // Integer comparisons.
2742 case LIR_eqi: cc = EQ; break;
2743 case LIR_lti: cc = LT; break;
2744 case LIR_lei: cc = LE; break;
2745 case LIR_gti: cc = GT; break;
2746 case LIR_gei: cc = GE; break;
2747 case LIR_ltui: cc = LO; break;
2748 case LIR_leui: cc = LS; break;
2749 case LIR_gtui: cc = HI; break;
2750 case LIR_geui: cc = HS; break;
2752 case LIR_eqd: cc = EQ; break;
2753 case LIR_ltd: cc = LO; break;
2754 case LIR_led: cc = LS; break;
2755 case LIR_ged: cc = GE; break;
2756 case LIR_gtd: cc = GT; break;
2759 // Emit something like this:
2763 // If the destination was re-used for an input, the corresponding MOV will
2764 // be omitted as it will be redundant.
2767 MOV_cond(OppositeCond(cc), rd, rf);
2770 MOV_cond(cc, rd, rt);
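// Illustrative executed sequence for an integer cmov with cc = LT:
//     CMP   ...           ; the comparison issued for condval
//     MOVLT rd, rt        ; select iftrue when the condition holds
//     MOVGE rd, rf        ; select iffalse otherwise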
2772 } else if (ins->isD()) {
2773 // The VFP sequence is similar to the integer sequence, but uses a
2774 // VFP instruction in place of MOV.
2775 NanoAssert(ARM_VFP);
2777 FCPYD_cond(OppositeCond(cc), rd, rf);
2780 FCPYD_cond(cc, rd, rt);
2786 freeResourcesOf(ins);
2788 // If we re-used the result register, mark it as active for either iftrue
2789 // or iffalse (or both in the corner-case where they're the same).
2791 NanoAssert(!iftrue->isInReg());
2792 findSpecificRegForUnallocated(iftrue, rd);
2793 } else if (rf == rd) {
2794 NanoAssert(!iffalse->isInReg());
2795 findSpecificRegForUnallocated(iffalse, rd);
2797 NanoAssert(iffalse->isInReg());
2798 NanoAssert(iftrue->isInReg());
2805 Assembler::asm_qhi(LIns* ins)
2807 Register rd = prepareResultReg(ins, GpRegs);
2808 LIns *lhs = ins->oprnd1();
2809 int d = findMemFor(lhs);
2813 freeResourcesOf(ins);
2817 Assembler::asm_qlo(LIns* ins)
2819 Register rd = prepareResultReg(ins, GpRegs);
2820 LIns *lhs = ins->oprnd1();
2821 int d = findMemFor(lhs);
2825 freeResourcesOf(ins);
2829 Assembler::asm_param(LIns* ins)
2831 uint32_t a = ins->paramArg();
2832 uint32_t kind = ins->paramKind();
2834 // Ordinary parameter. These are always (32-bit-)word-sized, and will
2835 // be in the first four registers (argRegs) and then on the stack.
2837 // Register argument.
2838 prepareResultReg(ins, rmask(argRegs[a]));
2841 Register r = prepareResultReg(ins, GpRegs);
2842 int d = (a - 4) * sizeof(intptr_t) + 8;
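// The +8 skips the saved fp/lr pair that the prologue pushes below the
// incoming stack arguments (an assumption about this backend's frame layout).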
2847 NanoAssert(a < (sizeof(savedRegs)/sizeof(savedRegs[0])));
2848 prepareResultReg(ins, rmask(savedRegs[a]));
2850 freeResourcesOf(ins);
2854 Assembler::asm_immi(LIns* ins)
2856 Register rd = prepareResultReg(ins, GpRegs);
2857 asm_ld_imm(rd, ins->immI());
2858 freeResourcesOf(ins);
2862 Assembler::asm_ret(LIns *ins)
2866 // NB: our contract with genEpilogue is actually that the return value
2867 // we are intending for R0 is currently IP, not R0. This has to do with
2868 // the strange dual-nature of the patchable jump in a side-exit. See nPatchBranch.
2871 // With the hardware floating-point ABI we can skip this for retd.
2872 if (!(ARM_EABI_HARD && ins->isop(LIR_retd))) {
2876 // Pop the stack frame.
2881 LIns *value = ins->oprnd1();
2882 if (ins->isop(LIR_reti)) {
2883 findSpecificRegFor(value, R0);
2886 NanoAssert(ins->isop(LIR_retd));
2888 #ifdef NJ_ARM_EABI_HARD_FLOAT
2889 findSpecificRegFor(value, D0);
2891 Register reg = findRegFor(value, FpRegs);
2895 NanoAssert(value->isop(LIR_ii2d));
2896 findSpecificRegFor(value->oprnd1(), R0); // lo
2897 findSpecificRegFor(value->oprnd2(), R1); // hi
2903 Assembler::asm_jtbl(LIns* ins, NIns** table)
2905 Register indexreg = findRegFor(ins->oprnd1(), GpRegs);
2906 Register tmp = registerAllocTmp(GpRegs & ~rmask(indexreg));
2907 LDR_scaled(PC, tmp, indexreg, 2); // LDR PC, [tmp + index*4]
2908 asm_ld_imm(tmp, (int32_t)table); // tmp = #table
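// Executed order (emission above is reversed):
//     <asm_ld_imm>  tmp, #table
//     LDR           pc, [tmp, +indexreg, LSL #2]   ; jump via table[index]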
2911 void Assembler::swapCodeChunks() {
2913 codeAlloc(exitStart, exitEnd, _nExitIns verbose_only(, exitBytes), NJ_MAX_CPOOL_OFFSET);
2915 _nExitSlot = exitStart;
2916 SWAP(NIns*, _nIns, _nExitIns);
2917 SWAP(NIns*, _nSlot, _nExitSlot); // this one is ARM-specific
2918 SWAP(NIns*, codeStart, exitStart);
2919 SWAP(NIns*, codeEnd, exitEnd);
2920 verbose_only( SWAP(size_t, codeBytes, exitBytes); )
2923 void Assembler::asm_insert_random_nop() {
2924 NanoAssert(0); // not supported
2928 #endif /* FEATURE_NANOJIT */