/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2008
 * the Initial Developer. All Rights Reserved.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#include "nanojit.h"

#if defined FEATURE_NANOJIT && defined NANOJIT_PPC

namespace nanojit
{
    const Register Assembler::retRegs[] = { R3, R4 }; // high=R3, low=R4
    const Register Assembler::argRegs[] = { R3, R4, R5, R6, R7, R8, R9, R10 };
    const Register Assembler::savedRegs[] = {
    #if !defined NANOJIT_64BIT
        R13,
    #endif
        R14, R15, R16, R17, R18, R19, R20, R21, R22,
        R23, R24, R25, R26, R27, R28, R29, R30
    };
    const char *regNames[] = {
        "r0",  "sp",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
        "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
        "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
        "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
        "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
        "f8",  "f9",  "f10", "f11", "f12", "f13", "f14", "f15",
        "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
        "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
    };

    const char *bitNames[] = { "lt", "gt", "eq", "so" };
    #define TODO(x) do{ avmplus::AvmLog(#x); NanoAssertMsgf(false, "%s", #x); } while(0)
    /*
     * see http://developer.apple.com/documentation/developertools/Conceptual/LowLevelABI/index.html
     * stack layout (higher address going down)
     * sp ->       out linkage area
     *             out parameter area
     *             local variables
     *             saved registers
     * sp' ->      in linkage area
     *             in parameter area
     *
     * linkage area layout:
     *
     *    PPC32      PPC64
     *    sp+0       sp+0      saved sp
     *    sp+4       sp+8      saved cr
     *    sp+8       sp+16     saved lr
     *    sp+12      sp+24     reserved
     */
    const int min_param_area_size = 8*sizeof(void*); // r3-r10
    const int linkage_size = 6*sizeof(void*);
    const int lr_offset = 2*sizeof(void*); // linkage.lr
    const int cr_offset = 1*sizeof(void*); // linkage.cr
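
    // Worked example (derived from the constants above): with 4-byte pointers
    // (PPC32), min_param_area_size == 32, linkage_size == 24, lr_offset == 8
    // and cr_offset == 4; with 8-byte pointers the same expressions give
    // 64, 48, 16 and 8.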
    NIns* Assembler::genPrologue() {
        // mflr r0
        // stw r0, lr_offset(sp)
        // stwu sp, -framesize(sp)

        // param_area must be at least large enough for r3-r10 to be saved,
        // regardless of whether we think the callee needs less: e.g., the callee
        // might tail-call to a function that uses varargs, which could flush
        // r3-r10 to the parameter area.
        uint32_t param_area = (max_param_size > min_param_area_size) ? max_param_size : min_param_area_size;
        // activation frame is 4 bytes per entry even on 64bit machines
        uint32_t stackNeeded = param_area + linkage_size + _activation.stackSlotsNeeded() * 4;
        uint32_t aligned = alignUp(stackNeeded, NJ_ALIGN_STACK);
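
        // Worked example (illustrative, assuming NJ_ALIGN_STACK == 16): with no
        // outgoing call args on PPC32, param_area = 32 and linkage_size = 24;
        // three stack slots add 12, so stackNeeded = 68 and aligned = 80.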
        UNLESS_PEDANTIC( if (isS16(aligned)) {
            STPU(SP, -aligned, SP); // *(sp-aligned) = sp; sp -= aligned
        } else ) {
            STPUX(SP, SP, R0);      // *(sp+R0) = sp; sp += R0, with R0 = -aligned
            asm_li(R0, -aligned);
        }

        NIns *patchEntry = _nIns;
        MR(FP,SP);              // save SP to use as a FP
        STP(FP, cr_offset, SP); // cheat and save our FP in linkage.cr
        STP(R0, lr_offset, SP); // save LR in linkage.lr
        MFLR(R0);
        return patchEntry;
    }
    NIns* Assembler::genEpilogue() {
        BLR();
        MTLR(R0);
        LP(R0, lr_offset, SP);
        LP(FP, cr_offset, SP); // restore FP from linkage.cr
        MR(SP,FP);
        return _nIns;
    }
    void Assembler::asm_load32(LIns *ins) {
        LIns* base = ins->oprnd1();
        int d = ins->disp();
        Register rr = deprecated_prepResultReg(ins, GpRegs);
        Register ra = getBaseReg(base, d, GpRegs);

        switch(ins->opcode()) {
            case LIR_lduc2ui:
                LBZX(rr, ra, R0); // rr = [ra+R0]
                asm_li(R0, d);
                return;
            case LIR_ldus2ui:
                // these are expected to be 2- or 4-byte aligned
                LHZX(rr, ra, R0); // rr = [ra+R0]
                asm_li(R0, d);
                return;
            case LIR_ldi:
                // these are expected to be 4-byte aligned
                LWZX(rr, ra, R0); // rr = [ra+R0]
                asm_li(R0, d);
                return;
            case LIR_ldc2i:
            case LIR_lds2i:
                NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
                return;
            default:
                NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
                return;
        }
    }
    void Assembler::asm_store32(LOpcode op, LIns *value, int32_t dr, LIns *base) {

        switch (op) {
            case LIR_sti:
                // handled by mainline code below for now
                break;
            case LIR_sti2c:
                NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
                return;
            default:
                NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
                return;
        }

        Register rs = findRegFor(value, GpRegs);
        Register ra = value == base ? rs : getBaseReg(base, dr, GpRegs & ~rmask(rs));

        // general case store, any offset size
        STWX(rs, ra, R0);
        asm_li(R0, dr);
    }
    void Assembler::asm_load64(LIns *ins) {

        switch (ins->opcode()) {
            case LIR_ldd:
            CASE64(LIR_ldq:)
                // handled by mainline code below for now
                break;
            case LIR_ldf2d:
                NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
                return;
            default:
                NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
                return;
        }

        LIns* base = ins->oprnd1();
    #ifdef NANOJIT_64BIT
        Register rr = ins->deprecated_getReg();
        if (deprecated_isKnownReg(rr) && (rmask(rr) & FpRegs)) {
            // FPR already assigned, fine, use it
            deprecated_freeRsrcOf(ins);
        } else {
            // use a GPR register; it's okay to copy doubles with GPRs
            // but *not* okay to copy non-doubles with FPRs
            rr = deprecated_prepResultReg(ins, GpRegs);
        }
    #else
        Register rr = deprecated_prepResultReg(ins, FpRegs);
    #endif

        int dr = ins->disp();
        Register ra = getBaseReg(base, dr, GpRegs);

    #ifdef NANOJIT_64BIT
        if (rmask(rr) & GpRegs) {
            // general case 64bit GPR load
            LDX(rr, ra, R0);
            asm_li(R0, dr);
            return;
        }
    #endif

        // general case FPR load
        LFDX(rr, ra, R0);
        asm_li(R0, dr);
    }
    void Assembler::asm_li(Register r, int32_t imm) {
        if ((imm & 0xffff) == 0) {
            imm = uint32_t(imm) >> 16;
            LIS(r, imm);    // low halfword is zero: a single lis suffices
            return;
        }
        asm_li32(r, imm);
    }
    void Assembler::asm_li32(Register r, int32_t imm) {
        // li32 r,imm == lis r,imm>>16; ori r,r,imm&0xffff
        // TODO: use ADDI instead of ORI if r != R0; the implementation might have a 3-way adder
        ORI(r, r, uint16_t(imm));
        LIS(r, imm>>16); // on ppc64, this sign extends
    }
    void Assembler::asm_li64(Register r, uint64_t imm) {
        underrunProtect(5*sizeof(NIns)); // must be contiguous to be patchable
        ORI(r,r,uint16_t(imm));        // r[0:15] = imm[0:15]
        ORIS(r,r,uint16_t(imm>>16));   // r[16:31] = imm[16:31]
        SLDI(r,r,32);                  // r[32:63] = r[0:31], r[0:31] = 0
        asm_li32(r, int32_t(imm>>32)); // r[0:31] = imm[32:63]
    }
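
    // Illustrative expansion (shown in executed order, which is the reverse of
    // the emission order above): asm_li64(R2, 0x1122334455667788) produces
    //   lis  r2, 0x1122
    //   ori  r2, r2, 0x3344
    //   sldi r2, r2, 32
    //   oris r2, r2, 0x5566
    //   ori  r2, r2, 0x7788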
    void Assembler::asm_store64(LOpcode op, LIns *value, int32_t dr, LIns *base) {
        NanoAssert(value->isQorD());

        switch (op) {
            case LIR_std:
            CASE64(LIR_stq:)
                // handled by mainline code below for now
                break;
            case LIR_std2f:
                NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
                return;
            default:
                NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
                return;
        }

        Register ra = getBaseReg(base, dr, GpRegs);

        // general case for any value
    #if !defined NANOJIT_64BIT
        // on 32-bit CPUs, we only use store64 for doubles
        Register rs = findRegFor(value, FpRegs);
    #else
        // if we have to choose a register, use a GPR
        Register rs = ( !value->isInReg()
                      ? findRegFor(value, GpRegs & ~rmask(ra))
                      : value->deprecated_getReg() );

        if (rmask(rs) & GpRegs) {
            // general case store 64bit GPR
            STDX(rs, ra, R0);
            asm_li(R0, dr);
            return;
        }
    #endif // NANOJIT_64BIT

        // general case for any offset
        STFDX(rs, ra, R0);
        asm_li(R0, dr);
    }
    void Assembler::asm_cond(LIns *ins) {
        LOpcode op = ins->opcode();
        LIns *a = ins->oprnd1();
        LIns *b = ins->oprnd2();
        ConditionRegister cr = CR7;
        Register r = deprecated_prepResultReg(ins, GpRegs);
        switch (op) {
        case LIR_eqi: case LIR_eqd:
        CASE64(LIR_eqq:)
            EXTRWI(r, r, 1, 4*cr+COND_eq); // extract CR7.eq
            MFCR(r);
            break;
        case LIR_lti: case LIR_ltui:
        case LIR_ltd: case LIR_led:
        CASE64(LIR_ltq:) CASE64(LIR_ltuq:)
            EXTRWI(r, r, 1, 4*cr+COND_lt); // extract CR7.lt
            MFCR(r);
            break;
        case LIR_gti: case LIR_gtui:
        case LIR_gtd: case LIR_ged:
        CASE64(LIR_gtq:) CASE64(LIR_gtuq:)
            EXTRWI(r, r, 1, 4*cr+COND_gt); // extract CR7.gt
            MFCR(r);
            break;
        case LIR_lei: case LIR_leui:
        CASE64(LIR_leq:) CASE64(LIR_leuq:)
            EXTRWI(r, r, 1, 4*cr+COND_eq); // extract CR7.eq
            MFCR(r);
            CROR(CR7, eq, lt, eq); // eq = lt|eq
            break;
        case LIR_gei: case LIR_geui:
        CASE64(LIR_geq:) CASE64(LIR_geuq:)
            EXTRWI(r, r, 1, 4*cr+COND_eq); // extract CR7.eq
            MFCR(r);
            CROR(CR7, eq, gt, eq); // eq = gt|eq
            break;
        default:
            debug_only(outputf("%s",lirNames[ins->opcode()]);)
            TODO(asm_cond);
            break;
        }
        asm_cmp(op, a, b, cr);
    }
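
    // Note on the EXTRWI operands above (assuming CR7 == 7 and the usual CR
    // bit layout lt=0, gt=1, eq=2, so=3, per bitNames): 4*cr+COND_eq == 30, so
    // after MFCR has copied the whole condition register into r, EXTRWI
    // extracts the single bit 30, i.e. CR7.eq, as a 0/1 result.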
    void Assembler::asm_condd(LIns *ins) {
        asm_cond(ins);
    }
    // Shift left, then arithmetic-shift right, to use sign extension to test
    // the top bits. ptrdiff_t is a signed, pointer-sized int.
    static inline bool isS14(ptrdiff_t d) {
        const int shift = sizeof(ptrdiff_t) * 8 - 14; // 18 or 50
        return ((d << shift) >> shift) == d;
    }
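
    // E.g. with 32-bit pointers, shift == 18: d == 8191 round-trips
    // ((8191<<18)>>18 == 8191) but d == 8192 comes back as -8192, matching
    // the signed 14-bit range [-8192, 8191].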
    NIns* Assembler::asm_branch(bool onfalse, LIns *cond, NIns * const targ) {
        LOpcode condop = cond->opcode();
        NanoAssert(cond->isCmp());

        // powerpc offsets are based on the address of the branch instruction
        NIns *patch;
    #if !PEDANTIC
        ptrdiff_t bd = targ - (_nIns-1);
        if (targ && isS24(bd))
            patch = asm_branch_near(onfalse, cond, targ);
        else
    #endif
            patch = asm_branch_far(onfalse, cond, targ);
        asm_cmp(condop, cond->oprnd1(), cond->oprnd2(), CR7);
        return patch;
    }
    NIns* Assembler::asm_branch_near(bool onfalse, LIns *cond, NIns * const targ) {
        NanoAssert(targ != 0);
        underrunProtect(4);
        ptrdiff_t bd = targ - (_nIns-1);
        NIns *patch = 0;
        if (!isS14(bd)) {
            underrunProtect(8);
            bd = targ - (_nIns-1);
            if (isS24(bd)) {
                // can't fit conditional branch offset into 14 bits, but
                // we can fit in 24, so invert the condition and branch
                // around an unconditional jump
                verbose_only(verbose_outputf("%p:", _nIns);)
                NIns *skip = _nIns;
                Bx(bd, 0, 0); // b targ
                patch = _nIns; // this is the patchable branch to the given target
                onfalse = !onfalse;
                bd = skip - (_nIns-1);
                NanoAssert(isS14(bd));
                verbose_only(verbose_outputf("branch24");)
            }
            else {
                // target is too far away even for the 24-bit form
                return asm_branch_far(onfalse, cond, targ);
            }
        }
        ConditionRegister cr = CR7;
        switch (cond->opcode()) {
            case LIR_eqi: case LIR_eqd:
            CASE64(LIR_eqq:)
                if (onfalse) BNE(cr,bd); else BEQ(cr,bd);
                break;
            case LIR_lti: case LIR_ltui:
            case LIR_ltd: case LIR_led:
            CASE64(LIR_ltq:) CASE64(LIR_ltuq:)
                if (onfalse) BNL(cr,bd); else BLT(cr,bd);
                break;
            case LIR_lei: case LIR_leui:
            CASE64(LIR_leq:) CASE64(LIR_leuq:)
                if (onfalse) BGT(cr,bd); else BLE(cr,bd);
                break;
            case LIR_gti: case LIR_gtui:
            case LIR_gtd: case LIR_ged:
            CASE64(LIR_gtq:) CASE64(LIR_gtuq:)
                if (onfalse) BNG(cr,bd); else BGT(cr,bd);
                break;
            case LIR_gei: case LIR_geui:
            CASE64(LIR_geq:) CASE64(LIR_geuq:)
                if (onfalse) BLT(cr,bd); else BGE(cr,bd);
                break;
            default:
                debug_only(outputf("%s",lirNames[cond->opcode()]);)
                TODO(unknown_cond);
                break;
        }
        if (!patch)
            patch = _nIns;
        return patch;
    }
    // general case branch to any address (using CTR)
    NIns *Assembler::asm_branch_far(bool onfalse, LIns *cond, NIns * const targ) {
        LOpcode condop = cond->opcode();
        ConditionRegister cr = CR7;
        switch (condop) {
            case LIR_eqi: case LIR_eqd:
            CASE64(LIR_eqq:)
                if (onfalse) BNECTR(cr); else BEQCTR(cr);
                break;
            case LIR_lti: case LIR_ltui:
            CASE64(LIR_ltq:) CASE64(LIR_ltuq:)
            case LIR_ltd: case LIR_led:
                if (onfalse) BNLCTR(cr); else BLTCTR(cr);
                break;
            case LIR_lei: case LIR_leui:
            CASE64(LIR_leq:) CASE64(LIR_leuq:)
                if (onfalse) BGTCTR(cr); else BLECTR(cr);
                break;
            case LIR_gti: case LIR_gtui:
            CASE64(LIR_gtq:) CASE64(LIR_gtuq:)
            case LIR_gtd: case LIR_ged:
                if (onfalse) BNGCTR(cr); else BGTCTR(cr);
                break;
            case LIR_gei: case LIR_geui:
            CASE64(LIR_geq:) CASE64(LIR_geuq:)
                if (onfalse) BLTCTR(cr); else BGECTR(cr);
                break;
            default:
                debug_only(outputf("%s",lirNames[condop]);)
                TODO(unknown_cond);
                break;
        }

    #if !defined NANOJIT_64BIT
        MTCTR(R0);
        asm_li32(R0, (int)targ);
    #else
        MTCTR(R0);
        if (!targ || !isU32(uintptr_t(targ))) {
            asm_li64(R0, uint64_t(targ));
        } else {
            asm_li32(R0, uint32_t(uintptr_t(targ)));
        }
    #endif
        return _nIns;
    }
    NIns* Assembler::asm_branch_ov(LOpcode, NIns*) {
        TODO(asm_branch_ov);
        return _nIns;
    }
    void Assembler::asm_cmp(LOpcode condop, LIns *a, LIns *b, ConditionRegister cr) {
        RegisterMask allow = isCmpDOpcode(condop) ? FpRegs : GpRegs;
        Register ra = findRegFor(a, allow);

    #if !PEDANTIC
        if (b->isImmI()) {
            int32_t d = b->immI();
            if (isS16(d)) {
                if (isCmpSIOpcode(condop)) {
                    CMPWI(cr, ra, d);
                    return;
                }
    #if defined NANOJIT_64BIT
                if (isCmpSQOpcode(condop)) {
                    CMPDI(cr, ra, d);
                    return;
                }
    #endif
            }
            if (isU16(d)) {
                if (isCmpUIOpcode(condop)) {
                    CMPLWI(cr, ra, d);
                    return;
                }
    #if defined NANOJIT_64BIT
                if (isCmpUQOpcode(condop)) {
                    CMPLDI(cr, ra, d);
                    return;
                }
    #endif
            }
        }
    #endif

        // general case: put b in a register and compare
        Register rb = b==a ? ra : findRegFor(b, allow & ~rmask(ra));
        if (isCmpSIOpcode(condop)) {
            CMPW(cr, ra, rb);
        }
        else if (isCmpUIOpcode(condop)) {
            CMPLW(cr, ra, rb);
        }
    #if defined NANOJIT_64BIT
        else if (isCmpSQOpcode(condop)) {
            CMPD(cr, ra, rb);
        }
        else if (isCmpUQOpcode(condop)) {
            CMPLD(cr, ra, rb);
        }
    #endif
        else if (isCmpDOpcode(condop)) {
            // set the lt/gt bit for led/ged. We don't do this for
            // int/uint because in those cases we can invert the branch condition.
            // for float, we can't because of unordered comparisons
            if (condop == LIR_led)
                CROR(cr, lt, lt, eq); // lt = lt|eq
            else if (condop == LIR_ged)
                CROR(cr, gt, gt, eq); // gt = gt|eq
            FCMPU(cr, ra, rb);
        }
        else {
            TODO(asm_cmp);
        }
    }
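
    // Concrete case for the CROR above (illustrative): for LIR_led with a NaN
    // operand, fcmpu sets only the "unordered" bit, so lt and eq are both 0;
    // after cror lt,lt,eq the BLT used for led correctly falls through.
    // Merely inverting a BGT instead would wrongly treat NaN as "<=".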
    void Assembler::asm_ret(LIns *ins) {
        genEpilogue();
        releaseRegisters();
        assignSavedRegs();
        LIns *value = ins->oprnd1();
        Register r = ins->isop(LIR_retd) ? F1 : R3;
        findSpecificRegFor(value, r);
    }
    void Assembler::asm_nongp_copy(Register r, Register s) {
        // PPC doesn't support any GPR<->FPR moves
        NanoAssert((rmask(r) & FpRegs) && (rmask(s) & FpRegs));
        FMR(r, s);
    }
    bool Assembler::canRemat(LIns* ins)
    {
        return ins->isImmI() || ins->isop(LIR_allocp);
    }
    void Assembler::asm_restore(LIns *i, Register r) {
        int d;
        if (i->isop(LIR_allocp)) {
            d = deprecated_disp(i);
            ADDI(r, FP, d);
        }
        else if (i->isImmI()) {
            asm_li(r, i->immI());
        }
        else {
            d = findMemFor(i);
            if (IsFpReg(r)) {
                NanoAssert(i->isQorD());
                LFD(r, d, FP);
            } else if (i->isQorD()) {
                NanoAssert(IsGpReg(r));
                LD(r, d, FP);
            } else {
                NanoAssert(i->isI());
                NanoAssert(IsGpReg(r));
                LWZ(r, d, FP);
            }
        }
    }
    void Assembler::asm_immi(LIns *ins) {
        Register rr = deprecated_prepResultReg(ins, GpRegs);
        asm_li(rr, ins->immI());
    }
    void Assembler::asm_fneg(LIns *ins) {
        Register rr = deprecated_prepResultReg(ins, FpRegs);
        Register ra = findRegFor(ins->oprnd1(), FpRegs);
        FNEG(rr, ra);
    }
    void Assembler::asm_param(LIns *ins) {
        uint32_t a = ins->paramArg();
        uint32_t kind = ins->paramKind();
        if (kind == 0) {
            // ordinary param
            // first eight args always in R3..R10 for PPC
            if (a < 8) {
                // incoming arg in register
                deprecated_prepResultReg(ins, rmask(argRegs[a]));
            } else {
                // todo: support stack based args, arg 0 is at [FP+off] where off
                // is the # of regs to be pushed in genPrologue()
                TODO(asm_param_stk);
            }
        }
        else {
            // saved param
            deprecated_prepResultReg(ins, rmask(savedRegs[a]));
        }
    }
    void Assembler::asm_call(LIns *ins) {
        if (!ins->isop(LIR_callv)) {
            Register retReg = ( ins->isop(LIR_calld) ? F1 : retRegs[0] );
            deprecated_prepResultReg(ins, rmask(retReg));
        }

        // Do this after we've handled the call result, so we don't
        // force the call result to be spilled unnecessarily.
        evictScratchRegsExcept(0);

        const CallInfo* call = ins->callInfo();
        ArgType argTypes[MAXARGS];
        uint32_t argc = call->getArgTypes(argTypes);

        bool indirect;
        if (!(indirect = call->isIndirect())) {
            verbose_only(if (_logc->lcbits & LC_Native)
                outputf("        %p:", _nIns);
            )
            br((NIns*)call->_address, 1);
        } else {
            // Indirect call: we assign the address arg to R11 since it's not
            // used for regular arguments, and is otherwise scratch since it's
            // clobbered by the call.
            underrunProtect(8); // underrunProtect might clobber CTR
            BCTRL();
            MTCTR(R11);
            asm_regarg(ARGTYPE_P, ins->arg(--argc), R11);
        }

        int param_size = 0;

        Register r = R3;
        Register fr = F1;
        for(uint32_t i = 0; i < argc; i++) {
            uint32_t j = argc - i - 1;
            ArgType ty = argTypes[j];
            LIns* arg = ins->arg(j);
            NanoAssert(ty != ARGTYPE_V);
            if (ty != ARGTYPE_D) {
                // GP arg
                if (r <= R10) {
                    asm_regarg(ty, arg, r);
                    r = r + 1;
                    param_size += sizeof(void*);
                } else {
                    // put arg on stack
                    TODO(stack_int32);
                }
            } else {
                // double
                if (fr <= F13) {
                    asm_regarg(ty, arg, fr);
                    fr = fr + 1;
    #ifdef NANOJIT_64BIT
                    r = r + 1;
    #else
                    r = r + 2; // Skip 2 GPRs.
    #endif
                    param_size += sizeof(double);
                } else {
                    // put arg on stack
                    TODO(stack_double);
                }
            }
        }
        if (param_size > max_param_size)
            max_param_size = param_size;
    }
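
    // Worked example (illustrative): for a call f(int a, double b, int c) on
    // PPC32, a lands in R3, b in F1 (consuming the shadow pair R4/R5 via
    // "r = r + 2"), and c in R6; param_size is 4+8+4 = 16, which is below
    // min_param_area_size, so the prologue still reserves 32 bytes.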
    void Assembler::asm_regarg(ArgType ty, LIns* p, Register r)
    {
        NanoAssert(r != deprecated_UnknownReg);
        NanoAssert(ty != ARGTYPE_V);
        if (ty != ARGTYPE_D)
        {
    #ifdef NANOJIT_64BIT
            if (ty == ARGTYPE_I) {
                // sign extend 32->64
                EXTSW(r, r);
            } else if (ty == ARGTYPE_UI) {
                // zero extend 32->64
                CLRLDI(r, r, 32);
            }
    #endif

            // arg goes in specific register
            if (p->isImmI()) {
                asm_li(r, p->immI());
            } else {
                if (p->isExtant()) {
                    if (!p->deprecated_hasKnownReg()) {
                        // load it into the arg reg
                        int d = findMemFor(p);
                        if (p->isop(LIR_allocp)) {
                            NanoAssert(isS16(d));
                            ADDI(r, FP, d);
                        } else if (p->isQorD()) {
                            LD(r, d, FP);
                        } else {
                            LWZ(r, d, FP);
                        }
                    } else {
                        // it must be in a saved reg
                        MR(r, p->deprecated_getReg());
                    }
                }
                else {
                    // this is the last use, so fine to assign it
                    // to the scratch reg, it's dead after this point.
                    findSpecificRegFor(p, r);
                }
            }
        }
        else {
            if (p->isExtant()) {
                Register rp = p->deprecated_getReg();
                if (!deprecated_isKnownReg(rp) || !IsFpReg(rp)) {
                    // load it into the arg reg
                    int d = findMemFor(p);
                    LFD(r, d, FP);
                } else {
                    // it must be in a saved reg
                    NanoAssert(IsFpReg(r) && IsFpReg(rp));
                    FMR(r, rp);
                }
            }
            else {
                // this is the last use, so fine to assign it
                // to the scratch reg, it's dead after this point.
                findSpecificRegFor(p, r);
            }
        }
    }
    void Assembler::asm_spill(Register rr, int d, bool quad) {
        (void) quad;
        NanoAssert(d);
        if (IsFpReg(rr)) {
            STFD(rr, d, FP);
        }
    #ifdef NANOJIT_64BIT
        else if (quad) {
            STD(rr, d, FP);
        }
    #endif
        else {
            STW(rr, d, FP);
        }
    }
    void Assembler::asm_arith(LIns *ins) {
        LOpcode op = ins->opcode();
        LIns* lhs = ins->oprnd1();
        LIns* rhs = ins->oprnd2();
        RegisterMask allow = GpRegs;
        Register rr = deprecated_prepResultReg(ins, allow);
        Register ra = findRegFor(lhs, GpRegs);

        if (rhs->isImmI()) {
            int32_t rhsc = rhs->immI();
            if (isS16(rhsc)) {
                // ppc arith immediate ops sign-extend the imm16 value
                switch (op) {
                CASE64(LIR_addq:)
                case LIR_addi:
                    ADDI(rr, ra, rhsc);
                    return;
                }
            }
            if (isU16(rhsc)) {
                // ppc logical immediate ops zero-extend the imm16 value
                switch (op) {
                CASE64(LIR_orq:)  case LIR_ori:  ORI(rr, ra, rhsc);  return;
                CASE64(LIR_andq:) case LIR_andi: ANDI(rr, ra, rhsc); return;
                CASE64(LIR_xorq:) case LIR_xori: XORI(rr, ra, rhsc); return;
                }
            }

            // LIR shift ops only use the low 5 bits of the shift constant
            switch (op) {
            case LIR_lshi:
                SLWI(rr, ra, rhsc&31);
                return;
            case LIR_rshui:
                SRWI(rr, ra, rhsc&31);
                return;
            case LIR_rshi:
                SRAWI(rr, ra, rhsc&31);
                return;
            }
        }

        // general case, put rhs in register
        Register rb = rhs==lhs ? ra : findRegFor(rhs, GpRegs&~rmask(ra));
        switch (op) {
            CASE64(LIR_addq:)
            case LIR_addi:  ADD(rr, ra, rb);    break;
            case LIR_andi:  AND(rr, ra, rb);    break;
            case LIR_ori:   OR(rr, ra, rb);     break;
            case LIR_xori:  XOR(rr, ra, rb);    break;
            case LIR_subi:  SUBF(rr, rb, ra);   break;
            case LIR_lshi:  SLW(rr, ra, R0);    ANDI(R0, rb, 31);   break;
            case LIR_rshi:  SRAW(rr, ra, R0);   ANDI(R0, rb, 31);   break;
            case LIR_rshui: SRW(rr, ra, R0);    ANDI(R0, rb, 31);   break;
            case LIR_muli:  MULLW(rr, ra, rb);  break;
            default:
                debug_only(outputf("%s",lirNames[op]);)
                TODO(asm_arith);
        }
    }
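
    // Note on the two-instruction shift cases above: nanojit emits backwards,
    // so the ANDI that masks the shift count into R0 executes *before* the
    // SLW/SRAW/SRW that consumes R0. Likewise SUBF(rr, rb, ra) computes
    // ra - rb (PPC subf rD,rA,rB yields rB - rA), i.e. lhs - rhs as LIR_subi
    // requires.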
    void Assembler::asm_fop(LIns *ins) {
        LOpcode op = ins->opcode();
        LIns* lhs = ins->oprnd1();
        LIns* rhs = ins->oprnd2();
        RegisterMask allow = FpRegs;
        Register rr = deprecated_prepResultReg(ins, allow);
        Register ra, rb;
        findRegFor2(allow, lhs, ra, allow, rhs, rb);
        switch (op) {
            case LIR_addd: FADD(rr, ra, rb); break;
            case LIR_subd: FSUB(rr, ra, rb); break;
            case LIR_muld: FMUL(rr, ra, rb); break;
            case LIR_divd: FDIV(rr, ra, rb); break;
            default:
                debug_only(outputf("%s",lirNames[op]);)
                TODO(asm_fop);
        }
    }
    void Assembler::asm_i2d(LIns *ins) {
        Register r = deprecated_prepResultReg(ins, FpRegs);
        Register v = findRegFor(ins->oprnd1(), GpRegs);
        const int d = 16; // naturally aligned scratch slot

    #if defined NANOJIT_64BIT && !PEDANTIC
        FCFID(r, r);    // convert to double
        LFD(r, d, SP);  // load into fpu register
        STD(v, d, SP);  // save int64
        EXTSW(v, v);    // sign-extend destructively; ok since oprnd1 is only 32 bits
    #else
        FSUB(r, r, F0);
        LFD(r, d, SP);          // scratch area in outgoing linkage area
        STW(R0, d+4, SP);       // lo word = v ^ 0x80000000
        XORIS(R0, v, 0x8000);
        LFD(F0, d+8, SP);       // F0 = 2^52 + 2^31
        STW(R0, d+12, SP);
        LIS(R0, 0x8000);
        STW(R0, d+8, SP);
        STW(R0, d, SP);         // hi words of both = 0x43300000
        LIS(R0, 0x4330);
    #endif
    }
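
    // Why the 32-bit path above works (a sketch of the classic bit trick): the
    // word pair (0x43300000, v ^ 0x80000000) read as an IEEE double equals
    // 2^52 + 2^31 + v for any signed 32-bit v, so subtracting the constant
    // double (0x43300000, 0x80000000) == 2^52 + 2^31 leaves exactly (double)v.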
    void Assembler::asm_ui2d(LIns *ins) {
        Register r = deprecated_prepResultReg(ins, FpRegs);
        Register v = findRegFor(ins->oprnd1(), GpRegs);
        const int d = 16;

    #if defined NANOJIT_64BIT && !PEDANTIC
        FCFID(r, r);        // convert to double
        LFD(r, d, SP);      // load into fpu register
        STD(v, d, SP);      // save int64
        CLRLDI(v, v, 32);   // zero-extend destructively
    #else
        FSUB(r, r, F0);
        LFD(r, d, SP);      // (0x43300000, v) as a double is 2^52 + v
        STW(v, d+4, SP);    // lo word = v
        LFD(F0, d+8, SP);   // F0 = 2^52
        STW(R0, d+12, SP);
        LI(R0, 0);
        STW(R0, d+8, SP);
        STW(R0, d, SP);     // hi words of both = 0x43300000
        LIS(R0, 0x4330);
    #endif
    }
    void Assembler::asm_d2i(LIns*) {
        NanoAssertMsg(0, "NJ_F2I_SUPPORTED not yet supported for this architecture");
    }
    #if defined NANOJIT_64BIT
    // XXX: this is sub-optimal, see https://bugzilla.mozilla.org/show_bug.cgi?id=540368#c7.
    void Assembler::asm_q2i(LIns *ins) {
        Register rr = deprecated_prepResultReg(ins, GpRegs);
        int d = findMemFor(ins->oprnd1());
        LWZ(rr, d+4, FP); // big-endian: the low 32 bits live at d+4
    }
    void Assembler::asm_ui2uq(LIns *ins) {
        LOpcode op = ins->opcode();
        Register r = deprecated_prepResultReg(ins, GpRegs);
        Register v = findRegFor(ins->oprnd1(), GpRegs);
        switch (op) {
        default:
            debug_only(outputf("%s",lirNames[op]));
            TODO(asm_ui2uq);
        case LIR_ui2uq:
            CLRLDI(r, v, 32); // clears the top 32 bits
            break;
        case LIR_i2q:
            EXTSW(r, v);
            break;
        }
    }
    void Assembler::asm_dasq(LIns*) {
        TODO(asm_dasq);
    }

    void Assembler::asm_qasd(LIns*) {
        TODO(asm_qasd);
    }
    #ifdef NANOJIT_64BIT
    void Assembler::asm_immq(LIns *ins) {
        Register r = ins->deprecated_getReg();
        if (deprecated_isKnownReg(r) && (rmask(r) & FpRegs)) {
            // FPR already assigned, fine, use it
            deprecated_freeRsrcOf(ins);
        } else {
            // use a GPR register; it's okay to copy doubles with GPRs
            // but *not* okay to copy non-doubles with FPRs
            r = deprecated_prepResultReg(ins, GpRegs);
        }

        if (rmask(r) & FpRegs) {
            union {
                int64_t q;
                struct {
                    int32_t hi, lo; // Always assuming big-endian in NativePPC.cpp
                } w;
            };
            q = ins->immQ();
            LFD(r, 8, SP);
            STW(R0, 12, SP);
            asm_li(R0, w.lo);
            STW(R0, 8, SP);
            asm_li(R0, w.hi);
        }
        else {
            int64_t q = ins->immQ();
            if (isS32(q)) {
                asm_li(r, int32_t(q));
                return;
            }
            RLDIMI(r,R0,32,0);          // insert R0's low word into the high word of r
            asm_li(R0, int32_t(q>>32)); // hi bits into R0
            asm_li(r, int32_t(q));      // lo bits into dest reg
        }
    }
    #endif
    void Assembler::asm_immd(LIns *ins) {
    #ifdef NANOJIT_64BIT
        Register r = ins->deprecated_getReg();
        if (deprecated_isKnownReg(r) && (rmask(r) & FpRegs)) {
            // FPR already assigned, fine, use it
            deprecated_freeRsrcOf(ins);
        } else {
            // use a GPR register; it's okay to copy doubles with GPRs
            // but *not* okay to copy non-doubles with FPRs
            r = deprecated_prepResultReg(ins, GpRegs);
        }
    #else
        Register r = deprecated_prepResultReg(ins, FpRegs);
    #endif

        if (rmask(r) & FpRegs) {
            union {
                double d;
                struct {
                    int32_t hi, lo; // Always assuming big-endian in NativePPC.cpp
                } w;
            };
            d = ins->immD();
            LFD(r, 8, SP);
            STW(R0, 12, SP);
            asm_li(R0, w.lo);
            STW(R0, 8, SP);
            asm_li(R0, w.hi);
        }
        else {
            int64_t q = ins->immDasQ();
            if (isS32(q)) {
                asm_li(r, int32_t(q));
                return;
            }
            RLDIMI(r,R0,32,0);          // insert R0's low word into the high word of r
            asm_li(R0, int32_t(q>>32)); // hi bits into R0
            asm_li(r, int32_t(q));      // lo bits into dest reg
        }
    }
    void Assembler::br(NIns* addr, int link) {
        // if the destination is unknown, use the farthest-reaching branch form
        if (!addr) {
            br_far(addr, link);
            return;
        }

        // powerpc offsets are based on the address of the branch instruction
        underrunProtect(4);                  // ensure _nIns is addr of Bx
        ptrdiff_t offset = addr - (_nIns-1); // we want ptr diff's implicit >>2 here

    #if !PEDANTIC
        if (isS24(offset)) {
            Bx(offset, 0, link); // b addr or bl addr
            return;
        }
        ptrdiff_t absaddr = addr - (NIns*)0; // ptr diff implies >>2
        if (isS24(absaddr)) {
            Bx(absaddr, 1, link); // ba addr or bla addr
            return;
        }
    #endif // !PEDANTIC

        br_far(addr, link);
    }
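
    // Reach note (illustrative): isS24 tests the 24-bit *word* offset encoded
    // by the PPC I-form branch, so a relative b/bl reaches +/-32MB from the
    // branch, and the absolute ba/bla form reaches the low 32MB (or, via sign
    // extension, the top 32MB) of the address space.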
    void Assembler::br_far(NIns* addr, int link) {
        // far branch; can't have a page break in this sequence, because the break
        // would also clobber ctr and r2. We use R2 here because it's not available
        // to the register allocator, and we use R0 everywhere else as scratch, so using
        // R2 here avoids clobbering anything else besides CTR.
    #ifdef NANOJIT_64BIT
        if (addr==0 || !isU32(uintptr_t(addr))) {
            // really far jump to 64bit abs addr
            underrunProtect(28); // 7 instructions
            BCTR(link);
            MTCTR(R2);
            asm_li64(R2, uintptr_t(addr)); // 5 instructions
            return;
        }
    #endif
        underrunProtect(16);
        BCTR(link);
        MTCTR(R2);
        asm_li32(R2, uint32_t(uintptr_t(addr))); // 2 instructions
    }
    void Assembler::underrunProtect(int bytes) {
        NanoAssertMsg(bytes<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small");
        int instr = (bytes + sizeof(NIns) - 1) / sizeof(NIns);
        NIns *pc = _nIns;
        NIns *top = codeStart; // this may be in a normal code chunk or an exit code chunk

    #if PEDANTIC
        // pedanticTop is based on the last call to underrunProtect; any time we call
        // underrunProtect and would use more than what's already protected, then insert
        // a page break jump. Sometimes, this will be to a new page, usually it's just
        // the next instruction and the only effect is to clobber R2 & CTR

        NanoAssert(pedanticTop >= top);
        if (pc - instr < pedanticTop) {
            // no page break required, but insert a far branch anyway just to be difficult
    #ifdef NANOJIT_64BIT
            const int br_size = 7; // lis,ori,sldi,oris,ori + mtctr + bctr
    #else
            const int br_size = 4; // lis,ori + mtctr + bctr
    #endif
            if (pc - instr - br_size < top) {
                // really do need a page break
                verbose_only(if (_logc->lcbits & LC_Native) outputf("newpage %p:", pc);)
                codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
            }
            // now emit the jump, but make sure we won't need another page break.
            // we're pedantic, but not *that* pedantic.
            pedanticTop = _nIns - br_size;
            br(pc, 0);
            pedanticTop = _nIns - instr;
        }
    #else
        if (pc - instr < top) {
            verbose_only(if (_logc->lcbits & LC_Native) outputf("newpage %p:", pc);)
            // This may be in a normal code chunk or an exit code chunk.
            codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
            // This jump will call underrunProtect again, but since we're on a new
            // page, nothing will happen.
            br(pc, 0);
        }
    #endif
    }
    void Assembler::asm_cmov(LIns* ins)
    {
        LIns* condval = ins->oprnd1();
        LIns* iftrue  = ins->oprnd2();
        LIns* iffalse = ins->oprnd3();

    #ifdef NANOJIT_64BIT
        NanoAssert((ins->opcode() == LIR_cmovi && iftrue->isI() && iffalse->isI()) ||
                   (ins->opcode() == LIR_cmovq && iftrue->isQ() && iffalse->isQ()));
    #else
        NanoAssert((ins->opcode() == LIR_cmovi && iftrue->isI() && iffalse->isI()));
    #endif

        Register rr = prepareResultReg(ins, GpRegs);
        Register rf = findRegFor(iffalse, GpRegs & ~rmask(rr));

        // If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
        Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;

        underrunProtect(16); // make sure branch target and branch are on same page and thus near
        NIns *after = _nIns;
        verbose_only(if (_logc->lcbits & LC_Native) outputf("%p:",after);)
        if (rr != rf)
            MR(rr, rf);

        NanoAssert(isS24(after - (_nIns-1)));
        asm_branch_near(false, condval, after);

        if (rr != rt)
            MR(rr, rt);

        freeResourcesOf(ins);
        if (!iftrue->isInReg()) {
            NanoAssert(rt == rr);
            findSpecificRegForUnallocated(iftrue, rr);
        }

        asm_cmp(condval->opcode(), condval->oprnd1(), condval->oprnd2(), CR7);
    }
    RegisterMask Assembler::nHint(LIns* ins) {
        NanoAssert(ins->isop(LIR_paramp));
        RegisterMask prefer = 0;
        if (ins->paramKind() == 0)
            if (ins->paramArg() < 8)
                prefer = rmask(argRegs[ins->paramArg()]);
        return prefer;
    }
    void Assembler::asm_neg_not(LIns *ins) {
        Register rr = deprecated_prepResultReg(ins, GpRegs);
        Register ra = findRegFor(ins->oprnd1(), GpRegs);
        if (ins->isop(LIR_negi)) {
            NEG(rr, ra);
        } else {
            NOT(rr, ra);
        }
    }
    void Assembler::nInit(AvmCore*) {
        nHints[LIR_calli]  = rmask(R3);
    #ifdef NANOJIT_64BIT
        nHints[LIR_callq]  = rmask(R3);
    #endif
        nHints[LIR_calld]  = rmask(F1);
        nHints[LIR_paramp] = PREFER_SPECIAL;
    }
    void Assembler::nBeginAssembly() {
        max_param_size = 0;
    }
    void Assembler::nativePageSetup() {
        NanoAssert(!_inExit);
        if (!_nIns) {
            codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
            IF_PEDANTIC( pedanticTop = _nIns; )
        }
    }
    void Assembler::nativePageReset()
    {}
    // Increment the 32-bit profiling counter at pCtr, without
    // changing any registers.
    void Assembler::asm_inc_m32(uint32_t* /*pCtr*/)
    {
        // TODO: implement this
    }
    void Assembler::nPatchBranch(NIns *branch, NIns *target) {
        // ppc relative offsets are based on the addr of the branch instruction
        ptrdiff_t bd = target - branch;
        if (branch[0] == PPC_b) {
            // unconditional, 24-bit offset. Whoever generated the unpatched jump
            // must have known the final size would fit in 24 bits! otherwise the
            // jump would be (lis,ori,mtctr,bctr) and we'd be patching the lis,ori.
            NanoAssert(isS24(bd));
            branch[0] |= (bd & 0xffffff) << 2;
        }
        else if ((branch[0] & PPC_bc) == PPC_bc) {
            // conditional, 14-bit offset. Whoever generated the unpatched jump
            // must have known the final size would fit in 14 bits! otherwise the
            // jump would be (lis,ori,mtctr,bcctr) and we'd be patching the lis,ori below.
            NanoAssert(isS14(bd));
            NanoAssert((branch[0] & (0x3fff << 2)) == 0); // BD field must start out empty
            branch[0] |= (bd & 0x3fff) << 2;
        }
    #ifdef NANOJIT_64BIT
        // patch 64-bit branch
        else if ((branch[0] & ~(31<<21)) == PPC_addis) {
            // general branch, using lis,ori,sldi,oris,ori to load the const 64-bit addr.
            Register rd = { (branch[0] >> 21) & 31 };
            NanoAssert(branch[1] == (PPC_ori  | GPR(rd)<<21 | GPR(rd)<<16));
            NanoAssert(branch[3] == (PPC_oris | GPR(rd)<<21 | GPR(rd)<<16));
            NanoAssert(branch[4] == (PPC_ori  | GPR(rd)<<21 | GPR(rd)<<16));
            uint64_t imm = uintptr_t(target);
            uint32_t lo = uint32_t(imm);
            uint32_t hi = uint32_t(imm>>32);
            branch[0] = PPC_addis | GPR(rd)<<21 | uint16_t(hi>>16);
            branch[1] = PPC_ori   | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(hi);
            branch[3] = PPC_oris  | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(lo>>16);
            branch[4] = PPC_ori   | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(lo);
            // branch[2] is the sldi, which needs no patching
        }
    #else // NANOJIT_64BIT
        // patch 32-bit branch
        else if ((branch[0] & ~(31<<21)) == PPC_addis) {
            // general branch, using lis,ori to load the const addr.
            // patch a lis,ori sequence with a 32-bit value
            Register rd = { (branch[0] >> 21) & 31 };
            NanoAssert(branch[1] == (PPC_ori | GPR(rd)<<21 | GPR(rd)<<16));
            uint32_t imm = uint32_t(target);
            branch[0] = PPC_addis | GPR(rd)<<21 | uint16_t(imm >> 16);        // lis rd, imm >> 16
            branch[1] = PPC_ori | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(imm);  // ori rd, rd, imm & 0xffff
        }
    #endif // !NANOJIT_64BIT
        else {
            TODO(unknown_patch);
        }
    }
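
    // Patching example (illustrative): retargeting a 32-bit far branch whose
    // load register is r2 to 0xdeadbeef rewrites the pair to
    //   lis r2, 0xdead / ori r2, r2, 0xbeef
    // In the 64-bit case branch[2] is the sldi, which never needs repatching.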
    static int cntzlw(int set) {
        // On PowerPC, prefer higher registers, to minimize
        // size of nonvolatile area that must be saved.
        register uint32_t i;
    #ifdef __GNUC__
        asm ("cntlzw %0,%1" : "=r" (i) : "r" (set));
    #else
    # error("unsupported compiler")
    #endif
        return 31-i;
    }
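
    // E.g. cntzlw(0x0000000c) -> cntlzw counts 28 leading zeros, so the result
    // is 31-28 == 3: the *highest* set bit, and hence the highest-numbered
    // available register, is chosen.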
    Register Assembler::nRegisterAllocFromSet(RegisterMask set) {
        uint32_t i;
        // note, deliberate truncation of 64->32 bits
        if (set & 0xffffffff) {
            i = cntzlw(int(set)); // gp reg
        } else {
            i = 32 + cntzlw(int(set>>32)); // fp reg
        }
        Register r = { i };
        _allocator.free &= ~rmask(r);
        return r;
    }
    void Assembler::nRegisterResetAll(RegAlloc &regs) {
        regs.clear();
        regs.free = SavedRegs | 0x1ff8 /* R3-12 */ | 0x3ffe00000000LL /* F1-13 */;
    }
    #ifdef NANOJIT_64BIT
    void Assembler::asm_qbinop(LIns *ins) {
        LOpcode op = ins->opcode();
        switch (op) {
        case LIR_addq:
        case LIR_andq:
        case LIR_orq:
        case LIR_xorq:
        case LIR_lshq:
        case LIR_rshq:
        case LIR_rshuq:
            asm_arith(ins);
            break;
        default:
            debug_only(outputf("%s",lirNames[op]));
            TODO(asm_qbinop);
        }
    }
    #endif // NANOJIT_64BIT
    void Assembler::nFragExit(LIns*) {
        TODO(nFragExit);
    }
    void Assembler::asm_jtbl(LIns* ins, NIns** native_table)
    {
        // R0 = index*4, R2 = table, CTR = computed address to jump to.
        // must ensure no page breaks in here because R2 & CTR can get clobbered.
        Register indexreg = findRegFor(ins->oprnd1(), GpRegs);
    #ifdef NANOJIT_64BIT
        underrunProtect(9*4);
        BCTR(0);                              // jump to address in CTR
        MTCTR(R2);                            // CTR = R2
        LDX(R2, R2, R0);                      // R2 = [table + index*8]
        SLDI(R0, indexreg, 3);                // R0 = index*8
        asm_li64(R2, uint64_t(native_table)); // R2 = table (5 instr)
    #else
        underrunProtect(6*4);
        BCTR(0);                              // jump to address in CTR
        MTCTR(R2);                            // CTR = R2
        LWZX(R2, R2, R0);                     // R2 = [table + index*4]
        SLWI(R0, indexreg, 2);                // R0 = index*4
        asm_li(R2, int32_t(native_table));    // R2 = table (up to 2 instructions)
    #endif
    }
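
    // Because nanojit emits code backwards, the executed sequence is the
    // reverse of the source order above: load the table base into R2, scale
    // the index into R0, fetch the entry, move it to CTR, then bctr.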
    void Assembler::swapCodeChunks() {
        if (!_nExitIns)
            codeAlloc(exitStart, exitEnd, _nExitIns verbose_only(, exitBytes));

        SWAP(NIns*, _nIns, _nExitIns);
        SWAP(NIns*, codeStart, exitStart);
        SWAP(NIns*, codeEnd, exitEnd);
        verbose_only( SWAP(size_t, codeBytes, exitBytes); )
    }
    void Assembler::asm_insert_random_nop() {
        NanoAssert(0); // not supported
    }
} // namespace nanojit

#endif // FEATURE_NANOJIT && NANOJIT_PPC