js/src/nanojit/NativePPC.cpp

   1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
   2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
   3 /* ***** BEGIN LICENSE BLOCK *****
   4  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   5  *
   6  * The contents of this file are subject to the Mozilla Public License Version
   7  * 1.1 (the "License"); you may not use this file except in compliance with
   8  * the License. You may obtain a copy of the License at
   9  * http://www.mozilla.org/MPL/
  10  *
  11  * Software distributed under the License is distributed on an "AS IS" basis,
  12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  13  * for the specific language governing rights and limitations under the
  14  * License.
  15  *
  16  * The Original Code is [Open Source Virtual Machine].
  17  *
  18  * The Initial Developer of the Original Code is
  19  * Adobe System Incorporated.
  20  * Portions created by the Initial Developer are Copyright (C) 2008
  21  * the Initial Developer. All Rights Reserved.
  22  *
  23  * Contributor(s):
  24  *   Adobe AS3 Team
  25  *
  26  * Alternatively, the contents of this file may be used under the terms of
  27  * either the GNU General Public License Version 2 or later (the "GPL"), or
  28  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  29  * in which case the provisions of the GPL or the LGPL are applicable instead
  30  * of those above. If you wish to allow use of your version of this file only
  31  * under the terms of either the GPL or the LGPL, and not to allow others to
  32  * use your version of this file under the terms of the MPL, indicate your
  33  * decision by deleting the provisions above and replace them with the notice
  34  * and other provisions required by the GPL or the LGPL. If you do not delete
  35  * the provisions above, a recipient may use your version of this file under
  36  * the terms of any one of the MPL, the GPL or the LGPL.
  37  *
  38  * ***** END LICENSE BLOCK ***** */
  39
  40 #include "nanojit.h"
  41
  42 #if defined FEATURE_NANOJIT && defined NANOJIT_PPC
  43
  44 namespace nanojit
  45 {
  46     const Register Assembler::retRegs[] = { R3, R4 }; // high=R3, low=R4
  47     const Register Assembler::argRegs[] = { R3, R4, R5, R6, R7, R8, R9, R10 };
  48
  49     const Register Assembler::savedRegs[] = {
  50     #if !defined NANOJIT_64BIT
  51         R13,
  52     #endif
  53         R14, R15, R16, R17, R18, R19, R20, R21, R22,
  54         R23, R24, R25, R26, R27, R28, R29, R30
  55     };
  56
  57     const char *regNames[] = {
  58         "r0",  "sp",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
  59         "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
  60         "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
  61         "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
  62         "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
  63         "f8",  "f9",  "f10", "f11", "f12", "f13", "f14", "f15",
  64         "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
  65         "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
  66     };
  67
  68     const char *bitNames[] = { "lt", "gt", "eq", "so" };
  69
  70     #define TODO(x) do{ avmplus::AvmLog(#x); NanoAssertMsgf(false, "%s", #x); } while(0)
  71
  72     /*
  73      * see http://developer.apple.com/documentation/developertools/Conceptual/LowLevelABI/index.html
  74      * stack layout (higher address going down)
  75      * sp ->    out linkage area
  76      *          out parameter area
  77      *          local variables
  78      *          saved registers
  79      * sp' ->   in linkage area
  80      *          in parameter area
  81      *
  82      * linkage area layout:
  83      * PPC32    PPC64
  84      * sp+0     sp+0    saved sp
  85      * sp+4     sp+8    saved cr
  86      * sp+8     sp+16   saved lr
  87      * sp+12    sp+24   reserved
  88      */
  89
  90     const int min_param_area_size = 8*sizeof(void*); // r3-r10
  91     const int linkage_size = 6*sizeof(void*);
  92     const int lr_offset = 2*sizeof(void*); // linkage.lr
  93     const int cr_offset = 1*sizeof(void*); // linkage.cr
  94
  95     NIns* Assembler::genPrologue() {
  96         // mflr r0
  97         // stw r0, lr_offset(sp)
  98         // stwu sp, -framesize(sp)
  99
 100         // param_area must be at least large enough for r3-r10 to be saved,
 101         // regardless of whether we think the callee needs less: e.g., the callee
 102         // might tail-call to a function that uses varargs, which could flush
 103         // r3-r10 to the parameter area.
 104         uint32_t param_area = (max_param_size > min_param_area_size) ? max_param_size : min_param_area_size;
 105         // activation frame is 4 bytes per entry even on 64bit machines
 106         uint32_t stackNeeded = param_area + linkage_size + _activation.stackSlotsNeeded() * 4;
 107         uint32_t aligned = alignUp(stackNeeded, NJ_ALIGN_STACK);
 108
 109         UNLESS_PEDANTIC( if (isS16(aligned)) {
 110             STPU(SP, -aligned, SP); // *(sp-aligned) = sp; sp -= aligned
 111         } else ) {
 112             STPUX(SP, SP, R0);
 113             asm_li(R0, -aligned);
 114         }
 115
 116         NIns *patchEntry = _nIns;
 117         MR(FP,SP);              // save SP to use as a FP
 118         STP(FP, cr_offset, SP); // cheat and save our FP in linkage.cr
 119         STP(R0, lr_offset, SP); // save LR in linkage.lr
 120         MFLR(R0);
 121
 122         return patchEntry;
 123     }
 124
 125     NIns* Assembler::genEpilogue() {
 126         BLR();
 127         MTLR(R0);
 128         LP(R0, lr_offset, SP);
 129         LP(FP, cr_offset, SP); // restore FP from linkage.cr
 130         MR(SP,FP);
 131         return _nIns;
 132     }
 133
 134     void Assembler::asm_load32(LIns *ins) {
 135         LIns* base = ins->oprnd1();
 136         int d = ins->disp();
 137         Register rr = deprecated_prepResultReg(ins, GpRegs);
 138         Register ra = getBaseReg(base, d, GpRegs);
 139
 140         switch(ins->opcode()) {
 141             case LIR_lduc2ui:
 142                 if (isS16(d)) {
 143                     LBZ(rr, d, ra);
 144                 } else {
 145                     LBZX(rr, ra, R0); // rr = [ra+R0]
 146                     asm_li(R0,d);
 147                 }
 148                 return;
 149             case LIR_ldus2ui:
 150                 // these are expected to be 2 or 4-byte aligned
 151                 if (isS16(d)) {
 152                     LHZ(rr, d, ra);
 153                 } else {
 154                     LHZX(rr, ra, R0); // rr = [ra+R0]
 155                     asm_li(R0,d);
 156                 }
 157                 return;
 158             case LIR_ldi:
 159                 // these are expected to be 4-byte aligned
 160                 if (isS16(d)) {
 161                     LWZ(rr, d, ra);
 162                 } else {
 163                     LWZX(rr, ra, R0); // rr = [ra+R0]
 164                     asm_li(R0,d);
 165                 }
 166                 return;
 167             case LIR_ldc2i:
 168             case LIR_lds2i:
 169                 NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
 170                 return;
 171             default:
 172                 NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
 173                 return;
 174         }
 175     }
 176
 177     void Assembler::asm_store32(LOpcode op, LIns *value, int32_t dr, LIns *base) {
 178
 179         switch (op) {
 180             case LIR_sti:
 181             case LIR_sti2c:
 182                 // handled by mainline code below for now
 183                 break;
 184             case LIR_sti2s:
 185                 NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
 186                 return;
 187             default:
 188                 NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
 189                 return;
 190         }
 191
 192         Register rs = findRegFor(value, GpRegs);
 193         Register ra = value == base ? rs : getBaseReg(base, dr, GpRegs & ~rmask(rs));
 194
 195     #if !PEDANTIC
 196         if (isS16(dr)) {
 197             switch (op) {
 198             case LIR_sti:
 199                 STW(rs, dr, ra);
 200                 break;
 201             case LIR_sti2c:
 202                 STB(rs, dr, ra);
 203                 break;
 204             }
 205             return;
 206         }
 207     #endif
 208
 209         // general case store, any offset size
 210         switch (op) {
 211         case LIR_sti:
 212             STWX(rs, ra, R0);
 213             break;
 214         case LIR_sti2c:
 215             STBX(rs, ra, R0);
 216             break;
 217         }
 218         asm_li(R0, dr);
 219     }
 220
 221     void Assembler::asm_load64(LIns *ins) {
 222
 223         switch (ins->opcode()) {
 224             case LIR_ldd:
 225             CASE64(LIR_ldq:)
 226                 // handled by mainline code below for now
 227                 break;
 228             case LIR_ldf2d:
 229                 NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
 230                 return;
 231             default:
 232                 NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
 233                 return;
 234         }
 235
 236         LIns* base = ins->oprnd1();
 237     #ifdef NANOJIT_64BIT
 238         Register rr = ins->deprecated_getReg();
 239         if (deprecated_isKnownReg(rr) && (rmask(rr) & FpRegs)) {
 240             // FPR already assigned, fine, use it
 241             deprecated_freeRsrcOf(ins);
 242         } else {
 243             // use a GPR register; its okay to copy doubles with GPR's
 244             // but *not* okay to copy non-doubles with FPR's
 245             rr = deprecated_prepResultReg(ins, GpRegs);
 246         }
 247     #else
 248         Register rr = deprecated_prepResultReg(ins, FpRegs);
 249     #endif
 250
 251         int dr = ins->disp();
 252         Register ra = getBaseReg(base, dr, GpRegs);
 253
 254     #ifdef NANOJIT_64BIT
 255         if (rmask(rr) & GpRegs) {
 256             #if !PEDANTIC
 257                 if (isS16(dr)) {
 258                     LD(rr, dr, ra);
 259                     return;
 260                 }
 261             #endif
 262             // general case 64bit GPR load
 263             LDX(rr, ra, R0);
 264             asm_li(R0, dr);
 265             return;
 266         }
 267     #endif
 268
 269         // FPR
 270     #if !PEDANTIC
 271         if (isS16(dr)) {
 272             LFD(rr, dr, ra);
 273             return;
 274         }
 275     #endif
 276
 277         // general case FPR load
 278         LFDX(rr, ra, R0);
 279         asm_li(R0, dr);
 280     }
 281
 282     void Assembler::asm_li(Register r, int32_t imm) {
 283     #if !PEDANTIC
 284         if (isS16(imm)) {
 285             LI(r, imm);
 286             return;
 287         }
 288         if ((imm & 0xffff) == 0) {
 289             imm = uint32_t(imm) >> 16;
 290             LIS(r, imm);
 291             return;
 292         }
 293     #endif
 294         asm_li32(r, imm);
 295     }
 296
 297     void Assembler::asm_li32(Register r, int32_t imm) {
 298         // general case
 299         // TODO use ADDI instead of ORI if r != r0, impl might have 3way adder
 300         ORI(r, r, imm);
 301         LIS(r, imm>>16);  // on ppc64, this sign extends
 302     }
 303
 304     void Assembler::asm_li64(Register r, uint64_t imm) {
 305         underrunProtect(5*sizeof(NIns)); // must be contiguous to be patchable
 306         ORI(r,r,uint16_t(imm));        // r[0:15] = imm[0:15]
 307         ORIS(r,r,uint16_t(imm>>16));   // r[16:31] = imm[16:31]
 308         SLDI(r,r,32);                  // r[32:63] = r[0:31], r[0:31] = 0
 309         asm_li32(r, int32_t(imm>>32)); // r[0:31] = imm[32:63]
 310     }
 311
 312     void Assembler::asm_store64(LOpcode op, LIns *value, int32_t dr, LIns *base) {
 313         NanoAssert(value->isQorD());
 314
 315         switch (op) {
 316             case LIR_std:
 317             CASE64(LIR_stq:)
 318                 // handled by mainline code below for now
 319                 break;
 320             case LIR_std2f:
 321                 NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
 322                 return;
 323             default:
 324                 NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
 325                 return;
 326         }
 327
 328         Register ra = getBaseReg(base, dr, GpRegs);
 329
 330         // general case for any value
 331     #if !defined NANOJIT_64BIT
 332         // on 32bit cpu's, we only use store64 for doubles
 333         Register rs = findRegFor(value, FpRegs);
 334     #else
 335         // if we have to choose a register, use a GPR
 336         Register rs = ( !value->isInReg()
 337                       ? findRegFor(value, GpRegs & ~rmask(ra))
 338                       : value->deprecated_getReg() );
 339
 340         if (rmask(rs) & GpRegs) {
 341         #if !PEDANTIC
 342             if (isS16(dr)) {
 343                 // short offset
 344                 STD(rs, dr, ra);
 345                 return;
 346             }
 347         #endif
 348             // general case store 64bit GPR
 349             STDX(rs, ra, R0);
 350             asm_li(R0, dr);
 351             return;
 352         }
 353     #endif // NANOJIT_64BIT
 354
 355     #if !PEDANTIC
 356         if (isS16(dr)) {
 357             // short offset
 358             STFD(rs, dr, ra);
 359             return;
 360         }
 361     #endif
 362
 363         // general case for any offset
 364         STFDX(rs, ra, R0);
 365         asm_li(R0, dr);
 366     }
 367
 368     void Assembler::asm_cond(LIns *ins) {
 369         LOpcode op = ins->opcode();
 370         LIns *a = ins->oprnd1();
 371         LIns *b = ins->oprnd2();
 372         ConditionRegister cr = CR7;
 373         Register r = deprecated_prepResultReg(ins, GpRegs);
 374         switch (op) {
 375         case LIR_eqi: case LIR_eqd:
 376         CASE64(LIR_eqq:)
 377             EXTRWI(r, r, 1, 4*cr+COND_eq); // extract CR7.eq
 378             MFCR(r);
 379             break;
 380         case LIR_lti: case LIR_ltui:
 381         case LIR_ltd: case LIR_led:
 382         CASE64(LIR_ltq:) CASE64(LIR_ltuq:)
 383             EXTRWI(r, r, 1, 4*cr+COND_lt); // extract CR7.lt
 384             MFCR(r);
 385             break;
 386         case LIR_gti: case LIR_gtui:
 387         case LIR_gtd: case LIR_ged:
 388         CASE64(LIR_gtq:) CASE64(LIR_gtuq:)
 389             EXTRWI(r, r, 1, 4*cr+COND_gt); // extract CR7.gt
 390             MFCR(r);
 391             break;
 392         case LIR_lei: case LIR_leui:
 393         CASE64(LIR_leq:) CASE64(LIR_leuq:)
 394             EXTRWI(r, r, 1, 4*cr+COND_eq); // extract CR7.eq
 395             MFCR(r);
 396             CROR(CR7, eq, lt, eq);
 397             break;
 398         case LIR_gei: case LIR_geui:
 399         CASE64(LIR_geq:) CASE64(LIR_geuq:)
 400             EXTRWI(r, r, 1, 4*cr+COND_eq); // select CR7.eq
 401             MFCR(r);
 402             CROR(CR7, eq, gt, eq);
 403             break;
 404         default:
 405             debug_only(outputf("%s",lirNames[ins->opcode()]);)
 406             TODO(asm_cond);
 407             break;
 408         }
 409         asm_cmp(op, a, b, cr);
 410     }
 411
 412     void Assembler::asm_condd(LIns *ins) {
 413         asm_cond(ins);
 414     }
 415
 416     // cause sign extension to test bits.  ptrdiff_t is a signed,
 417     // pointer-sized int
 418     static inline bool isS14(ptrdiff_t d) {
 419         const int shift = sizeof(ptrdiff_t) * 8 - 14; // 18 or 50
 420         return ((d << shift) >> shift) == d;
 421     }
 422
 423     NIns* Assembler::asm_branch(bool onfalse, LIns *cond, NIns * const targ) {
 424         LOpcode condop = cond->opcode();
 425         NanoAssert(cond->isCmp());
 426
 427         // powerpc offsets are based on the address of the branch instruction
 428         NIns *patch;
 429     #if !PEDANTIC
 430         ptrdiff_t bd = targ - (_nIns-1);
 431         if (targ && isS24(bd))
 432             patch = asm_branch_near(onfalse, cond, targ);
 433         else
 434     #endif
 435             patch = asm_branch_far(onfalse, cond, targ);
 436         asm_cmp(condop, cond->oprnd1(), cond->oprnd2(), CR7);
 437         return patch;
 438     }
 439
 440     NIns* Assembler::asm_branch_near(bool onfalse, LIns *cond, NIns * const targ) {
 441         NanoAssert(targ != 0);
 442         underrunProtect(4);
 443         ptrdiff_t bd = targ - (_nIns-1);
 444         NIns *patch = 0;
 445         if (!isS14(bd)) {
 446             underrunProtect(8);
 447             bd = targ - (_nIns-1);
 448             if (isS24(bd)) {
 449                 // can't fit conditional branch offset into 14 bits, but
 450                 // we can fit in 24, so invert the condition and branch
 451                 // around an unconditional jump
 452                 verbose_only(verbose_outputf("%p:", _nIns);)
 453                 NIns *skip = _nIns;
 454                 B(bd);
 455                 patch = _nIns; // this is the patchable branch to the given target
 456                 onfalse = !onfalse;
 457                 bd = skip - (_nIns-1);
 458                 NanoAssert(isS14(bd));
 459                 verbose_only(verbose_outputf("branch24");)
 460             }
 461             else {
 462                 // known far target
 463                 return asm_branch_far(onfalse, cond, targ);
 464             }
 465         }
 466         ConditionRegister cr = CR7;
 467         switch (cond->opcode()) {
 468         case LIR_eqi:
 469         case LIR_eqd:
 470         CASE64(LIR_eqq:)
 471             if (onfalse) BNE(cr,bd); else BEQ(cr,bd);
 472             break;
 473         case LIR_lti: case LIR_ltui:
 474         case LIR_ltd: case LIR_led:
 475         CASE64(LIR_ltq:) CASE64(LIR_ltuq:)
 476             if (onfalse) BNL(cr,bd); else BLT(cr,bd);
 477             break;
 478         case LIR_lei: case LIR_leui:
 479         CASE64(LIR_leq:) CASE64(LIR_leuq:)
 480             if (onfalse) BGT(cr,bd); else BLE(cr,bd);
 481             break;
 482         case LIR_gti: case LIR_gtui:
 483         case LIR_gtd: case LIR_ged:
 484         CASE64(LIR_gtq:) CASE64(LIR_gtuq:)
 485             if (onfalse) BNG(cr,bd); else BGT(cr,bd);
 486             break;
 487         case LIR_gei: case LIR_geui:
 488         CASE64(LIR_geq:) CASE64(LIR_geuq:)
 489             if (onfalse) BLT(cr,bd); else BGE(cr,bd);
 490             break;
 491         default:
 492             debug_only(outputf("%s",lirNames[cond->opcode()]);)
 493             TODO(unknown_cond);
 494         }
 495         if (!patch)
 496             patch = _nIns;
 497         return patch;
 498     }
 499
 500     // general case branch to any address (using CTR)
 501     NIns *Assembler::asm_branch_far(bool onfalse, LIns *cond, NIns * const targ) {
 502         LOpcode condop = cond->opcode();
 503         ConditionRegister cr = CR7;
 504         underrunProtect(16);
 505         switch (condop) {
 506         case LIR_eqi:
 507         case LIR_eqd:
 508         CASE64(LIR_eqq:)
 509             if (onfalse) BNECTR(cr); else BEQCTR(cr);
 510             break;
 511         case LIR_lti: case LIR_ltui:
 512         CASE64(LIR_ltq:) CASE64(LIR_ltuq:)
 513         case LIR_ltd: case LIR_led:
 514             if (onfalse) BNLCTR(cr); else BLTCTR(cr);
 515             break;
 516         case LIR_lei: case LIR_leui:
 517         CASE64(LIR_leq:) CASE64(LIR_leuq:)
 518             if (onfalse) BGTCTR(cr); else BLECTR(cr);
 519             break;
 520         case LIR_gti: case LIR_gtui:
 521         CASE64(LIR_gtq:) CASE64(LIR_gtuq:)
 522         case LIR_gtd: case LIR_ged:
 523             if (onfalse) BNGCTR(cr); else BGTCTR(cr);
 524             break;
 525         case LIR_gei: case LIR_geui:
 526         CASE64(LIR_geq:) CASE64(LIR_geuq:)
 527             if (onfalse) BLTCTR(cr); else BGECTR(cr);
 528             break;
 529         default:
 530             debug_only(outputf("%s",lirNames[condop]);)
 531             TODO(unknown_cond);
 532         }
 533
 534     #if !defined NANOJIT_64BIT
 535         MTCTR(R0);
 536         asm_li32(R0, (int)targ);
 537     #else
 538         MTCTR(R0);
 539         if (!targ || !isU32(uintptr_t(targ))) {
 540             asm_li64(R0, uint64_t(targ));
 541         } else {
 542             asm_li32(R0, uint32_t(uintptr_t(targ)));
 543         }
 544     #endif
 545         return _nIns;
 546     }
 547
 548     NIns* Assembler::asm_branch_ov(LOpcode, NIns*) {
 549         TODO(asm_branch_ov);
 550         return _nIns;
 551     }
 552
 553     void Assembler::asm_cmp(LOpcode condop, LIns *a, LIns *b, ConditionRegister cr) {
 554         RegisterMask allow = isCmpDOpcode(condop) ? FpRegs : GpRegs;
 555         Register ra = findRegFor(a, allow);
 556
 557     #if !PEDANTIC
 558         if (b->isImmI()) {
 559             int32_t d = b->immI();
 560             if (isS16(d)) {
 561                 if (isCmpSIOpcode(condop)) {
 562                     CMPWI(cr, ra, d);
 563                     return;
 564                 }
 565     #if defined NANOJIT_64BIT
 566                 if (isCmpSQOpcode(condop)) {
 567                     CMPDI(cr, ra, d);
 568                     TODO(cmpdi);
 569                     return;
 570                 }
 571     #endif
 572             }
 573             if (isU16(d)) {
 574                 if (isCmpUIOpcode(condop)) {
 575                     CMPLWI(cr, ra, d);
 576                     return;
 577                 }
 578     #if defined NANOJIT_64BIT
 579                 if (isCmpUQOpcode(condop)) {
 580                     CMPLDI(cr, ra, d);
 581                     TODO(cmpldi);
 582                     return;
 583                 }
 584     #endif
 585             }
 586         }
 587     #endif
 588
 589         // general case
 590         Register rb = b==a ? ra : findRegFor(b, allow & ~rmask(ra));
 591         if (isCmpSIOpcode(condop)) {
 592             CMPW(cr, ra, rb);
 593         }
 594         else if (isCmpUIOpcode(condop)) {
 595             CMPLW(cr, ra, rb);
 596         }
 597     #if defined NANOJIT_64BIT
 598         else if (isCmpSQOpcode(condop)) {
 599             CMPD(cr, ra, rb);
 600         }
 601         else if (isCmpUQOpcode(condop)) {
 602             CMPLD(cr, ra, rb);
 603         }
 604     #endif
 605         else if (isCmpDOpcode(condop)) {
 606             // set the lt/gt bit for fle/fge.  We don't do this for
 607             // int/uint because in those cases we can invert the branch condition.
 608             // for float, we can't because of unordered comparisons
 609             if (condop == LIR_led)
 610                 CROR(cr, lt, lt, eq); // lt = lt|eq
 611             else if (condop == LIR_ged)
 612                 CROR(cr, gt, gt, eq); // gt = gt|eq
 613             FCMPU(cr, ra, rb);
 614         }
 615         else {
 616             TODO(asm_cmp);
 617         }
 618     }
 619
 620     void Assembler::asm_ret(LIns *ins) {
 621         genEpilogue();
 622         releaseRegisters();
 623         assignSavedRegs();
 624         LIns *value = ins->oprnd1();
 625         Register r = ins->isop(LIR_retd) ? F1 : R3;
 626         findSpecificRegFor(value, r);
 627     }
 628
 629     void Assembler::asm_nongp_copy(Register r, Register s) {
 630         // PPC doesn't support any GPR<->FPR moves
 631         NanoAssert((rmask(r) & FpRegs) && (rmask(s) & FpRegs));
 632         FMR(r, s);
 633     }
 634
 635     bool Assembler::canRemat(LIns* ins)
 636     {
 637         return ins->isImmI() || ins->isop(LIR_allocp);
 638     }
 639
 640     void Assembler::asm_restore(LIns *i, Register r) {
 641         int d;
 642         if (i->isop(LIR_allocp)) {
 643             d = deprecated_disp(i);
 644             ADDI(r, FP, d);
 645         }
 646         else if (i->isImmI()) {
 647             asm_li(r, i->immI());
 648         }
 649         else {
 650             d = findMemFor(i);
 651             if (IsFpReg(r)) {
 652                 NanoAssert(i->isQorD());
 653                 LFD(r, d, FP);
 654             } else if (i->isQorD()) {
 655                 NanoAssert(IsGpReg(r));
 656                 LD(r, d, FP);
 657             } else {
 658                 NanoAssert(i->isI());
 659                 NanoAssert(IsGpReg(r));
 660                 LWZ(r, d, FP);
 661             }
 662         }
 663     }
 664
 665     void Assembler::asm_immi(LIns *ins) {
 666         Register rr = deprecated_prepResultReg(ins, GpRegs);
 667         asm_li(rr, ins->immI());
 668     }
 669
 670     void Assembler::asm_fneg(LIns *ins) {
 671         Register rr = deprecated_prepResultReg(ins, FpRegs);
 672         Register ra = findRegFor(ins->oprnd1(), FpRegs);
 673         FNEG(rr,ra);
 674     }
 675
 676     void Assembler::asm_param(LIns *ins) {
 677         uint32_t a = ins->paramArg();
 678         uint32_t kind = ins->paramKind();
 679         if (kind == 0) {
 680             // ordinary param
 681             // first eight args always in R3..R10 for PPC
 682             if (a < 8) {
 683                 // incoming arg in register
 684                 deprecated_prepResultReg(ins, rmask(argRegs[a]));
 685             } else {
 686                 // todo: support stack based args, arg 0 is at [FP+off] where off
 687                 // is the # of regs to be pushed in genProlog()
 688                 TODO(asm_param_stk);
 689             }
 690         }
 691         else {
 692             // saved param
 693             deprecated_prepResultReg(ins, rmask(savedRegs[a]));
 694         }
 695     }
 696
 697     void Assembler::asm_call(LIns *ins) {
 698         if (!ins->isop(LIR_callv)) {
 699             Register retReg = ( ins->isop(LIR_calld) ? F1 : retRegs[0] );
 700             deprecated_prepResultReg(ins, rmask(retReg));
 701         }
 702
 703         // Do this after we've handled the call result, so we don't
 704         // force the call result to be spilled unnecessarily.
 705         evictScratchRegsExcept(0);
 706
 707         const CallInfo* call = ins->callInfo();
 708         ArgType argTypes[MAXARGS];
 709         uint32_t argc = call->getArgTypes(argTypes);
 710
 711         bool indirect;
 712         if (!(indirect = call->isIndirect())) {
 713             verbose_only(if (_logc->lcbits & LC_Native)
 714                 outputf("        %p:", _nIns);
 715             )
 716             br((NIns*)call->_address, 1);
 717         } else {
 718             // Indirect call: we assign the address arg to R11 since it's not
 719             // used for regular arguments, and is otherwise scratch since it's
 720             // clobberred by the call.
 721             underrunProtect(8); // underrunProtect might clobber CTR
 722             BCTRL();
 723             MTCTR(R11);
 724             asm_regarg(ARGTYPE_P, ins->arg(--argc), R11);
 725         }
 726
 727         int param_size = 0;
 728
 729         Register r = R3;
 730         Register fr = F1;
 731         for(uint32_t i = 0; i < argc; i++) {
 732             uint32_t j = argc - i - 1;
 733             ArgType ty = argTypes[j];
 734             LIns* arg = ins->arg(j);
 735             NanoAssert(ty != ARGTYPE_V);
 736             if (ty != ARGTYPE_D) {
 737                 // GP arg
 738                 if (r <= R10) {
 739                     asm_regarg(ty, arg, r);
 740                     r = r + 1;
 741                     param_size += sizeof(void*);
 742                 } else {
 743                     // put arg on stack
 744                     TODO(stack_int32);
 745                 }
 746             } else {
 747                 // double
 748                 if (fr <= F13) {
 749                     asm_regarg(ty, arg, fr);
 750                     fr = fr + 1;
 751                 #ifdef NANOJIT_64BIT
 752                     r = r + 1;
 753                 #else
 754                     r = r + 2; // Skip 2 GPRs.
 755                 #endif
 756                     param_size += sizeof(double);
 757                 } else {
 758                     // put arg on stack
 759                     TODO(stack_double);
 760                 }
 761             }
 762         }
 763         if (param_size > max_param_size)
 764             max_param_size = param_size;
 765     }
 766
 767     void Assembler::asm_regarg(ArgType ty, LIns* p, Register r)
 768     {
 769         NanoAssert(r != deprecated_UnknownReg);
 770         NanoAssert(ty != ARGTYPE_V);
 771         if (ty != ARGTYPE_D)
 772         {
 773         #ifdef NANOJIT_64BIT
 774             if (ty == ARGTYPE_I) {
 775                 // sign extend 32->64
 776                 EXTSW(r, r);
 777             } else if (ty == ARGTYPE_UI) {
 778                 // zero extend 32->64
 779                 CLRLDI(r, r, 32);
 780             }
 781         #endif
 782             // arg goes in specific register
 783             if (p->isImmI()) {
 784                 asm_li(r, p->immI());
 785             } else {
 786                 if (p->isExtant()) {
 787                     if (!p->deprecated_hasKnownReg()) {
 788                         // load it into the arg reg
 789                         int d = findMemFor(p);
 790                         if (p->isop(LIR_allocp)) {
 791                             NanoAssert(isS16(d));
 792                             ADDI(r, FP, d);
 793                         } else if (p->isQorD()) {
 794                             LD(r, d, FP);
 795                         } else {
 796                             LWZ(r, d, FP);
 797                         }
 798                     } else {
 799                         // it must be in a saved reg
 800                         MR(r, p->deprecated_getReg());
 801                     }
 802                 }
 803                 else {
 804                     // this is the last use, so fine to assign it
 805                     // to the scratch reg, it's dead after this point.
 806                     findSpecificRegFor(p, r);
 807                 }
 808             }
 809         }
 810         else {
 811             if (p->isExtant()) {
 812                 Register rp = p->deprecated_getReg();
 813                 if (!deprecated_isKnownReg(rp) || !IsFpReg(rp)) {
 814                     // load it into the arg reg
 815                     int d = findMemFor(p);
 816                     LFD(r, d, FP);
 817                 } else {
 818                     // it must be in a saved reg
 819                     NanoAssert(IsFpReg(r) && IsFpReg(rp));
 820                     FMR(r, rp);
 821                 }
 822             }
 823             else {
 824                 // this is the last use, so fine to assign it
 825                 // to the scratch reg, it's dead after this point.
 826                 findSpecificRegFor(p, r);
 827             }
 828         }
 829     }
 830
 831     void Assembler::asm_spill(Register rr, int d, bool quad) {
 832         (void)quad;
 833         NanoAssert(d);
 834         if (IsFpReg(rr)) {
 835             NanoAssert(quad);
 836             STFD(rr, d, FP);
 837         }
 838     #ifdef NANOJIT_64BIT
 839         else if (quad) {
 840             STD(rr, d, FP);
 841         }
 842     #endif
 843         else {
 844             NanoAssert(!quad);
 845             STW(rr, d, FP);
 846         }
 847     }
 848
 849     void Assembler::asm_arith(LIns *ins) {
 850         LOpcode op = ins->opcode();
 851         LIns* lhs = ins->oprnd1();
 852         LIns* rhs = ins->oprnd2();
 853         RegisterMask allow = GpRegs;
 854         Register rr = deprecated_prepResultReg(ins, allow);
 855         Register ra = findRegFor(lhs, GpRegs);
 856
 857         if (rhs->isImmI()) {
 858             int32_t rhsc = rhs->immI();
 859             if (isS16(rhsc)) {
 860                 // ppc arith immediate ops sign-exted the imm16 value
 861                 switch (op) {
 862                 case LIR_addi:
 863                 CASE64(LIR_addq:)
 864                     ADDI(rr, ra, rhsc);
 865                     return;
 866                 case LIR_subi:
 867                     SUBI(rr, ra, rhsc);
 868                     return;
 869                 case LIR_muli:
 870                     MULLI(rr, ra, rhsc);
 871                     return;
 872                 }
 873             }
 874             if (isU16(rhsc)) {
 875                 // ppc logical immediate zero-extend the imm16 value
 876                 switch (op) {
 877                 CASE64(LIR_orq:)
 878                 case LIR_ori:
 879                     ORI(rr, ra, rhsc);
 880                     return;
 881                 CASE64(LIR_andq:)
 882                 case LIR_andi:
 883                     ANDI(rr, ra, rhsc);
 884                     return;
 885                 CASE64(LIR_xorq:)
 886                 case LIR_xori:
 887                     XORI(rr, ra, rhsc);
 888                     return;
 889                 }
 890             }
 891
 892             // LIR shift ops only use last 5bits of shift const
 893             switch (op) {
 894             case LIR_lshi:
 895                 SLWI(rr, ra, rhsc&31);
 896                 return;
 897             case LIR_rshui:
 898                 SRWI(rr, ra, rhsc&31);
 899                 return;
 900             case LIR_rshi:
 901                 SRAWI(rr, ra, rhsc&31);
 902                 return;
 903             }
 904         }
 905
 906         // general case, put rhs in register
 907         Register rb = rhs==lhs ? ra : findRegFor(rhs, GpRegs&~rmask(ra));
 908         switch (op) {
 909             CASE64(LIR_addq:)
 910             case LIR_addi:
 911                 ADD(rr, ra, rb);
 912                 break;
 913             CASE64(LIR_andq:)
 914             case LIR_andi:
 915                 AND(rr, ra, rb);
 916                 break;
 917             CASE64(LIR_orq:)
 918             case LIR_ori:
 919                 OR(rr, ra, rb);
 920                 break;
 921             CASE64(LIR_xorq:)
 922             case LIR_xori:
 923                 XOR(rr, ra, rb);
 924                 break;
 925             case LIR_subi:  SUBF(rr, rb, ra);    break;
 926             case LIR_lshi:  SLW(rr, ra, R0);     ANDI(R0, rb, 31);   break;
 927             case LIR_rshi:  SRAW(rr, ra, R0);    ANDI(R0, rb, 31);   break;
 928             case LIR_rshui: SRW(rr, ra, R0);     ANDI(R0, rb, 31);   break;
 929             case LIR_muli:  MULLW(rr, ra, rb);   break;
 930         #ifdef NANOJIT_64BIT
 931             case LIR_lshq:
 932                 SLD(rr, ra, R0);
 933                 ANDI(R0, rb, 63);
 934                 break;
 935             case LIR_rshuq:
 936                 SRD(rr, ra, R0);
 937                 ANDI(R0, rb, 63);
 938                 break;
 939             case LIR_rshq:
 940                 SRAD(rr, ra, R0);
 941                 ANDI(R0, rb, 63);
 942                 TODO(qirsh);
 943                 break;
 944         #endif
 945             default:
 946                 debug_only(outputf("%s",lirNames[op]);)
 947                 TODO(asm_arith);
 948         }
 949     }
 950
 951     void Assembler::asm_fop(LIns *ins) {
 952         LOpcode op = ins->opcode();
 953         LIns* lhs = ins->oprnd1();
 954         LIns* rhs = ins->oprnd2();
 955         RegisterMask allow = FpRegs;
 956         Register rr = deprecated_prepResultReg(ins, allow);
 957         Register ra, rb;
 958         findRegFor2(allow, lhs, ra, allow, rhs, rb);
 959         switch (op) {
 960             case LIR_addd: FADD(rr, ra, rb); break;
 961             case LIR_subd: FSUB(rr, ra, rb); break;
 962             case LIR_muld: FMUL(rr, ra, rb); break;
 963             case LIR_divd: FDIV(rr, ra, rb); break;
 964             default:
 965                 debug_only(outputf("%s",lirNames[op]);)
 966                 TODO(asm_fop);
 967         }
 968     }
 969
 970     void Assembler::asm_i2d(LIns *ins) {
 971         Register r = deprecated_prepResultReg(ins, FpRegs);
 972         Register v = findRegFor(ins->oprnd1(), GpRegs);
 973         const int d = 16; // natural aligned
 974
 975     #if defined NANOJIT_64BIT && !PEDANTIC
 976         FCFID(r, r);    // convert to double
 977         LFD(r, d, SP);  // load into fpu register
 978         STD(v, d, SP);  // save int64
 979         EXTSW(v, v);    // extend sign destructively, ok since oprnd1 only is 32bit
 980     #else
 981         FSUB(r, r, F0);
 982         LFD(r, d, SP); // scratch area in outgoing linkage area
 983         STW(R0, d+4, SP);
 984         XORIS(R0, v, 0x8000);
 985         LFD(F0, d, SP);
 986         STW(R0, d+4, SP);
 987         LIS(R0, 0x8000);
 988         STW(R0, d, SP);
 989         LIS(R0, 0x4330);
 990     #endif
 991     }
 992
 993     void Assembler::asm_ui2d(LIns *ins) {
 994         Register r = deprecated_prepResultReg(ins, FpRegs);
 995         Register v = findRegFor(ins->oprnd1(), GpRegs);
 996         const int d = 16;
 997
 998     #if defined NANOJIT_64BIT && !PEDANTIC
 999         FCFID(r, r);    // convert to double
1000         LFD(r, d, SP);  // load into fpu register
1001         STD(v, d, SP);  // save int64
1002         CLRLDI(v, v, 32); // zero-extend destructively
1003     #else
1004         FSUB(r, r, F0);
1005         LFD(F0, d, SP);
1006         STW(R0, d+4, SP);
1007         LI(R0, 0);
1008         LFD(r, d, SP);
1009         STW(v, d+4, SP);
1010         STW(R0, d, SP);
1011         LIS(R0, 0x4330);
1012     #endif
1013     }
1014
1015     void Assembler::asm_d2i(LIns*) {
1016         NanoAssertMsg(0, "NJ_F2I_SUPPORTED not yet supported for this architecture");
1017     }
1018
1019     #if defined NANOJIT_64BIT
1020     // XXX: this is sub-optimal, see https://bugzilla.mozilla.org/show_bug.cgi?id=540368#c7.
1021     void Assembler::asm_q2i(LIns *ins) {
1022         Register rr = deprecated_prepResultReg(ins, GpRegs);
1023         int d = findMemFor(ins->oprnd1());
1024         LWZ(rr, d+4, FP);
1025     }
1026
1027     void Assembler::asm_ui2uq(LIns *ins) {
1028         LOpcode op = ins->opcode();
1029         Register r = deprecated_prepResultReg(ins, GpRegs);
1030         Register v = findRegFor(ins->oprnd1(), GpRegs);
1031         switch (op) {
1032         default:
1033             debug_only(outputf("%s",lirNames[op]));
1034             TODO(asm_ui2uq);
1035         case LIR_ui2uq:
1036             CLRLDI(r, v, 32); // clears the top 32 bits
1037             break;
1038         case LIR_i2q:
1039             EXTSW(r, v);
1040             break;
1041         }
1042     }
1043
1044     void Assembler::asm_dasq(LIns*) {
1045         TODO(asm_dasq);
1046     }
1047
1048     void Assembler::asm_qasd(LIns*) {
1049         TODO(asm_qasd);
1050     }
1051
1052     #endif
1053
1054 #ifdef NANOJIT_64BIT
1055     void Assembler::asm_immq(LIns *ins) {
1056         Register r = ins->deprecated_getReg();
1057         if (deprecated_isKnownReg(r) && (rmask(r) & FpRegs)) {
1058             // FPR already assigned, fine, use it
1059             deprecated_freeRsrcOf(ins);
1060         } else {
1061             // use a GPR register; its okay to copy doubles with GPR's
1062             // but *not* okay to copy non-doubles with FPR's
1063             r = deprecated_prepResultReg(ins, GpRegs);
1064         }
1065
1066         if (rmask(r) & FpRegs) {
1067             union {
1068                 double d;
1069                 struct {
1070                     int32_t hi, lo; // Always assuming big-endian in NativePPC.cpp
1071                 } w;
1072             };
1073             d = ins->immD();
1074             LFD(r, 8, SP);
1075             STW(R0, 12, SP);
1076             asm_li(R0, w.lo);
1077             STW(R0, 8, SP);
1078             asm_li(R0, w.hi);
1079         }
1080         else {
1081             int64_t q = ins->immQ();
1082             if (isS32(q)) {
1083                 asm_li(r, int32_t(q));
1084                 return;
1085             }
1086             RLDIMI(r,R0,32,0); // or 32,32?
1087             asm_li(R0, int32_t(q>>32)); // hi bits into R0
1088             asm_li(r, int32_t(q)); // lo bits into dest reg
1089         }
1090     }
1091 #endif
1092
1093     void Assembler::asm_immd(LIns *ins) {
1094     #ifdef NANOJIT_64BIT
1095         Register r = ins->deprecated_getReg();
1096         if (deprecated_isKnownReg(r) && (rmask(r) & FpRegs)) {
1097             // FPR already assigned, fine, use it
1098             deprecated_freeRsrcOf(ins);
1099         } else {
1100             // use a GPR register; its okay to copy doubles with GPR's
1101             // but *not* okay to copy non-doubles with FPR's
1102             r = deprecated_prepResultReg(ins, GpRegs);
1103         }
1104     #else
1105         Register r = deprecated_prepResultReg(ins, FpRegs);
1106     #endif
1107
1108         if (rmask(r) & FpRegs) {
1109             union {
1110                 double d;
1111                 struct {
1112                     int32_t hi, lo; // Always assuming big-endian in NativePPC.cpp
1113                 } w;
1114             };
1115             d = ins->immD();
1116             LFD(r, 8, SP);
1117             STW(R0, 12, SP);
1118             asm_li(R0, w.lo);
1119             STW(R0, 8, SP);
1120             asm_li(R0, w.hi);
1121         }
1122         else {
1123             int64_t q = ins->immDasQ();
1124             if (isS32(q)) {
1125                 asm_li(r, int32_t(q));
1126                 return;
1127             }
1128             RLDIMI(r,R0,32,0); // or 32,32?
1129             asm_li(R0, int32_t(q>>32)); // hi bits into R0
1130             asm_li(r, int32_t(q)); // lo bits into dest reg
1131         }
1132     }
1133
1134     void Assembler::br(NIns* addr, int link) {
1135         // destination unknown, then use maximum branch possible
1136         if (!addr) {
1137             br_far(addr,link);
1138             return;
1139         }
1140
1141         // powerpc offsets are based on the address of the branch instruction
1142         underrunProtect(4);       // ensure _nIns is addr of Bx
1143         ptrdiff_t offset = addr - (_nIns-1); // we want ptr diff's implicit >>2 here
1144
1145         #if !PEDANTIC
1146         if (isS24(offset)) {
1147             Bx(offset, 0, link); // b addr or bl addr
1148             return;
1149         }
1150         ptrdiff_t absaddr = addr - (NIns*)0; // ptr diff implies >>2
1151         if (isS24(absaddr)) {
1152             Bx(absaddr, 1, link); // ba addr or bla addr
1153             return;
1154         }
1155         #endif // !PEDANTIC
1156
1157         br_far(addr,link);
1158     }
1159
1160     void Assembler::br_far(NIns* addr, int link) {
1161         // far jump.
1162         // can't have a page break in this sequence, because the break
1163         // would also clobber ctr and r2.  We use R2 here because it's not available
1164         // to the register allocator, and we use R0 everywhere else as scratch, so using
1165         // R2 here avoids clobbering anything else besides CTR.
1166     #ifdef NANOJIT_64BIT
1167         if (addr==0 || !isU32(uintptr_t(addr))) {
1168             // really far jump to 64bit abs addr
1169             underrunProtect(28); // 7 instructions
1170             BCTR(link);
1171             MTCTR(R2);
1172             asm_li64(R2, uintptr_t(addr)); // 5 instructions
1173             return;
1174         }
1175     #endif
1176         underrunProtect(16);
1177         BCTR(link);
1178         MTCTR(R2);
1179         asm_li32(R2, uint32_t(uintptr_t(addr))); // 2 instructions
1180     }
1181
1182     void Assembler::underrunProtect(int bytes) {
1183         NanoAssertMsg(bytes<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small");
1184         int instr = (bytes + sizeof(NIns) - 1) / sizeof(NIns);
1185         NIns *pc = _nIns;
1186         NIns *top = codeStart;  // this may be in a normal code chunk or an exit code chunk
1187
1188     #if PEDANTIC
1189         // pedanticTop is based on the last call to underrunProtect; any time we call
1190         // underrunProtect and would use more than what's already protected, then insert
1191         // a page break jump.  Sometimes, this will be to a new page, usually it's just
1192         // the next instruction and the only effect is to clobber R2 & CTR
1193
1194         NanoAssert(pedanticTop >= top);
1195         if (pc - instr < pedanticTop) {
1196             // no page break required, but insert a far branch anyway just to be difficult
1197         #ifdef NANOJIT_64BIT
1198             const int br_size = 7;
1199         #else
1200             const int br_size = 4;
1201         #endif
1202             if (pc - instr - br_size < top) {
1203                 // really do need a page break
1204                 verbose_only(if (_logc->lcbits & LC_Native) outputf("newpage %p:", pc);)
1205                 codeAlloc();
1206             }
1207             // now emit the jump, but make sure we won't need another page break.
1208             // we're pedantic, but not *that* pedantic.
1209             pedanticTop = _nIns - br_size;
1210             br(pc, 0);
1211             pedanticTop = _nIns - instr;
1212         }
1213     #else
1214         if (pc - instr < top) {
1215             verbose_only(if (_logc->lcbits & LC_Native) outputf("newpage %p:", pc);)
1216             // This may be in a normal code chunk or an exit code chunk.
1217             codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
1218             // This jump will call underrunProtect again, but since we're on a new
1219             // page, nothing will happen.
1220             br(pc, 0);
1221         }
1222     #endif
1223     }
1224
1225     void Assembler::asm_cmov(LIns* ins)
1226     {
1227         LIns* condval = ins->oprnd1();
1228         LIns* iftrue  = ins->oprnd2();
1229         LIns* iffalse = ins->oprnd3();
1230
1231     #ifdef NANOJIT_64BIT
1232         NanoAssert((ins->opcode() == LIR_cmovi  && iftrue->isI() && iffalse->isI()) ||
1233                    (ins->opcode() == LIR_cmovq  && iftrue->isQ() && iffalse->isQ()));
1234     #else
1235         NanoAssert((ins->opcode() == LIR_cmovi  && iftrue->isI() && iffalse->isI()));
1236     #endif
1237
1238         Register rr = prepareResultReg(ins, GpRegs);
1239         Register rf = findRegFor(iffalse, GpRegs & ~rmask(rr));
1240
1241         // If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
1242         Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
1243
1244         underrunProtect(16); // make sure branch target and branch are on same page and thus near
1245         NIns *after = _nIns;
1246         verbose_only(if (_logc->lcbits & LC_Native) outputf("%p:",after);)
1247         MR(rr,rf);
1248
1249         NanoAssert(isS24(after - (_nIns-1)));
1250         asm_branch_near(false, condval, after);
1251
1252         if (rr != rt)
1253             MR(rr, rt);
1254
1255         freeResourcesOf(ins);
1256         if (!iftrue->isInReg()) {
1257             NanoAssert(rt == rr);
1258             findSpecificRegForUnallocated(iftrue, rr);
1259         }
1260
1261         asm_cmp(condval->opcode(), condval->oprnd1(), condval->oprnd2(), CR7);
1262     }
1263
1264     RegisterMask Assembler::nHint(LIns* ins) {
1265         NanoAssert(ins->isop(LIR_paramp));
1266         RegisterMask prefer = 0;
1267         if (ins->paramKind() == 0)
1268             if (ins->paramArg() < 8)
1269                 prefer = rmask(argRegs[ins->paramArg()]);
1270         return prefer;
1271     }
1272
1273     void Assembler::asm_neg_not(LIns *ins) {
1274         Register rr = deprecated_prepResultReg(ins, GpRegs);
1275         Register ra = findRegFor(ins->oprnd1(), GpRegs);
1276         if (ins->isop(LIR_negi)) {
1277             NEG(rr, ra);
1278         } else {
1279             NOT(rr, ra);
1280         }
1281     }
1282
1283     void Assembler::nInit(AvmCore*) {
1284         nHints[LIR_calli]  = rmask(R3);
1285     #ifdef NANOJIT_64BIT
1286         nHints[LIR_callq]  = rmask(R3);
1287     #endif
1288         nHints[LIR_calld]  = rmask(F1);
1289         nHints[LIR_paramp] = PREFER_SPECIAL;
1290     }
1291
1292     void Assembler::nBeginAssembly() {
1293         max_param_size = 0;
1294     }
1295
1296     void Assembler::nativePageSetup() {
1297         NanoAssert(!_inExit);
1298         if (!_nIns) {
1299             codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
1300             IF_PEDANTIC( pedanticTop = _nIns; )
1301         }
1302     }
1303
1304     void Assembler::nativePageReset()
1305     {}
1306
1307     // Increment the 32-bit profiling counter at pCtr, without
1308     // changing any registers.
1309     verbose_only(
1310     void Assembler::asm_inc_m32(uint32_t* /*pCtr*/)
1311     {
1312     }
1313     )
1314
1315     void Assembler::nPatchBranch(NIns *branch, NIns *target) {
1316         // ppc relative offsets are based on the addr of the branch instruction
1317         ptrdiff_t bd = target - branch;
1318         if (branch[0] == PPC_b) {
1319             // unconditional, 24bit offset.  Whoever generated the unpatched jump
1320             // must have known the final size would fit in 24bits!  otherwise the
1321             // jump would be (lis,ori,mtctr,bctr) and we'd be patching the lis,ori.
1322             NanoAssert(isS24(bd));
1323             branch[0] |= (bd & 0xffffff) << 2;
1324         }
1325         else if ((branch[0] & PPC_bc) == PPC_bc) {
1326             // conditional, 14bit offset. Whoever generated the unpatched jump
1327             // must have known the final size would fit in 14bits!  otherwise the
1328             // jump would be (lis,ori,mtctr,bcctr) and we'd be patching the lis,ori below.
1329             NanoAssert(isS14(bd));
1330             NanoAssert(((branch[0] & 0x3fff)<<2) == 0);
1331             branch[0] |= (bd & 0x3fff) << 2;
1332             TODO(patch_bc);
1333         }
1334     #ifdef NANOJIT_64BIT
1335         // patch 64bit branch
1336         else if ((branch[0] & ~(31<<21)) == PPC_addis) {
1337             // general branch, using lis,ori,sldi,oris,ori to load the const 64bit addr.
1338             Register rd = { (branch[0] >> 21) & 31 };
1339             NanoAssert(branch[1] == PPC_ori  | GPR(rd)<<21 | GPR(rd)<<16);
1340             NanoAssert(branch[3] == PPC_oris | GPR(rd)<<21 | GPR(rd)<<16);
1341             NanoAssert(branch[4] == PPC_ori  | GPR(rd)<<21 | GPR(rd)<<16);
1342             uint64_t imm = uintptr_t(target);
1343             uint32_t lo = uint32_t(imm);
1344             uint32_t hi = uint32_t(imm>>32);
1345             branch[0] = PPC_addis | GPR(rd)<<21 |               uint16_t(hi>>16);
1346             branch[1] = PPC_ori   | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(hi);
1347             branch[3] = PPC_oris  | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(lo>>16);
1348             branch[4] = PPC_ori   | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(lo);
1349         }
1350     #else // NANOJIT_64BIT
1351         // patch 32bit branch
1352         else if ((branch[0] & ~(31<<21)) == PPC_addis) {
1353             // general branch, using lis,ori to load the const addr.
1354             // patch a lis,ori sequence with a 32bit value
1355             Register rd = { (branch[0] >> 21) & 31 };
1356             NanoAssert(branch[1] == PPC_ori | GPR(rd)<<21 | GPR(rd)<<16);
1357             uint32_t imm = uint32_t(target);
1358             branch[0] = PPC_addis | GPR(rd)<<21 | uint16_t(imm >> 16); // lis rd, imm >> 16
1359             branch[1] = PPC_ori | GPR(rd)<<21 | GPR(rd)<<16 | uint16_t(imm); // ori rd, rd, imm & 0xffff
1360         }
1361     #endif // !NANOJIT_64BIT
1362         else {
1363             TODO(unknown_patch);
1364         }
1365     }
1366
1367     static int cntzlw(int set) {
1368         // On PowerPC, prefer higher registers, to minimize
1369         // size of nonvolatile area that must be saved.
1370         register uint32_t i;
1371         #ifdef __GNUC__
1372         asm ("cntlzw %0,%1" : "=r" (i) : "r" (set));
1373         #else // __GNUC__
1374         # error("unsupported compiler")
1375         #endif // __GNUC__
1376         return 31-i;
1377     }
1378
1379     Register Assembler::nRegisterAllocFromSet(RegisterMask set) {
1380         uint32_t i;
1381         // note, deliberate truncation of 64->32 bits
1382         if (set & 0xffffffff) {
1383             i = cntzlw(int(set)); // gp reg
1384         } else {
1385             i = 32 + cntzlw(int(set>>32)); // fp reg
1386         }
1387         Register r = { i };
1388         _allocator.free &= ~rmask(r);
1389         return r;
1390     }
1391
1392     void Assembler::nRegisterResetAll(RegAlloc &regs) {
1393         regs.clear();
1394         regs.free = SavedRegs | 0x1ff8 /* R3-12 */ | 0x3ffe00000000LL /* F1-13 */;
1395     }
1396
1397 #ifdef NANOJIT_64BIT
1398     void Assembler::asm_qbinop(LIns *ins) {
1399         LOpcode op = ins->opcode();
1400         switch (op) {
1401         case LIR_orq:
1402         case LIR_andq:
1403         case LIR_rshuq:
1404         case LIR_rshq:
1405         case LIR_lshq:
1406         case LIR_xorq:
1407         case LIR_addq:
1408             asm_arith(ins);
1409             break;
1410         default:
1411             debug_only(outputf("%s",lirNames[op]));
1412             TODO(asm_qbinop);
1413         }
1414     }
1415 #endif // NANOJIT_64BIT
1416
1417     void Assembler::nFragExit(LIns*) {
1418         TODO(nFragExit);
1419     }
1420
1421     void Assembler::asm_jtbl(LIns* ins, NIns** native_table)
1422     {
1423         // R0 = index*4, R2 = table, CTR = computed address to jump to.
1424         // must ensure no page breaks in here because R2 & CTR can get clobbered.
1425         Register indexreg = findRegFor(ins->oprnd1(), GpRegs);
1426 #ifdef NANOJIT_64BIT
1427         underrunProtect(9*4);
1428         BCTR(0);                                // jump to address in CTR
1429         MTCTR(R2);                              // CTR = R2
1430         LDX(R2, R2, R0);                        // R2 = [table + index*8]
1431         SLDI(R0, indexreg, 3);                  // R0 = index*8
1432         asm_li64(R2, uint64_t(native_table));   // R2 = table (5 instr)
1433 #else // 64bit
1434         underrunProtect(6*4);
1435         BCTR(0);                                // jump to address in CTR
1436         MTCTR(R2);                              // CTR = R2
1437         LWZX(R2, R2, R0);                       // R2 = [table + index*4]
1438         SLWI(R0, indexreg, 2);                  // R0 = index*4
1439         asm_li(R2, int32_t(native_table));      // R2 = table (up to 2 instructions)
1440 #endif // 64bit
1441     }
1442
1443     void Assembler::swapCodeChunks() {
1444         if (!_nExitIns) {
1445             codeAlloc(exitStart, exitEnd, _nExitIns verbose_only(, exitBytes));
1446         }
1447         SWAP(NIns*, _nIns, _nExitIns);
1448         SWAP(NIns*, codeStart, exitStart);
1449         SWAP(NIns*, codeEnd, exitEnd);
1450         verbose_only( SWAP(size_t, codeBytes, exitBytes); )
1451     }
1452
1453     void Assembler::asm_insert_random_nop() {
1454         NanoAssert(0); // not supported
1455     }
1456
1457 } // namespace nanojit
1458
1459 #endif // FEATURE_NANOJIT && NANOJIT_PPC