js/src/nanojit/LIR.cpp

   1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
   2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
   3 /* ***** BEGIN LICENSE BLOCK *****
   4  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   5  *
   6  * The contents of this file are subject to the Mozilla Public License Version
   7  * 1.1 (the "License"); you may not use this file except in compliance with
   8  * the License. You may obtain a copy of the License at
   9  * http://www.mozilla.org/MPL/
  10  *
  11  * Software distributed under the License is distributed on an "AS IS" basis,
  12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  13  * for the specific language governing rights and limitations under the
  14  * License.
  15  *
  16  * The Original Code is [Open Source Virtual Machine].
  17  *
  18  * The Initial Developer of the Original Code is
  19  * Adobe System Incorporated.
  20  * Portions created by the Initial Developer are Copyright (C) 2004-2007
  21  * the Initial Developer. All Rights Reserved.
  22  *
  23  * Contributor(s):
  24  *   Adobe AS3 Team
  25  *
  26  * Alternatively, the contents of this file may be used under the terms of
  27  * either the GNU General Public License Version 2 or later (the "GPL"), or
  28  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  29  * in which case the provisions of the GPL or the LGPL are applicable instead
  30  * of those above. If you wish to allow use of your version of this file only
  31  * under the terms of either the GPL or the LGPL, and not to allow others to
  32  * use your version of this file under the terms of the MPL, indicate your
  33  * decision by deleting the provisions above and replace them with the notice
  34  * and other provisions required by the GPL or the LGPL. If you do not delete
  35  * the provisions above, a recipient may use your version of this file under
  36  * the terms of any one of the MPL, the GPL or the LGPL.
  37  *
  38  * ***** END LICENSE BLOCK ***** */
  39
  40 #include "nanojit.h"
  41
  42 namespace nanojit
  43 {
  44     using namespace avmplus;
  45     #ifdef FEATURE_NANOJIT
  46
    // Table of representation kinds (LRK_* values), generated from
    // LIRopcode.tbl: every OP___ entry in that file contributes one element,
    // in table order, built from the entry's `repKind` field.  A trailing 0
    // follows the last generated entry.
    // NOTE(review): presumably indexed by LOpcode -- confirm against users.
    const uint8_t repKinds[] = {
#define OP___(op, number, repKind, retType, isCse) \
        LRK_##repKind,
#include "LIRopcode.tbl"
#undef OP___
        0
    };
  54
    // Table of result types (LTy_* values), generated from LIRopcode.tbl:
    // every OP___ entry contributes one element, in table order, built from
    // the entry's `retType` field.  A trailing LTy_V follows the last
    // generated entry.
    // NOTE(review): presumably indexed by LOpcode -- confirm against users.
    const LTy retTypes[] = {
#define OP___(op, number, repKind, retType, isCse) \
        LTy_##retType,
#include "LIRopcode.tbl"
#undef OP___
        LTy_V
    };
  62
    // Table of `isCse` values, generated from LIRopcode.tbl: every OP___
    // entry contributes its `isCse` field verbatim, in table order.  A
    // trailing 0 follows the last generated entry.
    // NOTE(review): presumably indexed by LOpcode and consulted by the CSE
    // filter -- confirm against users.
    const int8_t isCses[] = {
#define OP___(op, number, repKind, retType, isCse) \
        isCse,
#include "LIRopcode.tbl"
#undef OP___
        0
    };
  70
    // LIR verbose specific
    #ifdef NJ_VERBOSE

    // Table of opcode names, generated from LIRopcode.tbl: every OP___ entry
    // contributes the stringified form of its `op` field, in table order.
    // The list is terminated by a NULL entry.  Only built when NJ_VERBOSE is
    // defined.
    const char* lirNames[] = {
#define OP___(op, number, repKind, retType, isCse) \
        #op,
#include "LIRopcode.tbl"
#undef OP___
        NULL
    };

    #endif /* NJ_VERBOSE */
  83
  84     uint32_t CallInfo::count_args() const
  85     {
  86         uint32_t argc = 0;
  87         uint32_t argt = _typesig;
  88         argt >>= TYPESIG_FIELDSZB;      // remove retType
  89         while (argt) {
  90             argc++;
  91             argt >>= TYPESIG_FIELDSZB;
  92         }
  93         return argc;
  94     }
  95
  96     uint32_t CallInfo::count_int32_args() const
  97     {
  98         uint32_t argc = 0;
  99         uint32_t argt = _typesig;
 100         argt >>= TYPESIG_FIELDSZB;      // remove retType
 101         while (argt) {
 102             ArgType a = ArgType(argt & TYPESIG_FIELDMASK);
 103             if (a == ARGTYPE_I || a == ARGTYPE_UI)
 104                 argc++;
 105             argt >>= TYPESIG_FIELDSZB;
 106         }
 107         return argc;
 108     }
 109
 110     uint32_t CallInfo::getArgTypes(ArgType* argTypes) const
 111     {
 112         uint32_t argc = 0;
 113         uint32_t argt = _typesig;
 114         argt >>= TYPESIG_FIELDSZB;      // remove retType
 115         while (argt) {
 116             ArgType a = ArgType(argt & TYPESIG_FIELDMASK);
 117             argTypes[argc] = a;
 118             argc++;
 119             argt >>= TYPESIG_FIELDSZB;
 120         }
 121         return argc;
 122     }
 123
 124     // implementation
 125 #ifdef NJ_VERBOSE
 126     void ReverseLister::finish()
 127     {
 128         _logc->printf("\n");
 129         _logc->printf("=== BEGIN %s ===\n", _title);
 130         int j = 0;
 131         for (Seq<char*>* p = _strs.get(); p != NULL; p = p->tail)
 132             _logc->printf("  %02d: %s\n", j++, p->head);
 133         _logc->printf("=== END %s ===\n", _title);
 134         _logc->printf("\n");
 135     }
 136
 137     LIns* ReverseLister::read()
 138     {
 139         // This check is necessary to avoid printing the LIR_start multiple
 140         // times due to lookahead in Assembler::gen().
 141         if (_prevIns && _prevIns->isop(LIR_start))
 142             return _prevIns;
 143         LIns* ins = in->read();
 144         InsBuf b;
 145         const char* str = _printer->formatIns(&b, ins);
 146         char* cpy = new (_alloc) char[strlen(str)+1];
 147         VMPI_strcpy(cpy, str);
 148         _strs.insert(cpy);
 149         _prevIns = ins;
 150         return ins;
 151     }
 152 #endif
 153
    // LirBuffer
 155     LirBuffer::LirBuffer(Allocator& alloc) :
 156 #ifdef NJ_VERBOSE
 157           printer(NULL),
 158 #endif
 159           abi(ABI_FASTCALL), state(NULL), param1(NULL), sp(NULL), rp(NULL),
 160           _allocator(alloc)
 161     {
 162         clear();
 163     }
 164
 165     void LirBuffer::clear()
 166     {
 167         // clear the stats, etc
 168         _unused = 0;
 169         _limit = 0;
 170         _stats.lir = 0;
 171         for (int i = 0; i < NumSavedRegs; ++i)
 172             savedRegs[i] = NULL;
 173         chunkAlloc();
 174     }
 175
 176     void LirBuffer::chunkAlloc()
 177     {
 178         _unused = (uintptr_t) _allocator.alloc(CHUNK_SZB);
 179         NanoAssert(_unused != 0); // Allocator.alloc() never returns null. See Allocator.h
 180         _limit = _unused + CHUNK_SZB;
 181     }
 182
 183     int32_t LirBuffer::insCount()
 184     {
 185         return _stats.lir;
 186     }
 187
 188     // Allocate a new page, and write the first instruction to it -- a skip
 189     // linking to last instruction of the previous page.
 190     void LirBuffer::moveToNewChunk(uintptr_t addrOfLastLInsOnCurrentChunk)
 191     {
 192         chunkAlloc();
 193         // Link LIR stream back to prior instruction.
 194         // Unlike all the ins*() functions, we don't call makeRoom() here
 195         // because we know we have enough space, having just started a new
 196         // page.
 197         LInsSk* insSk = (LInsSk*)_unused;
 198         LIns*   ins   = insSk->getLIns();
 199         ins->initLInsSk((LIns*)addrOfLastLInsOnCurrentChunk);
 200         _unused += sizeof(LInsSk);
 201         verbose_only(_stats.lir++);
 202     }
 203
 204     // Make room for a single instruction.
 205     uintptr_t LirBuffer::makeRoom(size_t szB)
 206     {
 207         // Make sure the size is ok
 208         NanoAssert(0 == szB % sizeof(void*));
 209         NanoAssert(sizeof(LIns) <= szB && szB <= sizeof(LInsSt));  // LInsSt is the biggest one
 210         NanoAssert(_unused < _limit);
 211
 212         debug_only( bool moved = false; )
 213
 214         // If the instruction won't fit on the current chunk, get a new chunk
 215         if (_unused + szB > _limit) {
 216             uintptr_t addrOfLastLInsOnChunk = _unused - sizeof(LIns);
 217             moveToNewChunk(addrOfLastLInsOnChunk);
 218             debug_only( moved = true; )
 219         }
 220
 221         // We now know that we are on a chunk that has the requested amount of
 222         // room: record the starting address of the requested space and bump
 223         // the pointer.
 224         uintptr_t startOfRoom = _unused;
 225         _unused += szB;
 226         verbose_only(_stats.lir++);             // count the instruction
 227
 228         // If there's no more space on this chunk, move to a new one.
 229         // (This will only occur if the asked-for size filled up exactly to
 230         // the end of the chunk.)  This ensures that next time we enter this
 231         // function, _unused won't be pointing one byte past the end of
 232         // the chunk, which would break everything.
 233         if (_unused >= _limit) {
 234             // Check we used exactly the remaining space
 235             NanoAssert(_unused == _limit);
 236             NanoAssert(!moved);     // shouldn't need to moveToNewChunk twice
 237             uintptr_t addrOfLastLInsOnChunk = _unused - sizeof(LIns);
 238             moveToNewChunk(addrOfLastLInsOnChunk);
 239         }
 240
 241         // Make sure it's word-aligned.
 242         NanoAssert(0 == startOfRoom % sizeof(void*));
 243         return startOfRoom;
 244     }
 245
 246     LIns* LirBufWriter::insStore(LOpcode op, LIns* val, LIns* base, int32_t d, AccSet accSet)
 247     {
 248         if (isS16(d)) {
 249             LInsSt* insSt = (LInsSt*)_buf->makeRoom(sizeof(LInsSt));
 250             LIns*   ins   = insSt->getLIns();
 251             ins->initLInsSt(op, val, base, d, accSet);
 252             return ins;
 253         } else {
 254             // If the displacement is more than 16 bits, put it in a separate instruction.
 255             return insStore(op, val, ins2(LIR_addp, base, insImmWord(d)), 0, accSet);
 256         }
 257     }
 258
 259     LIns* LirBufWriter::ins0(LOpcode op)
 260     {
 261         LInsOp0* insOp0 = (LInsOp0*)_buf->makeRoom(sizeof(LInsOp0));
 262         LIns*    ins    = insOp0->getLIns();
 263         ins->initLInsOp0(op);
 264         return ins;
 265     }
 266
 267     LIns* LirBufWriter::ins1(LOpcode op, LIns* o1)
 268     {
 269         LInsOp1* insOp1 = (LInsOp1*)_buf->makeRoom(sizeof(LInsOp1));
 270         LIns*    ins    = insOp1->getLIns();
 271         ins->initLInsOp1(op, o1);
 272         return ins;
 273     }
 274
 275     LIns* LirBufWriter::ins2(LOpcode op, LIns* o1, LIns* o2)
 276     {
 277         LInsOp2* insOp2 = (LInsOp2*)_buf->makeRoom(sizeof(LInsOp2));
 278         LIns*    ins    = insOp2->getLIns();
 279         ins->initLInsOp2(op, o1, o2);
 280         return ins;
 281     }
 282
 283     LIns* LirBufWriter::ins3(LOpcode op, LIns* o1, LIns* o2, LIns* o3)
 284     {
 285         LInsOp3* insOp3 = (LInsOp3*)_buf->makeRoom(sizeof(LInsOp3));
 286         LIns*    ins    = insOp3->getLIns();
 287         ins->initLInsOp3(op, o1, o2, o3);
 288         return ins;
 289     }
 290
 291     LIns* LirBufWriter::insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet, LoadQual loadQual)
 292     {
 293         if (isS16(d)) {
 294             LInsLd* insLd = (LInsLd*)_buf->makeRoom(sizeof(LInsLd));
 295             LIns*   ins   = insLd->getLIns();
 296             ins->initLInsLd(op, base, d, accSet, loadQual);
 297             return ins;
 298         } else {
 299             // If the displacement is more than 16 bits, put it in a separate instruction.
 300             // Note that CseFilter::insLoad() also does this, so this will
 301             // only occur if CseFilter has been removed from the pipeline.
 302             return insLoad(op, ins2(LIR_addp, base, insImmWord(d)), 0, accSet, loadQual);
 303         }
 304     }
 305
 306     LIns* LirBufWriter::insGuard(LOpcode op, LIns* c, GuardRecord *gr)
 307     {
 308         debug_only( if (LIR_x == op || LIR_xbarrier == op) NanoAssert(!c); )
 309         return ins2(op, c, (LIns*)gr);
 310     }
 311
 312     LIns* LirBufWriter::insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr)
 313     {
 314         return ins3(op, a, b, (LIns*)gr);
 315     }
 316
 317     LIns* LirBufWriter::insBranch(LOpcode op, LIns* condition, LIns* toLabel)
 318     {
 319         NanoAssert((op == LIR_j && !condition) ||
 320                    ((op == LIR_jf || op == LIR_jt) && condition));
 321         return ins2(op, condition, toLabel);
 322     }
 323
 324     LIns* LirBufWriter::insBranchJov(LOpcode op, LIns* a, LIns* b, LIns* toLabel)
 325     {
 326         return ins3(op, a, b, toLabel);
 327     }
 328
 329     LIns* LirBufWriter::insJtbl(LIns* index, uint32_t size)
 330     {
 331         LInsJtbl* insJtbl = (LInsJtbl*) _buf->makeRoom(sizeof(LInsJtbl));
 332         LIns**    table   = new (_buf->_allocator) LIns*[size];
 333         LIns*     ins     = insJtbl->getLIns();
 334         VMPI_memset(table, 0, size * sizeof(LIns*));
 335         ins->initLInsJtbl(index, size, table);
 336         return ins;
 337     }
 338
 339     LIns* LirBufWriter::insAlloc(int32_t size)
 340     {
 341         size = (size+3)>>2; // # of required 32bit words
 342         LInsI* insI = (LInsI*)_buf->makeRoom(sizeof(LInsI));
 343         LIns*  ins  = insI->getLIns();
 344         ins->initLInsI(LIR_allocp, size);
 345         return ins;
 346     }
 347
 348     LIns* LirBufWriter::insParam(int32_t arg, int32_t kind)
 349     {
 350         LInsP* insP = (LInsP*)_buf->makeRoom(sizeof(LInsP));
 351         LIns*  ins  = insP->getLIns();
 352         ins->initLInsP(arg, kind);
 353         if (kind) {
 354             NanoAssert(arg < NumSavedRegs);
 355             _buf->savedRegs[arg] = ins;
 356         }
 357         return ins;
 358     }
 359
 360     LIns* LirBufWriter::insImmI(int32_t imm)
 361     {
 362         LInsI* insI = (LInsI*)_buf->makeRoom(sizeof(LInsI));
 363         LIns*  ins  = insI->getLIns();
 364         ins->initLInsI(LIR_immi, imm);
 365         return ins;
 366     }
 367
 368 #ifdef NANOJIT_64BIT
 369     LIns* LirBufWriter::insImmQ(uint64_t imm)
 370     {
 371         LInsQorD* insQorD = (LInsQorD*)_buf->makeRoom(sizeof(LInsQorD));
 372         LIns*     ins     = insQorD->getLIns();
 373         ins->initLInsQorD(LIR_immq, imm);
 374         return ins;
 375     }
 376 #endif
 377
 378     LIns* LirBufWriter::insComment(const char* str)
 379     {
 380         // Allocate space for and copy the string.  We use the same allocator
 381         // as the normal LIR buffers so it has the same lifetime.
 382         char* str2 = (char*)_buf->_allocator.alloc(VMPI_strlen(str) + 1);
 383         VMPI_strcpy(str2, str);
 384         return ins1(LIR_comment, (LIns*)str);
 385     }
 386
 387     LIns* LirBufWriter::insImmD(double d)
 388     {
 389         LInsQorD* insQorD = (LInsQorD*)_buf->makeRoom(sizeof(LInsQorD));
 390         LIns*     ins     = insQorD->getLIns();
 391         union {
 392             double d;
 393             uint64_t q;
 394         } u;
 395         u.d = d;
 396         ins->initLInsQorD(LIR_immd, u.q);
 397         return ins;
 398     }
 399
 400     // Reads the next non-skip instruction.
 401     LIns* LirReader::read()
 402     {
 403         static const uint8_t insSizes[] = {
 404         // LIR_start is treated specially -- see below.
 405 #define OP___(op, number, repKind, retType, isCse) \
 406             ((number) == LIR_start ? 0 : sizeof(LIns##repKind)),
 407 #include "LIRopcode.tbl"
 408 #undef OP___
 409             0
 410         };
 411
 412         // Check the invariant: _ins never points to a skip.
 413         NanoAssert(_ins && !_ins->isop(LIR_skip));
 414
 415         // Step back one instruction.  Use a table lookup rather than a switch
 416         // to avoid branch mispredictions.  LIR_start is given a special size
 417         // of zero so that we don't step back past the start of the block.
 418         // (Callers of this function should stop once they see a LIR_start.)
 419         LIns* ret = _ins;
 420         _ins = (LIns*)(uintptr_t(_ins) - insSizes[_ins->opcode()]);
 421
 422         // Ensure _ins doesn't end up pointing to a skip.
 423         while (_ins->isop(LIR_skip)) {
 424             NanoAssert(_ins->prevLIns() != _ins);
 425             _ins = _ins->prevLIns();
 426         }
 427
 428         return ret;
 429     }
 430
 431     LOpcode arithOpcodeD2I(LOpcode op)
 432     {
 433         switch (op) {
 434         case LIR_negd:  return LIR_negi;
 435         case LIR_addd:  return LIR_addi;
 436         case LIR_subd:  return LIR_subi;
 437         case LIR_muld:  return LIR_muli;
 438         default:        NanoAssert(0); return LIR_skip;
 439         }
 440     }
 441
 442 #ifdef NANOJIT_64BIT
 443     LOpcode cmpOpcodeI2Q(LOpcode op)
 444     {
 445         switch (op) {
 446         case LIR_eqi:    return LIR_eqq;
 447         case LIR_lti:    return LIR_ltq;
 448         case LIR_gti:    return LIR_gtq;
 449         case LIR_lei:    return LIR_leq;
 450         case LIR_gei:    return LIR_geq;
 451         case LIR_ltui:   return LIR_ltuq;
 452         case LIR_gtui:   return LIR_gtuq;
 453         case LIR_leui:   return LIR_leuq;
 454         case LIR_geui:   return LIR_geuq;
 455         default:        NanoAssert(0); return LIR_skip;
 456         }
 457     }
 458 #endif
 459
 460     LOpcode cmpOpcodeD2I(LOpcode op)
 461     {
 462         switch (op) {
 463         case LIR_eqd:    return LIR_eqi;
 464         case LIR_ltd:    return LIR_lti;
 465         case LIR_gtd:    return LIR_gti;
 466         case LIR_led:    return LIR_lei;
 467         case LIR_ged:    return LIR_gei;
 468         default:        NanoAssert(0); return LIR_skip;
 469         }
 470     }
 471
 472     LOpcode cmpOpcodeD2UI(LOpcode op)
 473     {
 474         switch (op) {
 475         case LIR_eqd:    return LIR_eqi;
 476         case LIR_ltd:    return LIR_ltui;
 477         case LIR_gtd:    return LIR_gtui;
 478         case LIR_led:    return LIR_leui;
 479         case LIR_ged:    return LIR_geui;
 480         default:        NanoAssert(0); return LIR_skip;
 481         }
 482     }
 483
    // Compile-time layout checks for LIns and the LInsXYZ wrappers.
    //
    // This is never called, but that's ok because it contains only static
    // assertions; merely compiling it enforces them.
    void LIns::staticSanityCheck()
    {
        // LIns must be word-sized.
        NanoStaticAssert(sizeof(LIns) == 1*sizeof(void*));

        // LInsXYZ have expected sizes too, expressed in machine words.
        NanoStaticAssert(sizeof(LInsOp0)  == 1*sizeof(void*));
        NanoStaticAssert(sizeof(LInsOp1)  == 2*sizeof(void*));
        NanoStaticAssert(sizeof(LInsOp2)  == 3*sizeof(void*));
        NanoStaticAssert(sizeof(LInsOp3)  == 4*sizeof(void*));
        NanoStaticAssert(sizeof(LInsLd)   == 3*sizeof(void*));
        NanoStaticAssert(sizeof(LInsSt)   == 4*sizeof(void*));
        NanoStaticAssert(sizeof(LInsSk)   == 2*sizeof(void*));
        NanoStaticAssert(sizeof(LInsC)    == 3*sizeof(void*));
        NanoStaticAssert(sizeof(LInsP)    == 2*sizeof(void*));
        NanoStaticAssert(sizeof(LInsI)    == 2*sizeof(void*));
        // LInsQorD is expected to take two words on 64-bit builds and three
        // words otherwise.
    #if defined NANOJIT_64BIT
        NanoStaticAssert(sizeof(LInsQorD) == 2*sizeof(void*));
    #else
        NanoStaticAssert(sizeof(LInsQorD) == 3*sizeof(void*));
    #endif
        NanoStaticAssert(sizeof(LInsJtbl) == 4*sizeof(void*));

        // oprnd_1 must be in the same position in LIns{Op1,Op2,Op3,Ld,St,Jtbl}
        // because oprnd1() is used for all of them.  The position is
        // measured as the distance between the `ins` member and `oprnd_1`.
        // Note: OP1OFFSET is not #undef'd and stays defined after this
        // function.
        #define OP1OFFSET (offsetof(LInsOp1,  ins) - offsetof(LInsOp1,  oprnd_1))
        NanoStaticAssert( OP1OFFSET == (offsetof(LInsOp2,  ins) - offsetof(LInsOp2,  oprnd_1)) );
        NanoStaticAssert( OP1OFFSET == (offsetof(LInsOp3,  ins) - offsetof(LInsOp3,  oprnd_1)) );
        NanoStaticAssert( OP1OFFSET == (offsetof(LInsLd,   ins) - offsetof(LInsLd,   oprnd_1)) );
        NanoStaticAssert( OP1OFFSET == (offsetof(LInsSt,   ins) - offsetof(LInsSt,   oprnd_1)) );
        NanoStaticAssert( OP1OFFSET == (offsetof(LInsJtbl, ins) - offsetof(LInsJtbl, oprnd_1)) );

        // oprnd_2 must be in the same position in LIns{Op2,Op3,St}
        // because oprnd2() is used for all of them.  As above, OP2OFFSET
        // stays defined after this function.
        #define OP2OFFSET (offsetof(LInsOp2, ins) - offsetof(LInsOp2, oprnd_2))
        NanoStaticAssert( OP2OFFSET == (offsetof(LInsOp3, ins) - offsetof(LInsOp3, oprnd_2)) );
        NanoStaticAssert( OP2OFFSET == (offsetof(LInsSt,  ins) - offsetof(LInsSt,  oprnd_2)) );
    }
 524
 525     bool insIsS16(LIns* i)
 526     {
 527         if (i->isImmI()) {
 528             int c = i->immI();
 529             return isS16(c);
 530         }
 531         if (i->isCmov()) {
 532             return insIsS16(i->oprnd2()) && insIsS16(i->oprnd3());
 533         }
 534         if (i->isCmp())
 535             return true;
 536         // many other possibilities too.
 537         return false;
 538     }
 539
 540     LIns* ExprFilter::ins1(LOpcode v, LIns* oprnd)
 541     {
 542         switch (v) {
 543 #ifdef NANOJIT_64BIT
 544         case LIR_q2i:
 545             if (oprnd->isImmQ())
 546                 return insImmI(oprnd->immQlo());
 547             break;
 548         case LIR_i2q:
 549             if (oprnd->isImmI())
 550                 return insImmQ(int64_t(int32_t(oprnd->immI())));
 551             break;
 552         case LIR_ui2uq:
 553             if (oprnd->isImmI())
 554                 return insImmQ(uint64_t(uint32_t(oprnd->immI())));
 555             break;
 556         case LIR_dasq:
 557             if (oprnd->isop(LIR_qasd))
 558                 return oprnd->oprnd1();
 559             break;
 560         case LIR_qasd:
 561             if (oprnd->isop(LIR_dasq))
 562                 return oprnd->oprnd1();
 563             break;
 564 #endif
 565 #if NJ_SOFTFLOAT_SUPPORTED
 566         case LIR_dlo2i:
 567             if (oprnd->isImmD())
 568                 return insImmI(oprnd->immDlo());
 569             if (oprnd->isop(LIR_ii2d))
 570                 return oprnd->oprnd1();
 571             break;
 572         case LIR_dhi2i:
 573             if (oprnd->isImmD())
 574                 return insImmI(oprnd->immDhi());
 575             if (oprnd->isop(LIR_ii2d))
 576                 return oprnd->oprnd2();
 577             break;
 578 #endif
 579         case LIR_noti:
 580             if (oprnd->isImmI())
 581                 return insImmI(~oprnd->immI());
 582         involution:
 583             if (v == oprnd->opcode())
 584                 return oprnd->oprnd1();
 585             break;
 586         case LIR_negi:
 587             if (oprnd->isImmI())
 588                 return insImmI(-oprnd->immI());
 589             if (oprnd->isop(LIR_subi)) // -(a-b) = b-a
 590                 return out->ins2(LIR_subi, oprnd->oprnd2(), oprnd->oprnd1());
 591             goto involution;
 592         case LIR_negd:
 593             if (oprnd->isImmD())
 594                 return insImmD(-oprnd->immD());
 595             if (oprnd->isop(LIR_subd))
 596                 return out->ins2(LIR_subd, oprnd->oprnd2(), oprnd->oprnd1());
 597             goto involution;
 598         case LIR_i2d:
 599             if (oprnd->isImmI())
 600                 return insImmD(oprnd->immI());
 601             // Nb: i2d(d2i(x)) != x
 602             break;
 603         case LIR_d2i:
 604             if (oprnd->isImmD())
 605                 return insImmI(int32_t(oprnd->immD()));
 606             if (oprnd->isop(LIR_i2d))
 607                 return oprnd->oprnd1();
 608             break;
 609         case LIR_ui2d:
 610             if (oprnd->isImmI())
 611                 return insImmD(uint32_t(oprnd->immI()));
 612             break;
 613         default:
 614             ;
 615         }
 616
 617         return out->ins1(v, oprnd);
 618     }
 619
 620     // This is an ugly workaround for an apparent compiler
 621     // bug; in VC2008, compiling with optimization on
 622     // will produce spurious errors if this code is inlined
 623     // into ExprFilter::ins2(). See https://bugzilla.mozilla.org/show_bug.cgi?id=538504
 624     inline double do_join(int32_t c1, int32_t c2)
 625     {
 626         union {
 627             double d;
 628             uint64_t u64;
 629         } u;
 630         u.u64 = uint32_t(c1) | uint64_t(c2)<<32;
 631         return u.d;
 632     }
 633
 634     LIns* ExprFilter::ins2(LOpcode v, LIns* oprnd1, LIns* oprnd2)
 635     {
 636         NanoAssert(oprnd1 && oprnd2);
 637
 638         //-------------------------------------------------------------------
 639         // Folding where the two operands are equal
 640         //-------------------------------------------------------------------
 641         if (oprnd1 == oprnd2) {
 642             // The operands are equal.
 643             switch (v) {
 644             case LIR_xori:
 645             case LIR_subi:
 646             case LIR_ltui:
 647             case LIR_gtui:
 648             case LIR_gti:
 649             case LIR_lti:
 650                 return insImmI(0);
 651
 652             case LIR_ori:
 653             case LIR_andi:
 654                 return oprnd1;
 655
 656             case LIR_lei:
 657             case LIR_leui:
 658             case LIR_gei:
 659             case LIR_geui:
 660                 return insImmI(1);      // (x <= x) == 1; (x >= x) == 1
 661
 662             default:
 663                 break;
 664             }
 665         }
 666
 667         //-------------------------------------------------------------------
 668         // Folding where both operands are immediates, grouped by type
 669         //-------------------------------------------------------------------
 670         if (oprnd1->isImmI() && oprnd2->isImmI()) {
 671             // The operands are both int immediates.
 672             int32_t c1 = oprnd1->immI();
 673             int32_t c2 = oprnd2->immI();
 674             double d;
 675             int32_t r;
 676
 677             switch (v) {
 678 #if NJ_SOFTFLOAT_SUPPORTED
 679             case LIR_ii2d:  return insImmD(do_join(c1, c2));
 680 #endif
 681             case LIR_eqi:   return insImmI(c1 == c2);
 682             case LIR_lti:   return insImmI(c1 <  c2);
 683             case LIR_gti:   return insImmI(c1 >  c2);
 684             case LIR_lei:   return insImmI(c1 <= c2);
 685             case LIR_gei:   return insImmI(c1 >= c2);
 686             case LIR_ltui:  return insImmI(uint32_t(c1) <  uint32_t(c2));
 687             case LIR_gtui:  return insImmI(uint32_t(c1) >  uint32_t(c2));
 688             case LIR_leui:  return insImmI(uint32_t(c1) <= uint32_t(c2));
 689             case LIR_geui:  return insImmI(uint32_t(c1) >= uint32_t(c2));
 690
 691             case LIR_lshi:  return insImmI(c1 << (c2 & 0x1f));
 692             case LIR_rshi:  return insImmI(c1 >> (c2 & 0x1f));
 693             case LIR_rshui: return insImmI(uint32_t(c1) >> (c2 & 0x1f));
 694
 695             case LIR_ori:   return insImmI(c1 | c2);
 696             case LIR_andi:  return insImmI(c1 & c2);
 697             case LIR_xori:  return insImmI(c1 ^ c2);
 698
 699             case LIR_addi:  d = double(c1) + double(c2);    goto fold;
 700             case LIR_subi:  d = double(c1) - double(c2);    goto fold;
 701             case LIR_muli:  d = double(c1) * double(c2);    goto fold;
 702             fold:
 703                 // Make sure the constant expression doesn't overflow.  This
 704                 // probably isn't necessary, because the C++ overflow
 705                 // behaviour is very likely to be the same as the machine code
 706                 // overflow behaviour, but we do it just to be safe.
 707                 r = int32_t(d);
 708                 if (r == d)
 709                     return insImmI(r);
 710                 break;
 711
 712 #if defined NANOJIT_IA32 || defined NANOJIT_X64
 713             case LIR_divi:
 714             case LIR_modi:
 715                 // We can't easily fold div and mod, since folding div makes it
 716                 // impossible to calculate the mod that refers to it. The
 717                 // frontend shouldn't emit div and mod with constant operands.
 718                 NanoAssert(0);
 719 #endif
 720             default:
 721                 break;
 722             }
 723
 724 #ifdef NANOJIT_64BIT
 725         } else if (oprnd1->isImmQ() && oprnd2->isImmQ()) {
 726             // The operands are both quad immediates.
 727             int64_t c1 = oprnd1->immQ();
 728             int64_t c2 = oprnd2->immQ();
 729             static const int64_t MIN_INT64 = int64_t(0x8000000000000000LL);
 730             static const int64_t MAX_INT64 = int64_t(0x7FFFFFFFFFFFFFFFLL);
 731
 732             switch (v) {
 733             case LIR_eqq:   return insImmI(c1 == c2);
 734             case LIR_ltq:   return insImmI(c1 <  c2);
 735             case LIR_gtq:   return insImmI(c1 >  c2);
 736             case LIR_leq:   return insImmI(c1 <= c2);
 737             case LIR_geq:   return insImmI(c1 >= c2);
 738             case LIR_ltuq:  return insImmI(uint64_t(c1) <  uint64_t(c2));
 739             case LIR_gtuq:  return insImmI(uint64_t(c1) >  uint64_t(c2));
 740             case LIR_leuq:  return insImmI(uint64_t(c1) <= uint64_t(c2));
 741             case LIR_geuq:  return insImmI(uint64_t(c1) >= uint64_t(c2));
 742
 743             case LIR_orq:   return insImmQ(c1 | c2);
 744             case LIR_andq:  return insImmQ(c1 & c2);
 745             case LIR_xorq:  return insImmQ(c1 ^ c2);
 746
 747             // Nb: LIR_rshq, LIR_lshq and LIR_rshuq aren't here because their
 748             // RHS is an int.  They are below.
 749
 750             case LIR_addq:
 751                 // Overflow is only possible if both values are positive or
 752                 // both negative.  Just like the 32-bit case, this check
 753                 // probably isn't necessary, because the C++ overflow
 754                 // behaviour is very likely to be the same as the machine code
 755                 // overflow behaviour, but we do it just to be safe.
 756                 if (c1 > 0 && c2 > 0) {
 757                     // Overflows if: c1 + c2 > MAX_INT64
 758                     // Re-express to avoid overflow in the check: c1 > MAX_INT64 - c2
 759                     if (c1 > MAX_INT64 - c2)
 760                         break;                  // overflow
 761                 } else if (c1 < 0 && c2 < 0) {
 762                     // Overflows if: c1 + c2 < MIN_INT64
 763                     // Re-express to avoid overflow in the check: c1 < MIN_INT64 - c2
 764                     if (c1 < MIN_INT64 - c2)
 765                         break;                  // overflow
 766                 }
 767                 return insImmQ(c1 + c2);
 768
 769             case LIR_subq:
 770                 // Overflow is only possible if one value is positive and one
 771                 // negative.
 772                 if (c1 > 0 && c2 < 0) {
 773                     // Overflows if: c1 - c2 > MAX_INT64
 774                     // Re-express to avoid overflow in the check: c1 > MAX_INT64 + c2
 775                     if (c1 > MAX_INT64 + c2)
 776                         break;                  // overflow
 777                 } else if (c1 < 0 && c2 > 0) {
 778                     // Overflows if: c1 - c2 < MIN_INT64
 779                     // Re-express to avoid overflow in the check: c1 < MIN_INT64 + c2
 780                     if (c1 < MIN_INT64 + c2)
 781                         break;                  // overflow
 782                 }
 783                 return insImmQ(c1 - c2);
 784
 785             default:
 786                 break;
 787             }
 788
 789         } else if (oprnd1->isImmQ() && oprnd2->isImmI()) {
 790             // The first operand is a quad immediate, the second is an int
 791             // immediate.
 792             int64_t c1 = oprnd1->immQ();
 793             int32_t c2 = oprnd2->immI();
 794
 795             switch (v) {
 796             case LIR_lshq:  return insImmQ(c1 << (c2 & 0x3f));
 797             case LIR_rshq:  return insImmQ(c1 >> (c2 & 0x3f));
 798             case LIR_rshuq: return insImmQ(uint64_t(c1) >> (c2 & 0x3f));
 799
 800             default:        break;
 801             }
 802 #endif  // NANOJIT_64BIT
 803
 804         } else if (oprnd1->isImmD() && oprnd2->isImmD()) {
 805             // The operands are both double immediates.
 806             double c1 = oprnd1->immD();
 807             double c2 = oprnd2->immD();
 808             switch (v) {
 809             case LIR_eqd:   return insImmI(c1 == c2);
 810             case LIR_ltd:   return insImmI(c1 <  c2);
 811             case LIR_gtd:   return insImmI(c1 >  c2);
 812             case LIR_led:   return insImmI(c1 <= c2);
 813             case LIR_ged:   return insImmI(c1 >= c2);
 814
 815             case LIR_addd:  return insImmD(c1 + c2);
 816             case LIR_subd:  return insImmD(c1 - c2);
 817             case LIR_muld:  return insImmD(c1 * c2);
 818             case LIR_divd:  return insImmD(c1 / c2);
 819
 820             default:        break;
 821             }
 822         }
 823
 824         //-------------------------------------------------------------------
 825         // If only one operand is an immediate, make sure it's on the RHS, if possible
 826         //-------------------------------------------------------------------
 827         if (oprnd1->isImmAny() && !oprnd2->isImmAny()) {
 828             switch (v) {
 829             case LIR_eqi:
 830             CASE64(LIR_eqq:)
 831             case LIR_eqd:
 832             case LIR_addi:
 833             CASE64(LIR_addq:)
 834             case LIR_addd:
 835             case LIR_muli:
 836             case LIR_muld:
 837             case LIR_andi:
 838             CASE64(LIR_andq:)
 839             case LIR_ori:
 840             CASE64(LIR_orq:)
 841             case LIR_xori:
 842             CASE64(LIR_xorq:) {
 843                 // move immediate to RHS
 844                 LIns* t = oprnd2;
 845                 oprnd2 = oprnd1;
 846                 oprnd1 = t;
 847                 break;
 848             }
 849             default:
 850                 if (isCmpOpcode(v)) {
 851                     // move immediate to RHS, swap the operator
 852                     LIns *t = oprnd2;
 853                     oprnd2 = oprnd1;
 854                     oprnd1 = t;
 855                     v = invertCmpOpcode(v);
 856                 }
 857                 break;
 858             }
 859         }
 860
 861         //-------------------------------------------------------------------
 862         // Folding where the RHS is an immediate
 863         //-------------------------------------------------------------------
 864         if (oprnd2->isImmI()) {
 865             // The second operand is an int immediate.
 866             int c = oprnd2->immI();
 867             switch (v) {
 868             case LIR_addi:
 869                 if (oprnd1->isop(LIR_addi) && oprnd1->oprnd2()->isImmI()) {
 870                     // add(add(x,c1),c2) => add(x,c1+c2)
 871                     c += oprnd1->oprnd2()->immI();
 872                     oprnd2 = insImmI(c);
 873                     oprnd1 = oprnd1->oprnd1();
 874                 }
 875                 break;
 876
 877             case LIR_subi:
 878                 if (oprnd1->isop(LIR_addi) && oprnd1->oprnd2()->isImmI()) {
 879                     // sub(add(x,c1),c2) => add(x,c1-c2)
 880                     c = oprnd1->oprnd2()->immI() - c;
 881                     oprnd2 = insImmI(c);
 882                     oprnd1 = oprnd1->oprnd1();
 883                     v = LIR_addi;
 884                 }
 885                 break;
 886
 887             case LIR_rshi:
 888                 if (c == 16 && oprnd1->isop(LIR_lshi) &&
 889                     oprnd1->oprnd2()->isImmI(16) &&
 890                     insIsS16(oprnd1->oprnd1()))
 891                 {
 892                     // rsh(lhs(x,16),16) == x, if x is S16
 893                     return oprnd1->oprnd1();
 894                 }
 895                 break;
 896
 897             default:
 898                 break;
 899             }
 900
 901             if (c == 0) {
 902                 switch (v) {
 903                 case LIR_addi:
 904                 case LIR_ori:
 905                 case LIR_xori:
 906                 case LIR_subi:
 907                 case LIR_lshi:
 908                 case LIR_rshi:
 909                 case LIR_rshui:
 910                 CASE64(LIR_lshq:)   // These are here because their RHS is an int
 911                 CASE64(LIR_rshq:)
 912                 CASE64(LIR_rshuq:)
 913                     return oprnd1;
 914
 915                 case LIR_andi:
 916                 case LIR_muli:
 917                 case LIR_ltui: // unsigned < 0 -> always false
 918                     return oprnd2;
 919
 920                 case LIR_geui: // unsigned >= 0 -> always true
 921                     return insImmI(1);
 922
 923                 case LIR_eqi:
 924                     if (oprnd1->isop(LIR_ori) &&
 925                         oprnd1->oprnd2()->isImmI() &&
 926                         oprnd1->oprnd2()->immI() != 0)
 927                     {
 928                         // (x or c) != 0 if c != 0
 929                         return insImmI(0);
 930                     }
 931
 932                 default:
 933                     break;
 934                 }
 935
 936             } else if (c == -1) {
 937                 switch (v) {
 938                 case LIR_ori:  return oprnd2;       // x | -1 = -1
 939                 case LIR_andi: return oprnd1;       // x & -1 = x
 940                 case LIR_gtui: return insImmI(0);   // u32 >  0xffffffff -> always false
 941                 case LIR_leui: return insImmI(1);   // u32 <= 0xffffffff -> always true
 942                 default:       break;
 943                 }
 944
 945             } else if (c == 1) {
 946                 if (oprnd1->isCmp()) {
 947                     switch (v) {
 948                     case LIR_ori:   return oprnd2;      // 0or1 | 1 = 1   (and oprnd2 == 1)
 949                     case LIR_andi:  return oprnd1;      // 0or1 & 1 = 0or1
 950                     case LIR_gtui:  return insImmI(0);  // 0or1 > 1 -> always false
 951                     default:        break;
 952                     }
 953                 } else if (v == LIR_muli) {
 954                     return oprnd1;          // x * 1 = x
 955                 }
 956             }
 957
 958 #ifdef NANOJIT_64BIT
 959         } else if (oprnd2->isImmQ()) {
 960             // The second operand is a quad immediate.
 961             int64_t c = oprnd2->immQ();
 962             if (c == 0) {
 963                 switch (v) {
 964                 case LIR_addq:
 965                 case LIR_orq:
 966                 case LIR_xorq:
 967                 case LIR_subq:
 968                     return oprnd1;
 969
 970                 case LIR_andq:
 971                     return oprnd2;
 972
 973                 case LIR_ltuq: // unsigned < 0 -> always false
 974                     return insImmI(0);
 975
 976                 case LIR_geuq: // unsigned >= 0 -> always true
 977                     return insImmI(1);
 978
 979                 default:
 980                     break;
 981                 }
 982
 983             } else if (c == -1) {
 984                 switch (v) {
 985                 case LIR_orq:  return oprnd2;       // x | -1 = -1
 986                 case LIR_andq: return oprnd1;       // x & -1 = x
 987                 case LIR_gtuq: return insImmI(0);   // u64 >  0xffffffffffffffff -> always false
 988                 case LIR_leuq: return insImmI(1);   // u64 <= 0xffffffffffffffff -> always true
 989                 default:       break;
 990                 }
 991
 992             } else if (c == 1) {
 993                 if (oprnd1->isCmp()) {
 994                     switch (v) {
 995                     case LIR_orq:   return oprnd2;      // 0or1 | 1 = 1   (and oprnd2 == 1)
 996                     case LIR_andq:  return oprnd1;      // 0or1 & 1 = 0or1
 997                     case LIR_gtuq:  return insImmI(0);  // 0or1 > 1 -> always false
 998                     default:        break;
 999                     }
1000                 }
1001             }
1002 #endif  // NANOJIT_64BIT
1003         }
1004
1005 #if NJ_SOFTFLOAT_SUPPORTED
1006         //-------------------------------------------------------------------
1007         // SoftFloat-specific folding
1008         //-------------------------------------------------------------------
1009         LIns* ins;
1010         if (v == LIR_ii2d && oprnd1->isop(LIR_dlo2i) && oprnd2->isop(LIR_dhi2i) &&
1011             (ins = oprnd1->oprnd1()) == oprnd2->oprnd1())
1012         {
1013             // qjoin(qlo(x),qhi(x)) == x
1014             return ins;
1015         }
1016 #endif
1017
1018         //-------------------------------------------------------------------
1019         // No folding possible
1020         //-------------------------------------------------------------------
1021         return out->ins2(v, oprnd1, oprnd2);
1022     }
1023
1024     LIns* ExprFilter::ins3(LOpcode v, LIns* oprnd1, LIns* oprnd2, LIns* oprnd3)
1025     {
1026         NanoAssert(oprnd1 && oprnd2 && oprnd3);
1027         NanoAssert(isCmovOpcode(v));
1028         if (oprnd2 == oprnd3) {
1029             // c ? a : a => a
1030             return oprnd2;
1031         }
1032         if (oprnd1->isImmI()) {
1033             // immediate ? x : y => return x or y depending on immediate
1034             return oprnd1->immI() ? oprnd2 : oprnd3;
1035         }
1036         if (oprnd1->isop(LIR_eqi) &&
1037             ((oprnd1->oprnd2() == oprnd2 && oprnd1->oprnd1() == oprnd3) ||
1038              (oprnd1->oprnd1() == oprnd2 && oprnd1->oprnd2() == oprnd3))) {
1039             // (y == x) ? x : y  =>  y
1040             // (x == y) ? x : y  =>  y
1041             return oprnd3;
1042         }
1043
1044         return out->ins3(v, oprnd1, oprnd2, oprnd3);
1045     }
1046
1047     LIns* ExprFilter::insGuard(LOpcode v, LIns* c, GuardRecord *gr)
1048     {
1049         if (v == LIR_xt || v == LIR_xf) {
1050             if (c->isImmI()) {
1051                 if ((v == LIR_xt && !c->immI()) || (v == LIR_xf && c->immI())) {
1052                     return 0; // no guard needed
1053                 } else {
1054 #ifdef JS_TRACER
1055                     // We're emitting a guard that will always fail. Any code
1056                     // emitted after this guard is dead code.  But it won't be
1057                     // optimized away, and it could indicate a performance
1058                     // problem or other bug, so assert in debug builds.
1059                     NanoAssertMsg(0, "Constantly false guard detected");
1060 #endif
1061                     return out->insGuard(LIR_x, NULL, gr);
1062                 }
1063             } else {
1064                 while (c->isop(LIR_eqi) && c->oprnd1()->isCmp() && c->oprnd2()->isImmI(0)) {
1065                     // xt(eq(cmp,0)) => xf(cmp)   or   xf(eq(cmp,0)) => xt(cmp)
1066                     v = invertCondGuardOpcode(v);
1067                     c = c->oprnd1();
1068                 }
1069             }
1070         }
1071         return out->insGuard(v, c, gr);
1072     }
1073
1074     // Simplify operator if possible.  Always return NULL if overflow is possible.
1075
1076     LIns* ExprFilter::simplifyOverflowArith(LOpcode op, LIns** opnd1, LIns** opnd2)
1077     {
1078         LIns* oprnd1 = *opnd1;
1079         LIns* oprnd2 = *opnd2;
1080
1081         if (oprnd1->isImmI() && oprnd2->isImmI()) {
1082             int32_t c1 = oprnd1->immI();
1083             int32_t c2 = oprnd2->immI();
1084             double d = 0.0;
1085
1086             // The code below attempts to perform the operation while
1087             // detecting overflow.  For multiplication, we may unnecessarily
1088             // infer a possible overflow due to the insufficient integer
1089             // range of the double type.
1090
1091             switch (op) {
1092             case LIR_addjovi:
1093             case LIR_addxovi:    d = double(c1) + double(c2);    break;
1094             case LIR_subjovi:
1095             case LIR_subxovi:    d = double(c1) - double(c2);    break;
1096             case LIR_muljovi:
1097             case LIR_mulxovi:    d = double(c1) * double(c2);    break;
1098             default:             NanoAssert(0);                  break;
1099             }
1100             int32_t r = int32_t(d);
1101             if (r == d)
1102                 return insImmI(r);
1103
1104         } else if (oprnd1->isImmI() && !oprnd2->isImmI()) {
1105             switch (op) {
1106             case LIR_addjovi:
1107             case LIR_addxovi:
1108             case LIR_muljovi:
1109             case LIR_mulxovi: {
1110                 // swap operands, moving immediate to RHS
1111                 LIns* t = oprnd2;
1112                 oprnd2 = oprnd1;
1113                 oprnd1 = t;
1114                 // swap actual arguments in caller as well
1115                 *opnd1 = oprnd1;
1116                 *opnd2 = oprnd2;
1117                 break;
1118             }
1119             case LIR_subjovi:
1120             case LIR_subxovi:
1121                 break;
1122             default:
1123                 NanoAssert(0);
1124             }
1125         }
1126
1127         if (oprnd2->isImmI()) {
1128             int c = oprnd2->immI();
1129             if (c == 0) {
1130                 switch (op) {
1131                 case LIR_addjovi:
1132                 case LIR_addxovi:
1133                 case LIR_subjovi:
1134                 case LIR_subxovi:
1135                     return oprnd1;
1136                 case LIR_muljovi:
1137                 case LIR_mulxovi:
1138                     return oprnd2;
1139                 default:
1140                     ;
1141                 }
1142             } else if (c == 1 && (op == LIR_muljovi || op == LIR_mulxovi)) {
1143                 return oprnd1;
1144             }
1145         }
1146
1147         return NULL;
1148     }
1149
1150     LIns* ExprFilter::insGuardXov(LOpcode op, LIns* oprnd1, LIns* oprnd2, GuardRecord *gr)
1151     {
1152         LIns* simplified = simplifyOverflowArith(op, &oprnd1, &oprnd2);
1153         if (simplified)
1154             return simplified;
1155
1156         return out->insGuardXov(op, oprnd1, oprnd2, gr);
1157     }
1158
1159     LIns* ExprFilter::insBranch(LOpcode v, LIns *c, LIns *t)
1160     {
1161         if (v == LIR_jt || v == LIR_jf) {
1162             if (c->isImmI()) {
1163                 if ((v == LIR_jt && !c->immI()) || (v == LIR_jf && c->immI())) {
1164                     return 0; // no jump needed
1165                 } else {
1166 #ifdef JS_TRACER
1167                     // We're emitting a branch that will always be taken.  This may
1168                     // result in dead code that will not be optimized away, and
1169                     // could indicate a performance problem or other bug, so assert
1170                     // in debug builds.
1171                     NanoAssertMsg(0, "Constantly taken branch detected");
1172 #endif
1173                     return out->insBranch(LIR_j, NULL, t);
1174                 }
1175             } else {
1176                 while (c->isop(LIR_eqi) && c->oprnd1()->isCmp() && c->oprnd2()->isImmI(0)) {
1177                     // jt(eq(cmp,0)) => jf(cmp)   or   jf(eq(cmp,0)) => jt(cmp)
1178                     v = invertCondJmpOpcode(v);
1179                     c = c->oprnd1();
1180                 }
1181             }
1182         }
1183         return out->insBranch(v, c, t);
1184     }
1185
1186     LIns* ExprFilter::insBranchJov(LOpcode op, LIns* oprnd1, LIns* oprnd2, LIns* target)
1187     {
1188         LIns* simplified = simplifyOverflowArith(op, &oprnd1, &oprnd2);
1189         if (simplified)
1190             return simplified;
1191
1192         return out->insBranchJov(op, oprnd1, oprnd2, target);
1193     }
1194
1195     LIns* ExprFilter::insLoad(LOpcode op, LIns* base, int32_t off, AccSet accSet, LoadQual loadQual) {
1196         if (base->isImmP() && !isS8(off)) {
1197             // if the effective address is constant, then transform:
1198             // ld const[bigconst] => ld (const+bigconst)[0]
1199             // note: we don't do this optimization for <8bit field offsets,
1200             // under the assumption that we're more likely to CSE-match the
1201             // constant base address if we dont const-fold small offsets.
1202             uintptr_t p = (uintptr_t)base->immP() + off;
1203             return out->insLoad(op, insImmP((void*)p), 0, accSet, loadQual);
1204         }
1205         return out->insLoad(op, base, off, accSet, loadQual);
1206     }
1207
1208     LIns* LirWriter::insStore(LIns* value, LIns* base, int32_t d, AccSet accSet)
1209     {
1210         // Determine which kind of store should be used for 'value' based on
1211         // its type.
1212         LOpcode op = LOpcode(0);
1213         switch (value->retType()) {
1214         case LTy_I: op = LIR_sti;   break;
1215 #ifdef NANOJIT_64BIT
1216         case LTy_Q: op = LIR_stq;   break;
1217 #endif
1218         case LTy_D: op = LIR_std;   break;
1219         case LTy_V: NanoAssert(0);  break;
1220         default:    NanoAssert(0);  break;
1221         }
1222         return insStore(op, value, base, d, accSet);
1223     }
1224
1225     LIns* LirWriter::insChoose(LIns* cond, LIns* iftrue, LIns* iffalse, bool use_cmov)
1226     {
1227         // 'cond' must be a conditional, unless it has been optimized to 0 or
1228         // 1.  In that case make it an ==0 test and flip the branches.  It'll
1229         // get constant-folded by ExprFilter subsequently.
1230         if (!cond->isCmp()) {
1231             NanoAssert(cond->isImmI());
1232             cond = insEqI_0(cond);
1233             LIns* tmp = iftrue;
1234             iftrue = iffalse;
1235             iffalse = tmp;
1236         }
1237
1238         if (use_cmov) {
1239             LOpcode op = LIR_cmovi;
1240             if (iftrue->isI() && iffalse->isI()) {
1241                 op = LIR_cmovi;
1242 #ifdef NANOJIT_64BIT
1243             } else if (iftrue->isQ() && iffalse->isQ()) {
1244                 op = LIR_cmovq;
1245 #endif
1246             } else if (iftrue->isD() && iffalse->isD()) {
1247                 op = LIR_cmovd;
1248             } else {
1249                 NanoAssert(0);  // type error
1250             }
1251             return ins3(op, cond, iftrue, iffalse);
1252         }
1253
1254         LIns* ncond = ins1(LIR_negi, cond); // cond ? -1 : 0
1255         return ins2(LIR_ori,
1256                     ins2(LIR_andi, iftrue, ncond),
1257                     ins2(LIR_andi, iffalse, ins1(LIR_noti, ncond)));
1258     }
1259
1260     LIns* LirBufWriter::insCall(const CallInfo *ci, LIns* args[])
1261     {
1262         LOpcode op = getCallOpcode(ci);
1263 #if NJ_SOFTFLOAT_SUPPORTED
1264         // SoftFloat: convert LIR_calld to LIR_calli.
1265         if (_config.soft_float && op == LIR_calld)
1266             op = LIR_calli;
1267 #endif
1268
1269         int32_t argc = ci->count_args();
1270         NanoAssert(argc <= (int)MAXARGS);
1271
1272         // Allocate space for and copy the arguments.  We use the same
1273         // allocator as the normal LIR buffers so it has the same lifetime.
1274         // Nb: this must be kept in sync with arg().
1275         LIns** args2 = (LIns**)_buf->_allocator.alloc(argc * sizeof(LIns*));
1276         memcpy(args2, args, argc * sizeof(LIns*));
1277
1278         // Allocate and write the call instruction.
1279         LInsC* insC = (LInsC*)_buf->makeRoom(sizeof(LInsC));
1280         LIns*  ins  = insC->getLIns();
1281         ins->initLInsC(op, args2, ci);
1282         return ins;
1283     }
1284
1285     using namespace avmplus;
1286
1287     StackFilter::StackFilter(LirFilter *in, Allocator& alloc, LIns* sp)
1288         : LirFilter(in), sp(sp), stk(alloc), top(0)
1289     {}
1290
1291     // If we see a sequence like this:
1292     //
1293     //   sti sp[0]
1294     //   ...
1295     //   sti sp[0]
1296     //
1297     // where '...' contains no guards, we can remove the first store.  Also,
1298     // because stack entries are eight bytes each (we check this), if we have
1299     // this:
1300     //
1301     //   stfi sp[0]
1302     //   ...
1303     //   sti sp[0]
1304     //
1305     // we can again remove the first store -- even though the second store
1306     // doesn't clobber the high four bytes -- because we know the entire value
1307     // stored by the first store is dead.
1308     //
1309     LIns* StackFilter::read()
1310     {
1311         for (;;) {
1312             LIns* ins = in->read();
1313
1314             if (ins->isStore()) {
1315                 LIns* base = ins->oprnd2();
1316                 if (base == sp) {
1317                     // 'disp' must be eight-aligned because each stack entry is 8 bytes.
1318                     NanoAssert((ins->disp() & 0x7) == 0);
1319
1320                     int d = ins->disp() >> 3;
1321                     if (d >= top) {
1322                         continue;
1323                     } else {
1324                         d = top - d;
1325                         if (stk.get(d)) {
1326                             continue;
1327                         } else {
1328                             stk.set(d);
1329                         }
1330                     }
1331                 }
1332             }
1333             /*
1334              * NB: If there is a backward branch other than the loop-restart branch, this is
1335              * going to be wrong. Unfortunately there doesn't seem to be an easy way to detect
1336              * such branches. Just do not create any.
1337              *
1338              * The isLive() call is valid because liveness will have been
1339              * computed by Assembler::gen() for every instruction following
1340              * this guard.
1341              */
1342             else if (ins->isGuard() && ins->isLive()) {
1343                 stk.reset();
1344                 top = getTop(ins);
1345                 top >>= 3;
1346             }
1347
1348             return ins;
1349         }
1350     }
1351
1352 #ifdef NJ_VERBOSE
1353     class RetiredEntry
1354     {
1355     public:
1356         Seq<LIns*>* live;
1357         LIns* i;
1358         RetiredEntry(): live(NULL), i(NULL) {}
1359     };
1360
1361     class LiveTable
1362     {
1363         Allocator& alloc;
1364     public:
1365         HashMap<LIns*, LIns*> live;
1366         SeqBuilder<RetiredEntry*> retired;
1367         int retiredCount;
1368         int maxlive;
1369         LiveTable(Allocator& alloc)
1370             : alloc(alloc)
1371             , live(alloc)
1372             , retired(alloc)
1373             , retiredCount(0)
1374             , maxlive(0)
1375         { }
1376
1377         void add(LIns* ins, LIns* use) {
1378             if (!ins->isImmAny() && !live.containsKey(ins)) {
1379                 NanoAssert(size_t(ins->opcode()) < sizeof(lirNames) / sizeof(lirNames[0]));
1380                 live.put(ins,use);
1381             }
1382         }
1383
1384         void retire(LIns* i) {
1385             RetiredEntry *e = new (alloc) RetiredEntry();
1386             e->i = i;
1387             SeqBuilder<LIns*> livelist(alloc);
1388             HashMap<LIns*, LIns*>::Iter iter(live);
1389             int live_count = 0;
1390             while (iter.next()) {
1391                 LIns* ins = iter.key();
1392                 if (!ins->isV()) {
1393                     live_count++;
1394                     livelist.insert(ins);
1395                 }
1396             }
1397             e->live = livelist.get();
1398             if (live_count > maxlive)
1399                 maxlive = live_count;
1400
1401             live.remove(i);
1402             retired.insert(e);
1403             retiredCount++;
1404         }
1405
1406         bool contains(LIns* i) {
1407             return live.containsKey(i);
1408         }
1409     };
1410
1411     /*
1412      * traverse the LIR buffer and discover which instructions are live
1413      * by starting from instructions with side effects (stores, calls, branches)
1414      * and marking instructions used by them.  Works bottom-up, in one pass.
1415      * if showLiveRefs == true, also print the set of live expressions next to
1416      * each instruction
1417      */
1418     void live(LirFilter* in, Allocator& alloc, Fragment *frag, LogControl *logc)
1419     {
1420         // traverse backwards to find live exprs and a few other stats.
1421
1422         LiveTable live(alloc);
1423         uint32_t exits = 0;
1424         int total = 0;
1425         if (frag->lirbuf->state)
1426             live.add(frag->lirbuf->state, 0);
1427         for (LIns* ins = in->read(); !ins->isop(LIR_start); ins = in->read())
1428         {
1429             total++;
1430
1431             // First handle instructions that are always live (ie. those that
1432             // don't require being marked as live), eg. those with
1433             // side-effects.  We ignore LIR_paramp.
1434             if (ins->isLive() && !ins->isop(LIR_paramp))
1435             {
1436                 live.add(ins, 0);
1437                 if (ins->isGuard())
1438                     exits++;
1439             }
1440
1441             // now propagate liveness
1442             if (live.contains(ins))
1443             {
1444                 live.retire(ins);
1445
1446                 switch (ins->opcode()) {
1447                 case LIR_skip:
1448                     NanoAssertMsg(0, "Shouldn't see LIR_skip");
1449                     break;
1450
1451                 case LIR_start:
1452                 case LIR_regfence:
1453                 case LIR_paramp:
1454                 case LIR_x:
1455                 case LIR_xbarrier:
1456                 case LIR_j:
1457                 case LIR_label:
1458                 case LIR_immi:
1459                 CASE64(LIR_immq:)
1460                 case LIR_immd:
1461                 case LIR_allocp:
1462                 case LIR_comment:
1463                     // No operands, do nothing.
1464                     break;
1465
1466                 case LIR_ldi:
1467                 CASE64(LIR_ldq:)
1468                 case LIR_ldd:
1469                 case LIR_lduc2ui:
1470                 case LIR_ldus2ui:
1471                 case LIR_ldc2i:
1472                 case LIR_lds2i:
1473                 case LIR_ldf2d:
1474                 case LIR_reti:
1475                 CASE64(LIR_retq:)
1476                 case LIR_retd:
1477                 case LIR_livei:
1478                 CASE64(LIR_liveq:)
1479                 case LIR_lived:
1480                 case LIR_xt:
1481                 case LIR_xf:
1482                 case LIR_xtbl:
1483                 case LIR_jt:
1484                 case LIR_jf:
1485                 case LIR_jtbl:
1486                 case LIR_negi:
1487                 case LIR_negd:
1488                 case LIR_noti:
1489                 CASESF(LIR_dlo2i:)
1490                 CASESF(LIR_dhi2i:)
1491                 CASESF(LIR_hcalli:)
1492                 CASE64(LIR_i2q:)
1493                 CASE64(LIR_ui2uq:)
1494                 case LIR_i2d:
1495                 case LIR_ui2d:
1496                 CASE64(LIR_q2i:)
1497                 case LIR_d2i:
1498                 CASE64(LIR_dasq:)
1499                 CASE64(LIR_qasd:)
1500                 CASE86(LIR_modi:)
1501                     live.add(ins->oprnd1(), 0);
1502                     break;
1503
1504                 case LIR_sti:
1505                 CASE64(LIR_stq:)
1506                 case LIR_std:
1507                 case LIR_sti2c:
1508                 case LIR_sti2s:
1509                 case LIR_std2f:
1510                 case LIR_eqi:
1511                 case LIR_lti:
1512                 case LIR_gti:
1513                 case LIR_lei:
1514                 case LIR_gei:
1515                 case LIR_ltui:
1516                 case LIR_gtui:
1517                 case LIR_leui:
1518                 case LIR_geui:
1519                 case LIR_eqd:
1520                 case LIR_ltd:
1521                 case LIR_gtd:
1522                 case LIR_led:
1523                 case LIR_ged:
1524                 CASE64(LIR_eqq:)
1525                 CASE64(LIR_ltq:)
1526                 CASE64(LIR_gtq:)
1527                 CASE64(LIR_leq:)
1528                 CASE64(LIR_geq:)
1529                 CASE64(LIR_ltuq:)
1530                 CASE64(LIR_gtuq:)
1531                 CASE64(LIR_leuq:)
1532                 CASE64(LIR_geuq:)
1533                 case LIR_lshi:
1534                 case LIR_rshi:
1535                 case LIR_rshui:
1536                 CASE64(LIR_lshq:)
1537                 CASE64(LIR_rshq:)
1538                 CASE64(LIR_rshuq:)
1539                 case LIR_addi:
1540                 case LIR_subi:
1541                 case LIR_muli:
1542                 case LIR_addxovi:
1543                 case LIR_subxovi:
1544                 case LIR_mulxovi:
1545                 case LIR_addjovi:
1546                 case LIR_subjovi:
1547                 case LIR_muljovi:
1548                 CASE86(LIR_divi:)
1549                 case LIR_addd:
1550                 case LIR_subd:
1551                 case LIR_muld:
1552                 case LIR_divd:
1553                 CASE64(LIR_addq:)
1554                 CASE64(LIR_subq:)
1555                 CASE64(LIR_addjovq:)
1556                 CASE64(LIR_subjovq:)
1557                 case LIR_andi:
1558                 case LIR_ori:
1559                 case LIR_xori:
1560                 CASE64(LIR_andq:)
1561                 CASE64(LIR_orq:)
1562                 CASE64(LIR_xorq:)
1563                 CASESF(LIR_ii2d:)
1564                 case LIR_file:
1565                 case LIR_line:
1566                     live.add(ins->oprnd1(), 0);
1567                     live.add(ins->oprnd2(), 0);
1568                     break;
1569
1570                 case LIR_cmovi:
1571                 CASE64(LIR_cmovq:)
1572                 case LIR_cmovd:
1573                     live.add(ins->oprnd1(), 0);
1574                     live.add(ins->oprnd2(), 0);
1575                     live.add(ins->oprnd3(), 0);
1576                     break;
1577
1578                 case LIR_callv:
1579                 case LIR_calli:
1580                 CASE64(LIR_callq:)
1581                 case LIR_calld:
1582                     for (int i = 0, argc = ins->argc(); i < argc; i++)
1583                         live.add(ins->arg(i), 0);
1584                     break;
1585
1586                 default:
1587                     NanoAssertMsgf(0, "unhandled opcode: %d", ins->opcode());
1588                     break;
1589                 }
1590             }
1591         }
1592
1593         logc->printf("  Live instruction count %d, total %u, max pressure %d\n",
1594                      live.retiredCount, total, live.maxlive);
1595         if (exits > 0)
1596             logc->printf("  Side exits %u\n", exits);
1597         logc->printf("  Showing LIR instructions with live-after variables\n");
1598         logc->printf("\n");
1599
1600         // print live exprs, going forwards
1601         LInsPrinter *printer = frag->lirbuf->printer;
1602         bool newblock = true;
1603         for (Seq<RetiredEntry*>* p = live.retired.get(); p != NULL; p = p->tail) {
1604             RetiredEntry* e = p->head;
1605             InsBuf ib;
1606             RefBuf rb;
1607             char livebuf[4000], *s=livebuf;
1608             *s = 0;
1609             if (!newblock && e->i->isop(LIR_label)) {
1610                 logc->printf("\n");
1611             }
1612             newblock = false;
1613             for (Seq<LIns*>* p = e->live; p != NULL; p = p->tail) {
1614                 VMPI_strcpy(s, printer->formatRef(&rb, p->head));
1615                 s += VMPI_strlen(s);
1616                 *s++ = ' '; *s = 0;
1617                 NanoAssert(s < livebuf+sizeof(livebuf));
1618             }
1619             /* If the LIR insn is pretty short, print it and its
1620                live-after set on the same line.  If not, put
1621                live-after set on a new line, suitably indented. */
1622             const char* insn_text = printer->formatIns(&ib, e->i);
1623             if (VMPI_strlen(insn_text) >= 30-2) {
1624                 logc->printf("  %-30s\n  %-30s %s\n", insn_text, "", livebuf);
1625             } else {
1626                 logc->printf("  %-30s %s\n", insn_text, livebuf);
1627             }
1628
1629             if (e->i->isGuard() || e->i->isBranch() || e->i->isRet()) {
1630                 logc->printf("\n");
1631                 newblock = true;
1632             }
1633         }
1634     }
1635
1636     void LirNameMap::addNameWithSuffix(LIns* ins, const char *name, int suffix,
1637                                        bool ignoreOneSuffix) {
1638         NanoAssert(!names.containsKey(ins));
1639         const int N = 100;
1640         char name2[N];
1641         if (suffix == 1 && ignoreOneSuffix) {
1642             VMPI_snprintf(name2, N, "%s", name);                // don't add '1' suffix
1643         } else if (VMPI_isdigit(name[VMPI_strlen(name)-1])) {
1644             VMPI_snprintf(name2, N, "%s_%d", name, suffix);     // use '_' to avoid confusion
1645         } else {
1646             VMPI_snprintf(name2, N, "%s%d", name, suffix);      // normal case
1647         }
1648
1649         char *copy = new (alloc) char[VMPI_strlen(name2)+1];
1650         VMPI_strcpy(copy, name2);
1651         Entry *e = new (alloc) Entry(copy);
1652         names.put(ins, e);
1653     }
1654
1655     void LirNameMap::addName(LIns* ins, const char* name) {
1656         // The lookup may succeed, ie. we may already have a name for this
1657         // instruction.  This can happen because of CSE.  Eg. if we have this:
1658         //
1659         //   ins = addName("foo", insImmI(0))
1660         //
1661         // that assigns the name "foo1" to 'ins'.  If we later do this:
1662         //
1663         //   ins2 = addName("foo", insImmI(0))
1664         //
1665         // then CSE will cause 'ins' and 'ins2' to be equal.  So 'ins2'
1666         // already has a name ("foo1") and there's no need to generate a new
1667         // name "foo2".
1668         //
1669         if (!names.containsKey(ins)) {
1670             Str* str = new (alloc) Str(alloc, name);
1671             int suffix = namecounts.add(*str);
1672             addNameWithSuffix(ins, name, suffix, /*ignoreOneSuffix*/true);
1673         }
1674     }
1675
1676     const char* LirNameMap::createName(LIns* ins) {
1677         if (ins->isCall()) {
1678 #if NJ_SOFTFLOAT_SUPPORTED
1679             if (ins->isop(LIR_hcalli)) {
1680                 ins = ins->oprnd1();    // we've presumably seen the other half already
1681             } else
1682 #endif
1683             {
1684                 if (!names.containsKey(ins))
1685                     addNameWithSuffix(ins, ins->callInfo()->_name, funccounts.add(ins->callInfo()),
1686                                       /*ignoreOneSuffix*/false);
1687             }
1688         } else {
1689             if (!names.containsKey(ins))
1690                 addNameWithSuffix(ins, lirNames[ins->opcode()], lircounts.add(ins->opcode()),
1691                                   /*ignoreOneSuffix*/false);
1692
1693         }
1694         return names.get(ins)->name;
1695     }
1696
1697     const char* LirNameMap::lookupName(LIns* ins)
1698     {
1699         Entry* e = names.get(ins);
1700         return e ? e->name : NULL;
1701     }
1702
1703     char* LInsPrinter::formatAccSet(RefBuf* buf, AccSet accSet) {
1704         if (accSet == ACCSET_NONE) {
1705             VMPI_sprintf(buf->buf, ".none");
1706         } else if (accSet == ACCSET_ALL) {
1707             VMPI_sprintf(buf->buf, ".all");
1708         } else {
1709             char* b = buf->buf;
1710             b[0] = 0;
1711             // The AccSet may contain bits set for regions not used by the
1712             // embedding, if any have been specified via
1713             // (ACCSET_ALL & ~ACCSET_XYZ).  So only print those that are
1714             // relevant.
1715             for (int i = 0; i < EMB_NUM_USED_ACCS; i++) {
1716                 if (accSet & (1 << i)) {
1717                     VMPI_strcat(b, ".");
1718                     VMPI_strcat(b, accNames[i]);
1719                     accSet &= ~(1 << i);
1720                 }
1721             }
1722             NanoAssert(VMPI_strlen(b) < buf->len);
1723         }
1724         return buf->buf;
1725     }
1726
1727     char* LInsPrinter::formatImmI(RefBuf* buf, int32_t c) {
1728         if (-10000 < c && c < 10000) {
1729             VMPI_snprintf(buf->buf, buf->len, "%d", c);
1730         } else {
1731 #if !defined NANOJIT_64BIT
1732             formatAddr(buf, (void*)c);
1733 #else
1734             VMPI_snprintf(buf->buf, buf->len, "0x%x", (unsigned int)c);
1735 #endif
1736         }
1737         return buf->buf;
1738     }
1739
1740 #if defined NANOJIT_64BIT
1741     char* LInsPrinter::formatImmQ(RefBuf* buf, uint64_t c) {
1742         if (-10000 < (int64_t)c && c < 10000) {
1743             VMPI_snprintf(buf->buf, buf->len, "%dLL", (int)c);
1744         } else {
1745             formatAddr(buf, (void*)c);
1746         }
1747         return buf->buf;
1748     }
1749 #endif
1750
1751     char* LInsPrinter::formatImmD(RefBuf* buf, double c) {
1752         VMPI_snprintf(buf->buf, buf->len, "%g", c);
1753         return buf->buf;
1754     }
1755
1756     char* LInsPrinter::formatAddr(RefBuf* buf, void* p)
1757     {
1758         char*   name;
1759         int32_t offset;
1760         addrNameMap->lookupAddr(p, name, offset);
1761
1762         if (name) {
1763             if (offset != 0) {
1764                 VMPI_snprintf(buf->buf, buf->len, "%p %s+%d", p, name, offset);
1765             } else {
1766                 VMPI_snprintf(buf->buf, buf->len, "%p %s", p, name);
1767             }
1768         } else {
1769             VMPI_snprintf(buf->buf, buf->len, "%p", p);
1770         }
1771
1772         return buf->buf;
1773     }
1774
1775     char* LInsPrinter::formatRef(RefBuf* buf, LIns *ref, bool showImmValue)
1776     {
1777         // Give 'ref' a name if it doesn't have one.
1778         const char* name = lirNameMap->lookupName(ref);
1779         if (!name) {
1780             name = lirNameMap->createName(ref);
1781         }
1782
1783         // Put it in the buffer.  If it's an immediate, show the value if
1784         // showImmValue==true.  (This facility allows us to print immediate
1785         // values when they're used but not when they're def'd, ie. we don't
1786         // want "immi1/*1*/ = immi 1".)
1787         RefBuf buf2;
1788         if (ref->isImmI() && showImmValue) {
1789             VMPI_snprintf(buf->buf, buf->len, "%s/*%s*/", name, formatImmI(&buf2, ref->immI()));
1790         }
1791 #ifdef NANOJIT_64BIT
1792         else if (ref->isImmQ() && showImmValue) {
1793             VMPI_snprintf(buf->buf, buf->len, "%s/*%s*/", name, formatImmQ(&buf2, ref->immQ()));
1794         }
1795 #endif
1796         else if (ref->isImmD() && showImmValue) {
1797             VMPI_snprintf(buf->buf, buf->len, "%s/*%s*/", name, formatImmD(&buf2, ref->immD()));
1798         }
1799         else {
1800             VMPI_snprintf(buf->buf, buf->len, "%s", name);
1801         }
1802
1803         return buf->buf;
1804     }
1805
1806     char* LInsPrinter::formatIns(InsBuf* buf, LIns* i)
1807     {
1808         char *s = buf->buf;
1809         size_t n = buf->len;
1810         RefBuf b1, b2, b3, b4;
1811         LOpcode op = i->opcode();
1812         switch (op)
1813         {
1814             case LIR_immi:
1815                 VMPI_snprintf(s, n, "%s = %s %s", formatRef(&b1, i, /*showImmValue*/false),
1816                               lirNames[op], formatImmI(&b2, i->immI()));
1817                 break;
1818
1819 #ifdef NANOJIT_64BIT
1820             case LIR_immq:
1821                 VMPI_snprintf(s, n, "%s = %s %s", formatRef(&b1, i, /*showImmValue*/false),
1822                               lirNames[op], formatImmQ(&b2, i->immQ()));
1823                 break;
1824 #endif
1825
1826             case LIR_immd:
1827                 VMPI_snprintf(s, n, "%s = %s %s", formatRef(&b1, i, /*showImmValue*/false),
1828                               lirNames[op], formatImmD(&b2, i->immD()));
1829                 break;
1830
1831             case LIR_allocp:
1832                 VMPI_snprintf(s, n, "%s = %s %d", formatRef(&b1, i), lirNames[op], i->size());
1833                 break;
1834
1835             case LIR_start:
1836             case LIR_regfence:
1837                 VMPI_snprintf(s, n, "%s", lirNames[op]);
1838                 break;
1839
1840             case LIR_callv:
1841             case LIR_calli:
1842             CASE64(LIR_callq:)
1843             case LIR_calld: {
1844                 const CallInfo* call = i->callInfo();
1845                 int32_t argc = i->argc();
1846                 int32_t m = int32_t(n);     // Windows doesn't have 'ssize_t'
1847                 if (call->isIndirect())
1848                     m -= VMPI_snprintf(s, m, "%s = %s%s [%s] ( ", formatRef(&b1, i), lirNames[op],
1849                                        formatAccSet(&b2, call->_storeAccSet),
1850                                        formatRef(&b3, i->arg(--argc)));
1851                 else
1852                     m -= VMPI_snprintf(s, m, "%s = %s%s #%s ( ", formatRef(&b1, i), lirNames[op],
1853                                        formatAccSet(&b2, call->_storeAccSet), call->_name);
1854                 if (m < 0) break;
1855                 for (int32_t j = argc - 1; j >= 0; j--) {
1856                     s += VMPI_strlen(s);
1857                     m -= VMPI_snprintf(s, m, "%s ",formatRef(&b2, i->arg(j)));
1858                     if (m < 0) break;
1859                 }
1860                 s += VMPI_strlen(s);
1861                 m -= VMPI_snprintf(s, m, ")");
1862                 break;
1863             }
1864
1865             case LIR_jtbl: {
1866                 int32_t m = int32_t(n);     // Windows doesn't have 'ssize_t'
1867                 m -= VMPI_snprintf(s, m, "%s %s [ ", lirNames[op], formatRef(&b1, i->oprnd1()));
1868                 if (m < 0) break;
1869                 for (uint32_t j = 0, sz = i->getTableSize(); j < sz; j++) {
1870                     LIns* target = i->getTarget(j);
1871                     s += VMPI_strlen(s);
1872                     m -= VMPI_snprintf(s, m, "%s ", target ? formatRef(&b2, target) : "unpatched");
1873                     if (m < 0) break;
1874                 }
1875                 s += VMPI_strlen(s);
1876                 m -= VMPI_snprintf(s, m, "]");
1877                 break;
1878             }
1879
1880             case LIR_paramp: {
1881                 uint32_t arg = i->paramArg();
1882                 if (!i->paramKind()) {
1883                     if (arg < sizeof(Assembler::argRegs)/sizeof(Assembler::argRegs[0])) {
1884                         VMPI_snprintf(s, n, "%s = %s %d %s", formatRef(&b1, i), lirNames[op],
1885                             arg, gpn(Assembler::argRegs[arg]));
1886                     } else {
1887                         VMPI_snprintf(s, n, "%s = %s %d", formatRef(&b1, i), lirNames[op], arg);
1888                     }
1889                 } else {
1890                     VMPI_snprintf(s, n, "%s = %s %d %s", formatRef(&b1, i), lirNames[op],
1891                         arg, gpn(Assembler::savedRegs[arg]));
1892                 }
1893                 break;
1894             }
1895
1896             case LIR_label:
1897                 VMPI_snprintf(s, n, "%s:", formatRef(&b1, i));
1898                 break;
1899
1900             case LIR_jt:
1901             case LIR_jf:
1902                 VMPI_snprintf(s, n, "%s %s -> %s", lirNames[op], formatRef(&b1, i->oprnd1()),
1903                     i->oprnd2() ? formatRef(&b2, i->oprnd2()) : "unpatched");
1904                 break;
1905
1906             case LIR_j:
1907                 VMPI_snprintf(s, n, "%s -> %s", lirNames[op],
1908                     i->oprnd2() ? formatRef(&b1, i->oprnd2()) : "unpatched");
1909                 break;
1910
1911             case LIR_livei:
1912             case LIR_lived:
1913             CASE64(LIR_liveq:)
1914             case LIR_reti:
1915             CASE64(LIR_retq:)
1916             case LIR_retd:
1917                 VMPI_snprintf(s, n, "%s %s", lirNames[op], formatRef(&b1, i->oprnd1()));
1918                 break;
1919
1920             CASESF(LIR_hcalli:)
1921             case LIR_negi:
1922             case LIR_negd:
1923             case LIR_i2d:
1924             case LIR_ui2d:
1925             CASESF(LIR_dlo2i:)
1926             CASESF(LIR_dhi2i:)
1927             case LIR_noti:
1928             CASE86(LIR_modi:)
1929             CASE64(LIR_i2q:)
1930             CASE64(LIR_ui2uq:)
1931             CASE64(LIR_q2i:)
1932             case LIR_d2i:
1933             CASE64(LIR_dasq:)
1934             CASE64(LIR_qasd:)
1935                 VMPI_snprintf(s, n, "%s = %s %s", formatRef(&b1, i), lirNames[op],
1936                              formatRef(&b2, i->oprnd1()));
1937                 break;
1938
1939             case LIR_x:
1940             case LIR_xt:
1941             case LIR_xf:
1942             case LIR_xbarrier:
1943             case LIR_xtbl:
1944                 formatGuard(buf, i);
1945                 break;
1946
1947             case LIR_addxovi:
1948             case LIR_subxovi:
1949             case LIR_mulxovi:
1950                 formatGuardXov(buf, i);
1951                 break;
1952
1953             case LIR_addjovi:
1954             case LIR_subjovi:
1955             case LIR_muljovi:
1956             CASE64(LIR_addjovq:)
1957             CASE64(LIR_subjovq:)
1958                 VMPI_snprintf(s, n, "%s = %s %s, %s ; ovf -> %s", formatRef(&b1, i), lirNames[op],
1959                     formatRef(&b2, i->oprnd1()),
1960                     formatRef(&b3, i->oprnd2()),
1961                     i->oprnd3() ? formatRef(&b4, i->oprnd3()) : "unpatched");
1962                 break;
1963
1964             case LIR_addi:       CASE64(LIR_addq:)
1965             case LIR_subi:       CASE64(LIR_subq:)
1966             case LIR_muli:
1967             CASE86(LIR_divi:)
1968             case LIR_addd:
1969             case LIR_subd:
1970             case LIR_muld:
1971             case LIR_divd:
1972             case LIR_andi:       CASE64(LIR_andq:)
1973             case LIR_ori:        CASE64(LIR_orq:)
1974             case LIR_xori:       CASE64(LIR_xorq:)
1975             case LIR_lshi:       CASE64(LIR_lshq:)
1976             case LIR_rshi:       CASE64(LIR_rshq:)
1977             case LIR_rshui:      CASE64(LIR_rshuq:)
1978             case LIR_eqi:        CASE64(LIR_eqq:)
1979             case LIR_lti:        CASE64(LIR_ltq:)
1980             case LIR_lei:        CASE64(LIR_leq:)
1981             case LIR_gti:        CASE64(LIR_gtq:)
1982             case LIR_gei:        CASE64(LIR_geq:)
1983             case LIR_ltui:       CASE64(LIR_ltuq:)
1984             case LIR_leui:       CASE64(LIR_leuq:)
1985             case LIR_gtui:       CASE64(LIR_gtuq:)
1986             case LIR_geui:       CASE64(LIR_geuq:)
1987             case LIR_eqd:
1988             case LIR_ltd:
1989             case LIR_led:
1990             case LIR_gtd:
1991             case LIR_ged:
1992 #if NJ_SOFTFLOAT_SUPPORTED
1993             case LIR_ii2d:
1994 #endif
1995                 VMPI_snprintf(s, n, "%s = %s %s, %s", formatRef(&b1, i), lirNames[op],
1996                     formatRef(&b2, i->oprnd1()),
1997                     formatRef(&b3, i->oprnd2()));
1998                 break;
1999
2000             CASE64(LIR_cmovq:)
2001             case LIR_cmovi:
2002             case LIR_cmovd:
2003                 VMPI_snprintf(s, n, "%s = %s %s ? %s : %s", formatRef(&b1, i), lirNames[op],
2004                     formatRef(&b2, i->oprnd1()),
2005                     formatRef(&b3, i->oprnd2()),
2006                     formatRef(&b4, i->oprnd3()));
2007                 break;
2008
2009             case LIR_ldi:
2010             CASE64(LIR_ldq:)
2011             case LIR_ldd:
2012             case LIR_lduc2ui:
2013             case LIR_ldus2ui:
2014             case LIR_ldc2i:
2015             case LIR_lds2i:
2016             case LIR_ldf2d: {
2017                 const char* qualStr;
2018                 switch (i->loadQual()) {
2019                 case LOAD_CONST:        qualStr = "/c"; break;
2020                 case LOAD_NORMAL:       qualStr = "";   break;
2021                 case LOAD_VOLATILE:     qualStr = "/v"; break;
2022                 default: NanoAssert(0); qualStr = "/?"; break;
2023                 }
2024                 VMPI_snprintf(s, n, "%s = %s%s%s %s[%d]", formatRef(&b1, i), lirNames[op],
2025                     formatAccSet(&b2, i->accSet()), qualStr, formatRef(&b3, i->oprnd1()),
2026                     i->disp());
2027                 break;
2028             }
2029
2030             case LIR_sti:
2031             CASE64(LIR_stq:)
2032             case LIR_std:
2033             case LIR_sti2c:
2034             case LIR_sti2s:
2035             case LIR_std2f:
2036                 VMPI_snprintf(s, n, "%s%s %s[%d] = %s", lirNames[op],
2037                     formatAccSet(&b1, i->accSet()),
2038                     formatRef(&b2, i->oprnd2()),
2039                     i->disp(),
2040                     formatRef(&b3, i->oprnd1()));
2041                 break;
2042
2043             case LIR_comment:
2044                 VMPI_snprintf(s, n, "------------------------------ # %s", (char*)i->oprnd1());
2045                 break;
2046
2047             default:
2048                 NanoAssertMsgf(0, "Can't handle opcode %s\n", lirNames[op]);
2049                 break;
2050         }
2051         return buf->buf;
2052     }
2053 #endif
2054
2055     CseFilter::CseFilter(LirWriter *out, uint8_t embNumUsedAccs, Allocator& alloc)
2056         : LirWriter(out),
2057           EMB_NUM_USED_ACCS(embNumUsedAccs),
2058           CSE_NUM_USED_ACCS(EMB_NUM_USED_ACCS + 2),
2059           CSE_ACC_CONST(    EMB_NUM_USED_ACCS + 0),
2060           CSE_ACC_MULTIPLE( EMB_NUM_USED_ACCS + 1),
2061           storesSinceLastLoad(ACCSET_NONE),
2062           alloc(alloc),
2063           knownCmpValues(alloc),
2064           suspended(false),
2065           initOOM(false)
2066     {
2067         m_findNL[NLImmISmall] = &CseFilter::findImmISmall;
2068         m_findNL[NLImmILarge] = &CseFilter::findImmILarge;
2069         m_findNL[NLImmQ]      = PTR_SIZE(NULL, &CseFilter::findImmQ);
2070         m_findNL[NLImmD]      = &CseFilter::findImmD;
2071         m_findNL[NL1]         = &CseFilter::find1;
2072         m_findNL[NL2]         = &CseFilter::find2;
2073         m_findNL[NL3]         = &CseFilter::find3;
2074         m_findNL[NLCall]      = &CseFilter::findCall;
2075
2076         m_capNL[NLImmISmall]  = 17;   // covers 0..16, which is over half the cases for TraceMonkey
2077         m_capNL[NLImmILarge]  = 64;
2078         m_capNL[NLImmQ]       = PTR_SIZE(0, 16);
2079         m_capNL[NLImmD]       = 16;
2080         m_capNL[NL1]          = 256;
2081         m_capNL[NL2]          = 512;
2082         m_capNL[NL3]          = 16;
2083         m_capNL[NLCall]       = 64;
2084
2085         // The largish allocations are fallible, the small ones are
2086         // infallible.  See the comment on initOOM's declaration for why.
2087
2088         for (NLKind nlkind = NLFirst; nlkind <= NLLast; nlkind = nextNLKind(nlkind)) {
2089             m_listNL[nlkind] = (LIns**)alloc.fallibleAlloc(sizeof(LIns*) * m_capNL[nlkind]);
2090             if (!m_listNL[nlkind]) {
2091                 initOOM = true;
2092                 return;
2093             }
2094             m_usedNL[nlkind] = 1; // Force memset in clearAll().
2095         }
2096
2097         // Note that this allocates the CONST and MULTIPLE tables as well.
2098         for (CseAcc a = 0; a < CSE_NUM_USED_ACCS; a++) {
2099             m_capL[a] = 16;
2100             m_listL[a] = (LIns**)alloc.fallibleAlloc(sizeof(LIns*) * m_capL[a]);
2101             if (!m_listL[a]) {
2102                 initOOM = true;
2103                 return;
2104             }
2105             m_usedL[a] = 1; // Force memset(0) in first clearAll().
2106         }
2107
2108         clearAll();
2109     }
2110
2111     // Inlined/separated version of SuperFastHash.
2112     // This content is copyrighted by Paul Hsieh.
2113     // For reference see: http://www.azillionmonkeys.com/qed/hash.html
2114     //
2115     inline uint32_t CseFilter::hash8(uint32_t hash, const uint8_t data)
2116     {
2117         hash += data;
2118         hash ^= hash << 10;
2119         hash += hash >> 1;
2120         return hash;
2121     }
2122
2123     inline uint32_t CseFilter::hash32(uint32_t hash, const uint32_t data)
2124     {
2125         const uint32_t dlo = data & 0xffff;
2126         const uint32_t dhi = data >> 16;
2127         hash += dlo;
2128         const uint32_t tmp = (dhi << 11) ^ hash;
2129         hash = (hash << 16) ^ tmp;
2130         hash += hash >> 11;
2131         return hash;
2132     }
2133
2134     inline uint32_t CseFilter::hashptr(uint32_t hash, const void* data)
2135     {
2136 #ifdef NANOJIT_64BIT
2137         hash = hash32(hash, uint32_t(uintptr_t(data) >> 32));
2138         hash = hash32(hash, uint32_t(uintptr_t(data)));
2139         return hash;
2140 #else
2141         return hash32(hash, uint32_t(data));
2142 #endif
2143     }
2144
2145     inline uint32_t CseFilter::hashfinish(uint32_t hash)
2146     {
2147         /* Force "avalanching" of final 127 bits */
2148         hash ^= hash << 3;
2149         hash += hash >> 5;
2150         hash ^= hash << 4;
2151         hash += hash >> 17;
2152         hash ^= hash << 25;
2153         hash += hash >> 6;
2154         return hash;
2155     }
2156
2157     void CseFilter::clearNL(NLKind nlkind) {
2158         if (m_usedNL[nlkind] > 0) {
2159             VMPI_memset(m_listNL[nlkind], 0, sizeof(LIns*)*m_capNL[nlkind]);
2160             m_usedNL[nlkind] = 0;
2161         }
2162     }
2163
2164     void CseFilter::clearL(CseAcc a) {
2165         if (m_usedL[a] > 0) {
2166             VMPI_memset(m_listL[a], 0, sizeof(LIns*)*m_capL[a]);
2167             m_usedL[a] = 0;
2168         }
2169     }
2170
2171     void CseFilter::clearAll() {
2172         for (NLKind nlkind = NLFirst; nlkind <= NLLast; nlkind = nextNLKind(nlkind))
2173             clearNL(nlkind);
2174
2175         // Note that this clears the CONST and MULTIPLE load tables as well.
2176         for (CseAcc a = 0; a < CSE_NUM_USED_ACCS; a++)
2177             clearL(a);
2178
2179         knownCmpValues.clear();
2180     }
2181
2182     inline uint32_t CseFilter::hashImmI(int32_t a) {
2183         return hashfinish(hash32(0, a));
2184     }
2185
2186     inline uint32_t CseFilter::hashImmQorD(uint64_t a) {
2187         uint32_t hash = hash32(0, uint32_t(a >> 32));
2188         return hashfinish(hash32(hash, uint32_t(a)));
2189     }
2190
2191     inline uint32_t CseFilter::hash1(LOpcode op, LIns* a) {
2192         uint32_t hash = hash8(0, uint8_t(op));
2193         return hashfinish(hashptr(hash, a));
2194     }
2195
2196     inline uint32_t CseFilter::hash2(LOpcode op, LIns* a, LIns* b) {
2197         uint32_t hash = hash8(0, uint8_t(op));
2198         hash = hashptr(hash, a);
2199         return hashfinish(hashptr(hash, b));
2200     }
2201
2202     inline uint32_t CseFilter::hash3(LOpcode op, LIns* a, LIns* b, LIns* c) {
2203         uint32_t hash = hash8(0, uint8_t(op));
2204         hash = hashptr(hash, a);
2205         hash = hashptr(hash, b);
2206         return hashfinish(hashptr(hash, c));
2207     }
2208
2209     // Nb: no need to hash the load's MiniAccSet because each every load goes
2210     // into a table where all the loads have the same MiniAccSet.
2211     inline uint32_t CseFilter::hashLoad(LOpcode op, LIns* a, int32_t d) {
2212         uint32_t hash = hash8(0, uint8_t(op));
2213         hash = hashptr(hash, a);
2214         return hashfinish(hash32(hash, d));
2215     }
2216
2217     inline uint32_t CseFilter::hashCall(const CallInfo *ci, uint32_t argc, LIns* args[]) {
2218         uint32_t hash = hashptr(0, ci);
2219         for (int32_t j=argc-1; j >= 0; j--)
2220             hash = hashptr(hash,args[j]);
2221         return hashfinish(hash);
2222     }
2223
2224     bool CseFilter::growNL(NLKind nlkind)
2225     {
2226         NanoAssert(nlkind != NLImmISmall);
2227         const uint32_t oldcap = m_capNL[nlkind];
2228         m_capNL[nlkind] <<= 1;
2229         // We make this allocation fallible because it's potentially large and
2230         // easy to recover from.  If it fails, we won't add any more
2231         // instructions to the table and some CSE opportunities may be missed.
2232         LIns** tmp = (LIns**)alloc.fallibleAlloc(sizeof(LIns*) * m_capNL[nlkind]);
2233         if (tmp) {
2234             LIns** oldlist = m_listNL[nlkind];
2235             m_listNL[nlkind] = tmp;
2236             VMPI_memset(m_listNL[nlkind], 0, m_capNL[nlkind] * sizeof(LIns*));
2237             find_t find = m_findNL[nlkind];
2238             for (uint32_t i = 0; i < oldcap; i++) {
2239                 LIns* ins = oldlist[i];
2240                 if (!ins) continue;
2241                 uint32_t j = (this->*find)(ins);
2242                 NanoAssert(!m_listNL[nlkind][j]);
2243                 m_listNL[nlkind][j] = ins;
2244             }
2245             return true;
2246         } else {
2247             m_capNL[nlkind] = oldcap;
2248             return false;
2249         }
2250     }
2251
2252     bool CseFilter::growL(CseAcc cseAcc)
2253     {
2254         const uint32_t oldcap = m_capL[cseAcc];
2255         m_capL[cseAcc] <<= 1;
2256         LIns** tmp = (LIns**)alloc.fallibleAlloc(sizeof(LIns*) * m_capL[cseAcc]);
2257         if (tmp) {
2258             LIns** oldlist = m_listL[cseAcc];
2259             m_listL[cseAcc] = tmp;
2260             VMPI_memset(m_listL[cseAcc], 0, m_capL[cseAcc] * sizeof(LIns*));
2261             find_t find = &CseFilter::findLoad;
2262             for (uint32_t i = 0; i < oldcap; i++) {
2263                 LIns* ins = oldlist[i];
2264                 if (!ins) continue;
2265                 uint32_t j = (this->*find)(ins);
2266                 NanoAssert(!m_listL[cseAcc][j]);
2267                 m_listL[cseAcc][j] = ins;
2268             }
2269             return true;
2270         } else {
2271             m_capL[cseAcc] = oldcap;
2272             return false;
2273         }
2274     }
2275
2276     void CseFilter::addNLImmISmall(LIns* ins, uint32_t k)
2277     {
2278         NanoAssert(!initOOM);
2279         if (suspended) return;
2280         NLKind nlkind = NLImmISmall;
2281         NanoAssert(k < m_capNL[nlkind]);
2282         NanoAssert(!m_listNL[nlkind][k]);
2283         m_usedNL[nlkind]++;
2284         m_listNL[nlkind][k] = ins;
2285     }
2286
2287     void CseFilter::addNL(NLKind nlkind, LIns* ins, uint32_t k)
2288     {
2289         NanoAssert(!initOOM);
2290         if (suspended) return;
2291         NanoAssert(!m_listNL[nlkind][k]);
2292         m_usedNL[nlkind]++;
2293         m_listNL[nlkind][k] = ins;
2294         if ((m_usedNL[nlkind] * 4) >= (m_capNL[nlkind] * 3)) {  // load factor of 0.75
2295             bool ok = growNL(nlkind);
2296             if (!ok) {
2297                 // OOM: undo the insertion.
2298                 m_usedNL[nlkind]--;
2299                 m_listNL[nlkind][k] = NULL;
2300             }
2301         }
2302     }
2303
2304     void CseFilter::addL(LIns* ins, uint32_t k)
2305     {
2306         NanoAssert(!initOOM);
2307         if (suspended) return;
2308         CseAcc cseAcc = miniAccSetToCseAcc(ins->miniAccSet(), ins->loadQual());
2309         NanoAssert(!m_listL[cseAcc][k]);
2310         m_usedL[cseAcc]++;
2311         m_listL[cseAcc][k] = ins;
2312         if ((m_usedL[cseAcc] * 4) >= (m_capL[cseAcc] * 3)) {  // load factor of 0.75
2313             bool ok = growL(cseAcc);
2314             if (!ok) {
2315                 // OOM: undo the insertion.
2316                 m_usedL[cseAcc]--;
2317                 m_listL[cseAcc][k] = NULL;
2318             }
2319         }
2320     }
2321
2322     inline LIns* CseFilter::findImmISmall(int32_t a, uint32_t &k)
2323     {
2324         // This one is a direct array lookup rather than a hashtable lookup.
2325         NLKind nlkind = NLImmISmall;
2326         k = a;
2327         LIns* ins = m_listNL[nlkind][k];
2328         NanoAssert(!ins || ins->isImmI(a));
2329         return ins;
2330     }
2331
2332     uint32_t CseFilter::findImmISmall(LIns* ins)
2333     {
2334         uint32_t k;
2335         findImmISmall(ins->immI(), k);
2336         return k;
2337     }
2338
2339     inline LIns* CseFilter::findImmILarge(int32_t a, uint32_t &k)
2340     {
2341         NLKind nlkind = NLImmILarge;
2342         const uint32_t bitmask = m_capNL[nlkind] - 1;
2343         k = hashImmI(a) & bitmask;
2344         uint32_t n = 1;
2345         while (true) {
2346             LIns* ins = m_listNL[nlkind][k];
2347             if (!ins)
2348                 return NULL;
2349             NanoAssert(ins->isImmI());
2350             if (ins->immI() == a)
2351                 return ins;
2352             // Quadratic probe:  h(k,i) = h(k) + 0.5i + 0.5i^2, which gives the
2353             // sequence h(k), h(k)+1, h(k)+3, h(k)+6, h+10, ...  This is a
2354             // good sequence for 2^n-sized tables as the values h(k,i) for i
2355             // in [0,m − 1] are all distinct so termination is guaranteed.
2356             // See http://portal.acm.org/citation.cfm?id=360737 and
2357             // http://en.wikipedia.org/wiki/Quadratic_probing (fetched
2358             // 06-Nov-2009) for more details.
2359             k = (k + n) & bitmask;
2360             n += 1;
2361         }
2362     }
2363
2364     uint32_t CseFilter::findImmILarge(LIns* ins)
2365     {
2366         uint32_t k;
2367         findImmILarge(ins->immI(), k);
2368         return k;
2369     }
2370
2371 #ifdef NANOJIT_64BIT
2372     inline LIns* CseFilter::findImmQ(uint64_t a, uint32_t &k)
2373     {
2374         NLKind nlkind = NLImmQ;
2375         const uint32_t bitmask = m_capNL[nlkind] - 1;
2376         k = hashImmQorD(a) & bitmask;
2377         uint32_t n = 1;
2378         while (true) {
2379             LIns* ins = m_listNL[nlkind][k];
2380             if (!ins)
2381                 return NULL;
2382             NanoAssert(ins->isImmQ());
2383             if (ins->immQ() == a)
2384                 return ins;
2385             k = (k + n) & bitmask;
2386             n += 1;
2387         }
2388     }
2389
2390     uint32_t CseFilter::findImmQ(LIns* ins)
2391     {
2392         uint32_t k;
2393         findImmQ(ins->immQ(), k);
2394         return k;
2395     }
2396 #endif
2397
2398     inline LIns* CseFilter::findImmD(uint64_t a, uint32_t &k)
2399     {
2400         NLKind nlkind = NLImmD;
2401         const uint32_t bitmask = m_capNL[nlkind] - 1;
2402         k = hashImmQorD(a) & bitmask;
2403         uint32_t n = 1;
2404         while (true) {
2405             LIns* ins = m_listNL[nlkind][k];
2406             if (!ins)
2407                 return NULL;
2408             NanoAssert(ins->isImmD());
2409             if (ins->immDasQ() == a)
2410                 return ins;
2411             k = (k + n) & bitmask;
2412             n += 1;
2413         }
2414     }
2415
2416     uint32_t CseFilter::findImmD(LIns* ins)
2417     {
2418         uint32_t k;
2419         findImmD(ins->immDasQ(), k);
2420         return k;
2421     }
2422
2423     inline LIns* CseFilter::find1(LOpcode op, LIns* a, uint32_t &k)
2424     {
2425         NLKind nlkind = NL1;
2426         const uint32_t bitmask = m_capNL[nlkind] - 1;
2427         k = hash1(op, a) & bitmask;
2428         uint32_t n = 1;
2429         while (true) {
2430             LIns* ins = m_listNL[nlkind][k];
2431             if (!ins)
2432                 return NULL;
2433             if (ins->isop(op) && ins->oprnd1() == a)
2434                 return ins;
2435             k = (k + n) & bitmask;
2436             n += 1;
2437         }
2438     }
2439
2440     uint32_t CseFilter::find1(LIns* ins)
2441     {
2442         uint32_t k;
2443         find1(ins->opcode(), ins->oprnd1(), k);
2444         return k;
2445     }
2446
2447     inline LIns* CseFilter::find2(LOpcode op, LIns* a, LIns* b, uint32_t &k)
2448     {
2449         NLKind nlkind = NL2;
2450         const uint32_t bitmask = m_capNL[nlkind] - 1;
2451         k = hash2(op, a, b) & bitmask;
2452         uint32_t n = 1;
2453         while (true) {
2454             LIns* ins = m_listNL[nlkind][k];
2455             if (!ins)
2456                 return NULL;
2457             if (ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b)
2458                 return ins;
2459             k = (k + n) & bitmask;
2460             n += 1;
2461         }
2462     }
2463
2464     uint32_t CseFilter::find2(LIns* ins)
2465     {
2466         uint32_t k;
2467         find2(ins->opcode(), ins->oprnd1(), ins->oprnd2(), k);
2468         return k;
2469     }
2470
2471     inline LIns* CseFilter::find3(LOpcode op, LIns* a, LIns* b, LIns* c, uint32_t &k)
2472     {
2473         NLKind nlkind = NL3;
2474         const uint32_t bitmask = m_capNL[nlkind] - 1;
2475         k = hash3(op, a, b, c) & bitmask;
2476         uint32_t n = 1;
2477         while (true) {
2478             LIns* ins = m_listNL[nlkind][k];
2479             if (!ins)
2480                 return NULL;
2481             if (ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b && ins->oprnd3() == c)
2482                 return ins;
2483             k = (k + n) & bitmask;
2484             n += 1;
2485         }
2486     }
2487
2488     uint32_t CseFilter::find3(LIns* ins)
2489     {
2490         uint32_t k;
2491         find3(ins->opcode(), ins->oprnd1(), ins->oprnd2(), ins->oprnd3(), k);
2492         return k;
2493     }
2494
2495     inline LIns* CseFilter::findLoad(LOpcode op, LIns* a, int32_t d, MiniAccSet miniAccSet,
2496                                      LoadQual loadQual, uint32_t &k)
2497     {
2498         CseAcc cseAcc = miniAccSetToCseAcc(miniAccSet, loadQual);
2499         const uint32_t bitmask = m_capL[cseAcc] - 1;
2500         k = hashLoad(op, a, d) & bitmask;
2501         uint32_t n = 1;
2502         while (true) {
2503             LIns* ins = m_listL[cseAcc][k];
2504             if (!ins)
2505                 return NULL;
2506             // All the loads in this table should have the same miniAccSet and
2507             // loadQual.
2508             NanoAssert(miniAccSetToCseAcc(ins->miniAccSet(), ins->loadQual()) == cseAcc &&
2509                        ins->loadQual() == loadQual);
2510             if (ins->isop(op) && ins->oprnd1() == a && ins->disp() == d)
2511                 return ins;
2512             k = (k + n) & bitmask;
2513             n += 1;
2514         }
2515     }
2516
2517     uint32_t CseFilter::findLoad(LIns* ins)
2518     {
2519         uint32_t k;
2520         findLoad(ins->opcode(), ins->oprnd1(), ins->disp(), ins->miniAccSet(), ins->loadQual(), k);
2521         return k;
2522     }
2523
2524     bool argsmatch(LIns* ins, uint32_t argc, LIns* args[])
2525     {
2526         for (uint32_t j=0; j < argc; j++)
2527             if (ins->arg(j) != args[j])
2528                 return false;
2529         return true;
2530     }
2531
2532     inline LIns* CseFilter::findCall(const CallInfo *ci, uint32_t argc, LIns* args[], uint32_t &k)
2533     {
2534         NLKind nlkind = NLCall;
2535         const uint32_t bitmask = m_capNL[nlkind] - 1;
2536         k = hashCall(ci, argc, args) & bitmask;
2537         uint32_t n = 1;
2538         while (true) {
2539             LIns* ins = m_listNL[nlkind][k];
2540             if (!ins)
2541                 return NULL;
2542             if (ins->isCall() && ins->callInfo() == ci && argsmatch(ins, argc, args))
2543                 return ins;
2544             k = (k + n) & bitmask;
2545             n += 1;
2546         }
2547     }
2548
2549     uint32_t CseFilter::findCall(LIns* ins)
2550     {
2551         LIns* args[MAXARGS];
2552         uint32_t argc = ins->argc();
2553         NanoAssert(argc < MAXARGS);
2554         for (uint32_t j=0; j < argc; j++)
2555             args[j] = ins->arg(j);
2556         uint32_t k;
2557         findCall(ins->callInfo(), argc, args, k);
2558         return k;
2559     }
2560
2561     LIns* CseFilter::insImmI(int32_t imm)
2562     {
2563         uint32_t k;
2564         LIns* ins;
2565         if (0 <= imm && imm < int32_t(m_capNL[NLImmISmall])) {
2566             ins = findImmISmall(imm, k);
2567             if (!ins) {
2568                 ins = out->insImmI(imm);
2569                 addNLImmISmall(ins, k);
2570             }
2571         } else {
2572             ins = findImmILarge(imm, k);
2573             if (!ins) {
2574                 ins = out->insImmI(imm);
2575                 addNL(NLImmILarge, ins, k);
2576             }
2577         }
2578         // We assume that downstream stages do not modify the instruction, so
2579         // that we can insert 'ins' into slot 'k'.  Check this.
2580         NanoAssert(ins->isop(LIR_immi) && ins->immI() == imm);
2581         return ins;
2582     }
2583
2584 #ifdef NANOJIT_64BIT
2585     LIns* CseFilter::insImmQ(uint64_t q)
2586     {
2587         uint32_t k;
2588         LIns* ins = findImmQ(q, k);
2589         if (!ins) {
2590             ins = out->insImmQ(q);
2591             addNL(NLImmQ, ins, k);
2592         }
2593         NanoAssert(ins->isop(LIR_immq) && ins->immQ() == q);
2594         return ins;
2595     }
2596 #endif
2597
2598     LIns* CseFilter::insImmD(double d)
2599     {
2600         uint32_t k;
2601         // We must pun 'd' as a uint64_t otherwise 0 and -0 will be treated as
2602         // equal, which breaks things (see bug 527288).
2603         union {
2604             double d;
2605             uint64_t u64;
2606         } u;
2607         u.d = d;
2608         LIns* ins = findImmD(u.u64, k);
2609         if (!ins) {
2610             ins = out->insImmD(d);
2611             addNL(NLImmD, ins, k);
2612         }
2613         NanoAssert(ins->isop(LIR_immd) && ins->immDasQ() == u.u64);
2614         return ins;
2615     }
2616
2617     LIns* CseFilter::ins0(LOpcode op)
2618     {
2619         if (op == LIR_label && !suspended)
2620             clearAll();
2621         return out->ins0(op);
2622     }
2623
2624     LIns* CseFilter::ins1(LOpcode op, LIns* a)
2625     {
2626         LIns* ins;
2627         if (isCseOpcode(op)) {
2628             uint32_t k;
2629             ins = find1(op, a, k);
2630             if (!ins) {
2631                 ins = out->ins1(op, a);
2632                 addNL(NL1, ins, k);
2633             }
2634         } else {
2635             ins = out->ins1(op, a);
2636         }
2637         NanoAssert(ins->isop(op) && ins->oprnd1() == a);
2638         return ins;
2639     }
2640
2641     LIns* CseFilter::ins2(LOpcode op, LIns* a, LIns* b)
2642     {
2643         LIns* ins;
2644         NanoAssert(isCseOpcode(op));
2645         uint32_t k;
2646         ins = find2(op, a, b, k);
2647         if (!ins) {
2648             ins = out->ins2(op, a, b);
2649             addNL(NL2, ins, k);
2650         } else if (ins->isCmp()) {
2651             if (knownCmpValues.containsKey(ins)) {
2652                 // We've seen this comparison before, and it was previously
2653                 // used in a guard, so we know what its value must be at this
2654                 // point.  Replace it with a constant.
2655                 NanoAssert(ins->isCmp());
2656                 bool cmpValue = knownCmpValues.get(ins);
2657                 return insImmI(cmpValue ? 1 : 0);
2658             }
2659         }
2660         NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b);
2661         return ins;
2662     }
2663
2664     LIns* CseFilter::ins3(LOpcode op, LIns* a, LIns* b, LIns* c)
2665     {
2666         NanoAssert(isCseOpcode(op));
2667         uint32_t k;
2668         LIns* ins = find3(op, a, b, c, k);
2669         if (!ins) {
2670             ins = out->ins3(op, a, b, c);
2671             addNL(NL3, ins, k);
2672         }
2673         NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b && ins->oprnd3() == c);
2674         return ins;
2675     }
2676
2677     LIns* CseFilter::insLoad(LOpcode op, LIns* base, int32_t disp, AccSet accSet, LoadQual loadQual)
2678     {
2679         LIns* ins;
2680         if (isS16(disp)) {
2681             if (storesSinceLastLoad != ACCSET_NONE) {
2682                 // Clear all normal (excludes CONST and MULTIPLE) loads
2683                 // aliased by stores and calls since the last time we were in
2684                 // this function.  Aliased loads must be cleared even when CSE
2685                 // is suspended.
2686                 AccSet a = storesSinceLastLoad & ((1 << EMB_NUM_USED_ACCS) - 1);
2687                 while (a) {
2688                     int acc = msbSet32(a);
2689                     clearL((CseAcc)acc);
2690                     a &= ~(1 << acc);
2691                 }
2692
2693                 // No need to clear CONST loads (those in the CSE_ACC_CONST table).
2694
2695                 // Multi-region loads must be treated conservatively -- we
2696                 // always clear all of them.
2697                 clearL(CSE_ACC_MULTIPLE);
2698
2699                 storesSinceLastLoad = ACCSET_NONE;
2700             }
2701
2702             if (loadQual == LOAD_VOLATILE) {
2703                 // Volatile loads are never CSE'd, don't bother looking for
2704                 // them or inserting them in the table.
2705                 ins = out->insLoad(op, base, disp, accSet, loadQual);
2706             } else {
2707                 uint32_t k;
2708                 ins = findLoad(op, base, disp, compressAccSet(accSet), loadQual, k);
2709                 if (!ins) {
2710                     ins = out->insLoad(op, base, disp, accSet, loadQual);
2711                     addL(ins, k);
2712                 }
2713             }
2714             // Nb: must compare miniAccSets, not AccSets, because the AccSet
2715             // stored in the load may have lost info if it's multi-region.
2716             NanoAssert(ins->isop(op) && ins->oprnd1() == base && ins->disp() == disp &&
2717                        ins->miniAccSet().val == compressAccSet(accSet).val &&
2718                        ins->loadQual() == loadQual);
2719         } else {
2720             // If the displacement is more than 16 bits, put it in a separate
2721             // instruction.  Nb: LirBufWriter also does this, we do it here
2722             // too because CseFilter relies on LirBufWriter not changing code.
2723             ins = insLoad(op, ins2(LIR_addp, base, insImmWord(disp)), 0, accSet, loadQual);
2724         }
2725         return ins;
2726     }
2727
2728     LIns* CseFilter::insStore(LOpcode op, LIns* value, LIns* base, int32_t disp, AccSet accSet)
2729     {
2730         LIns* ins;
2731         if (isS16(disp)) {
2732             storesSinceLastLoad |= accSet;
2733             ins = out->insStore(op, value, base, disp, accSet);
2734             NanoAssert(ins->isop(op) && ins->oprnd1() == value && ins->oprnd2() == base &&
2735                        ins->disp() == disp && ins->accSet() == accSet);
2736         } else {
2737             // If the displacement is more than 16 bits, put it in a separate
2738             // instruction.  Nb: LirBufWriter also does this, we do it here
2739             // too because CseFilter relies on LirBufWriter not changing code.
2740             ins = insStore(op, value, ins2(LIR_addp, base, insImmWord(disp)), 0, accSet);
2741         }
2742         return ins;
2743     }
2744
2745     LIns* CseFilter::insGuard(LOpcode op, LIns* c, GuardRecord *gr)
2746     {
2747         // LIR_xt and LIR_xf guards are CSEable.  Note that we compare the
2748         // opcode and condition when determining if two guards are equivalent
2749         // -- in find1() and hash1() -- but we do *not* compare the
2750         // GuardRecord.  This works because:
2751         // - If guard 1 is taken (exits) then guard 2 is never reached, so
2752         //   guard 2 can be removed.
2753         // - If guard 1 is not taken then neither is guard 2, so guard 2 can
2754         //   be removed.
2755         //
2756         // The underlying assumptions that are required for this to be safe:
2757         // - There's never a path from the side exit of guard 1 back to guard
2758         //   2;  for tree-shaped fragments this should be true.
2759         // - GuardRecords do not contain information other than what is needed
2760         //   to execute a successful exit.  That is currently true.
2761         // - The CSE algorithm will always keep guard 1 and remove guard 2
2762         //   (not vice versa).  The current algorithm does this.
2763         //
2764         LIns* ins;
2765         if (isCseOpcode(op)) {
2766             // conditional guard
2767             uint32_t k;
2768             ins = find1(op, c, k);
2769             if (!ins) {
2770                 ins = out->insGuard(op, c, gr);
2771                 addNL(NL1, ins, k);
2772             }
2773             // After this guard, we know that 'c's result was true (if
2774             // op==LIR_xf) or false (if op==LIR_xt), else we would have
2775             // exited.  Record this fact in case 'c' occurs again.
2776             if (!suspended) {
2777                 bool c_value = (op == LIR_xt ? false : true);
2778                 knownCmpValues.put(c, c_value);
2779             }
2780         } else {
2781             ins = out->insGuard(op, c, gr);
2782         }
2783         NanoAssert(ins->isop(op) && ins->oprnd1() == c);
2784         return ins;
2785     }
2786
2787     LIns* CseFilter::insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr)
2788     {
2789         // LIR_*xov are CSEable.  See CseFilter::insGuard() for details.
2790         NanoAssert(isCseOpcode(op));
2791         // conditional guard
2792         uint32_t k;
2793         LIns* ins = find2(op, a, b, k);
2794         if (!ins) {
2795             ins = out->insGuardXov(op, a, b, gr);
2796             addNL(NL2, ins, k);
2797         }
2798         NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b);
2799         return ins;
2800     }
2801
2802     // There is no CseFilter::insBranchJov(), as LIR_*jov* are not CSEable.
2803
2804     LIns* CseFilter::insCall(const CallInfo *ci, LIns* args[])
2805     {
2806         LIns* ins;
2807         uint32_t argc = ci->count_args();
2808         if (ci->_isPure) {
2809             NanoAssert(ci->_storeAccSet == ACCSET_NONE);
2810             uint32_t k;
2811             ins = findCall(ci, argc, args, k);
2812             if (!ins) {
2813                 ins = out->insCall(ci, args);
2814                 addNL(NLCall, ins, k);
2815             }
2816         } else {
2817             // We only need to worry about aliasing if !ci->_isPure.
2818             storesSinceLastLoad |= ci->_storeAccSet;
2819             ins = out->insCall(ci, args);
2820         }
2821         NanoAssert(ins->isCall() && ins->callInfo() == ci && argsmatch(ins, argc, args));
2822         return ins;
2823     }
2824
2825     // Interval analysis can be done much more accurately than we do here.
2826     // For speed and simplicity in a number of cases (eg. LIR_andi, LIR_rshi)
2827     // we just look for easy-to-handle (but common!) cases such as when the
2828     // RHS is a constant;  in practice this gives good results.  It also cuts
2829     // down the amount of backwards traversals we have to do, which is good.
2830     //
2831     // 'lim' also limits the number of backwards traversals;  it's decremented
2832     // on each recursive call and we give up when it reaches zero.  This
2833     // prevents possible time blow-ups in long expression chains.  We don't
2834     // check 'lim' at the top of this function, as you might expect, because
2835     // the behaviour when the limit is reached depends on the opcode.
2836     //
2837     Interval Interval::of(LIns* ins, int lim)
2838     {
2839         switch (ins->opcode()) {
2840         case LIR_immi: {
2841             int32_t i = ins->immI();
2842             return Interval(i, i);
2843         }
2844
2845         case LIR_ldc2i:   return Interval(  -128,   127);
2846         case LIR_lduc2ui: return Interval(     0,   255);
2847         case LIR_lds2i:   return Interval(-32768, 32767);
2848         case LIR_ldus2ui: return Interval(     0, 65535);
2849
2850         case LIR_addi:
2851         case LIR_addxovi:
2852         case LIR_addjovi:
2853             if (lim > 0)
2854                 return add(of(ins->oprnd1(), lim-1), of(ins->oprnd2(), lim-1));
2855             goto overflow;
2856
2857         case LIR_subi:
2858         case LIR_subxovi:
2859         case LIR_subjovi:
2860             if (lim > 0)
2861                 return sub(of(ins->oprnd1(), lim-1), of(ins->oprnd2(), lim-1));
2862             goto overflow;
2863
2864         case LIR_negi:
2865             if (lim > 0)
2866                 return sub(Interval(0, 0), of(ins->oprnd1(), lim-1));
2867             goto overflow;
2868
2869         case LIR_muli:
2870         case LIR_mulxovi:
2871         case LIR_muljovi:
2872             if (lim > 0)
2873                 return mul(of(ins->oprnd1(), lim), of(ins->oprnd2(), lim));
2874             goto overflow;
2875
2876         case LIR_andi: {
2877             // Only handle one common case accurately, for speed and simplicity.
2878             if (ins->oprnd2()->isImmI() && ins->oprnd2()->immI() > 0) {
2879                 // Example:  andi [lo,hi], 0xffff --> [0, 0xffff]
2880                 return Interval(0, ins->oprnd2()->immI());
2881             }
2882             goto worst_non_overflow;
2883         }
2884
2885         case LIR_rshui: {
2886             // Only handle one common case accurately, for speed and simplicity.
2887             if (ins->oprnd2()->isImmI() && lim > 0) {
2888                 Interval x = of(ins->oprnd1(), lim-1);
2889                 int32_t y = ins->oprnd2()->immI() & 0x1f;   // we only use the bottom 5 bits
2890                 NanoAssert(x.isSane());
2891                 if (!x.hasOverflowed && (x.lo >= 0 || y > 0)) {
2892                     // If LHS is non-negative or RHS is positive, the result is
2893                     // non-negative because the top bit must be zero.
2894                     // Example:  rshui [0,hi], 16 --> [0, hi>>16]
2895                     return Interval(0, x.hi >> y);
2896                 }
2897             }
2898             goto worst_non_overflow;
2899         }
2900
2901         case LIR_rshi: {
2902             // Only handle one common case accurately, for speed and simplicity.
2903             if (ins->oprnd2()->isImmI()) {
2904                 // Example:  rshi [lo,hi], 16 --> [32768, 32767]
2905                 int32_t y = ins->oprnd2()->immI() & 0x1f;   // we only use the bottom 5 bits
2906                 return Interval(-(1 << (31 - y)),
2907                                  (1 << (31 - y)) - 1);
2908             }
2909             goto worst_non_overflow;
2910         }
2911
2912 #if defined NANOJIT_IA32 || defined NANOJIT_X64
2913         case LIR_modi: {
2914             NanoAssert(ins->oprnd1()->isop(LIR_divi));
2915             LIns* op2 = ins->oprnd1()->oprnd2();
2916             // Only handle one common case accurately, for speed and simplicity.
2917             if (op2->isImmI() && op2->immI() != 0) {
2918                 int32_t y = op2->immI();
2919                 int32_t absy = (y >= 0) ? y : -y;
2920                 // The result must smaller in magnitude than 'y'.
2921                 // Example:  modi [lo,hi], 5 --> [-4, 4]
2922                 return Interval(-absy + 1, absy - 1);
2923             }
2924             goto worst_non_overflow;
2925         }
2926 #endif
2927
2928         case LIR_cmovi: {
2929             if (lim > 0) {
2930                 Interval x = of(ins->oprnd2(), lim-1);
2931                 Interval y = of(ins->oprnd3(), lim-1);
2932                 NanoAssert(x.isSane() && y.isSane());
2933                 if (!x.hasOverflowed && !y.hasOverflowed)
2934                     return Interval(NJ_MIN(x.lo, y.lo), NJ_MAX(x.hi, y.hi));
2935             }
2936             goto overflow;
2937         }
2938
2939         case LIR_eqi:   CASE64(LIR_eqq:)
2940         case LIR_lti:   CASE64(LIR_ltq:)
2941         case LIR_lei:   CASE64(LIR_leq:)
2942         case LIR_gti:   CASE64(LIR_gtq:)
2943         case LIR_gei:   CASE64(LIR_geq:)
2944         case LIR_ltui:  CASE64(LIR_ltuq:)
2945         case LIR_leui:  CASE64(LIR_leuq:)
2946         case LIR_gtui:  CASE64(LIR_gtuq:)
2947         case LIR_geui:  CASE64(LIR_geuq:)
2948         case LIR_eqd:
2949         case LIR_ltd:
2950         case LIR_led:
2951         case LIR_gtd:
2952         case LIR_ged:
2953             return Interval(0, 1);
2954
2955         CASE32(LIR_paramp:)
2956         case LIR_ldi:
2957         case LIR_noti:
2958         case LIR_ori:
2959         case LIR_xori:
2960         case LIR_lshi:
2961         CASE86(LIR_divi:)
2962         case LIR_calli:
2963         case LIR_reti:
2964         CASE64(LIR_q2i:)
2965         case LIR_d2i:
2966         CASESF(LIR_dlo2i:)
2967         CASESF(LIR_dhi2i:)
2968         CASESF(LIR_hcalli:)
2969             goto worst_non_overflow;
2970
2971         default:
2972             NanoAssertMsgf(0, "%s", lirNames[ins->opcode()]);
2973         }
2974
2975       overflow:
2976         return OverflowInterval();
2977
2978       worst_non_overflow:
2979         // Only cases that cannot overflow should reach here, ie. not add/sub/mul.
2980         return Interval(I32_MIN, I32_MAX);
2981     }
2982
2983     Interval Interval::add(Interval x, Interval y) {
2984         NanoAssert(x.isSane() && y.isSane());
2985
2986         if (x.hasOverflowed || y.hasOverflowed)
2987             return OverflowInterval();
2988
2989         // Nb: the bounds in x and y are known to fit in 32 bits (isSane()
2990         // checks that) so x.lo+y.lo and x.hi+y.hi are guaranteed to fit
2991         // in 64 bits.  This also holds for the other cases below such as
2992         // sub() and mul().
2993         return Interval(x.lo + y.lo, x.hi + y.hi);
2994     }
2995
2996     Interval Interval::sub(Interval x, Interval y) {
2997         NanoAssert(x.isSane() && y.isSane());
2998
2999         if (x.hasOverflowed || y.hasOverflowed)
3000             return OverflowInterval();
3001
3002         return Interval(x.lo - y.hi, x.hi - y.lo);
3003     }
3004
3005     Interval Interval::mul(Interval x, Interval y) {
3006         NanoAssert(x.isSane() && y.isSane());
3007
3008         if (x.hasOverflowed || y.hasOverflowed)
3009             return OverflowInterval();
3010
3011         int64_t a = x.lo * y.lo;
3012         int64_t b = x.lo * y.hi;
3013         int64_t c = x.hi * y.lo;
3014         int64_t d = x.hi * y.hi;
3015         return Interval(NJ_MIN(NJ_MIN(a, b), NJ_MIN(c, d)),
3016                         NJ_MAX(NJ_MAX(a, b), NJ_MAX(c, d)));
3017     }
3018
3019 #if NJ_SOFTFLOAT_SUPPORTED
3020     static int32_t FASTCALL d2i(double d)           { return (int32_t) d; }
3021     static double FASTCALL i2d(int32_t i)           { return i; }
3022     static double FASTCALL ui2d(uint32_t u)         { return u; }
3023     static double FASTCALL negd(double a)           { return -a; }
3024     static double FASTCALL addd(double a, double b) { return a + b; }
3025     static double FASTCALL subd(double a, double b) { return a - b; }
3026     static double FASTCALL muld(double a, double b) { return a * b; }
3027     static double FASTCALL divd(double a, double b) { return a / b; }
3028     static int32_t FASTCALL eqd(double a, double b) { return a == b; }
3029     static int32_t FASTCALL ltd(double a, double b) { return a <  b; }
3030     static int32_t FASTCALL gtd(double a, double b) { return a >  b; }
3031     static int32_t FASTCALL led(double a, double b) { return a <= b; }
3032     static int32_t FASTCALL ged(double a, double b) { return a >= b; }
3033
    // Type signatures for the soft-float helper routines.  Each name is
    // SIG_<return>_<args>, spelled with the suffixes of the ARGTYPE_*
    // values it passes to CallInfo::typeSig1() / typeSig2().
    #define SIG_I_D     CallInfo::typeSig1(ARGTYPE_I, ARGTYPE_D)
    #define SIG_D_I     CallInfo::typeSig1(ARGTYPE_D, ARGTYPE_I)
    #define SIG_D_UI    CallInfo::typeSig1(ARGTYPE_D, ARGTYPE_UI)
    #define SIG_D_D     CallInfo::typeSig1(ARGTYPE_D, ARGTYPE_D)
    #define SIG_D_DD    CallInfo::typeSig2(ARGTYPE_D, ARGTYPE_D, ARGTYPE_D)
    #define SIG_B_DD    CallInfo::typeSig2(ARGTYPE_B, ARGTYPE_D, ARGTYPE_D)

    // SF_CALLINFO(name, typesig) defines a static const CallInfo called
    // <name>_ci describing the helper routine <name>:  its address, the
    // given type signature, ABI_FASTCALL, the isPure flag set to 1, and
    // ACCSET_NONE.  The stringified name is appended via verbose_only().
    #define SF_CALLINFO(name, typesig) \
        static const CallInfo name##_ci = \
            { (intptr_t)&name, typesig, ABI_FASTCALL, /*isPure*/1, ACCSET_NONE verbose_only(, #name) }

    // One CallInfo per helper routine;  these are what
    // SoftFloatOps::SoftFloatOps() stores into opmap.
    SF_CALLINFO(d2i,  SIG_I_D);
    SF_CALLINFO(i2d,  SIG_D_I);
    SF_CALLINFO(ui2d, SIG_D_UI);
    SF_CALLINFO(negd, SIG_D_D);
    SF_CALLINFO(addd, SIG_D_DD);
    SF_CALLINFO(subd, SIG_D_DD);
    SF_CALLINFO(muld, SIG_D_DD);
    SF_CALLINFO(divd, SIG_D_DD);
    SF_CALLINFO(eqd,  SIG_B_DD);
    SF_CALLINFO(ltd,  SIG_B_DD);
    SF_CALLINFO(gtd,  SIG_B_DD);
    SF_CALLINFO(led,  SIG_B_DD);
    SF_CALLINFO(ged,  SIG_B_DD);
3058
3059     SoftFloatOps::SoftFloatOps()
3060     {
3061         memset(opmap, 0, sizeof(opmap));
3062         opmap[LIR_d2i] = &d2i_ci;
3063         opmap[LIR_i2d] = &i2d_ci;
3064         opmap[LIR_ui2d] = &ui2d_ci;
3065         opmap[LIR_negd] = &negd_ci;
3066         opmap[LIR_addd] = &addd_ci;
3067         opmap[LIR_subd] = &subd_ci;
3068         opmap[LIR_muld] = &muld_ci;
3069         opmap[LIR_divd] = &divd_ci;
3070         opmap[LIR_eqd] = &eqd_ci;
3071         opmap[LIR_ltd] = &ltd_ci;
3072         opmap[LIR_gtd] = &gtd_ci;
3073         opmap[LIR_led] = &led_ci;
3074         opmap[LIR_ged] = &ged_ci;
3075     }
3076
3077     const SoftFloatOps softFloatOps;
3078
3079     SoftFloatFilter::SoftFloatFilter(LirWriter *out) : LirWriter(out)
3080     {}
3081
3082     LIns* SoftFloatFilter::split(LIns *a) {
3083         if (a->isD() && !a->isop(LIR_ii2d)) {
3084             // all F64 args must be qjoin's for soft-float
3085             a = ins2(LIR_ii2d, ins1(LIR_dlo2i, a), ins1(LIR_dhi2i, a));
3086         }
3087         return a;
3088     }
3089
3090     LIns* SoftFloatFilter::split(const CallInfo *call, LIns* args[]) {
3091         LIns *lo = out->insCall(call, args);
3092         LIns *hi = out->ins1(LIR_hcalli, lo);
3093         return out->ins2(LIR_ii2d, lo, hi);
3094     }
3095
3096     LIns* SoftFloatFilter::callD1(const CallInfo *call, LIns *a) {
3097         LIns *args[] = { split(a) };
3098         return split(call, args);
3099     }
3100
3101     LIns* SoftFloatFilter::callI1(const CallInfo *call, LIns *a) {
3102         LIns *args[] = { split(a) };
3103         return out->insCall(call, args);
3104     }
3105
3106     LIns* SoftFloatFilter::callD2(const CallInfo *call, LIns *a, LIns *b) {
3107         LIns *args[] = { split(b), split(a) };
3108         return split(call, args);
3109     }
3110
3111     LIns* SoftFloatFilter::cmpD(const CallInfo *call, LIns *a, LIns *b) {
3112         LIns *args[] = { split(b), split(a) };
3113         return out->ins2(LIR_eqi, out->insCall(call, args), out->insImmI(1));
3114     }
3115
3116     LIns* SoftFloatFilter::ins1(LOpcode op, LIns *a) {
3117         const CallInfo *ci = softFloatOps.opmap[op];
3118         if (ci) {
3119             if (ci->returnType() == ARGTYPE_D)
3120                 return callD1(ci, a);
3121             else
3122                 return callI1(ci, a);
3123         }
3124         if (op == LIR_retd)
3125             return out->ins1(op, split(a));
3126         return out->ins1(op, a);
3127     }
3128
3129     LIns* SoftFloatFilter::ins2(LOpcode op, LIns *a, LIns *b) {
3130         const CallInfo *ci = softFloatOps.opmap[op];
3131         if (ci) {
3132             if (isCmpDOpcode(op))
3133                 return cmpD(ci, a, b);
3134             return callD2(ci, a, b);
3135         }
3136         return out->ins2(op, a, b);
3137     }
3138
3139     LIns* SoftFloatFilter::insCall(const CallInfo *ci, LIns* args[]) {
3140         uint32_t nArgs = ci->count_args();
3141         for (uint32_t i = 0; i < nArgs; i++)
3142             args[i] = split(args[i]);
3143
3144         if (ci->returnType() == ARGTYPE_D) {
3145             // This function returns a double as two 32bit values, so replace
3146             // call with qjoin(qhi(call), call).
3147             return split(ci, args);
3148         }
3149         return out->insCall(ci, args);
3150     }
3151 #endif // NJ_SOFTFLOAT_SUPPORTED
3152
3153
3154     #endif /* FEATURE_NANOJIT */
3155
3156 #if defined(NJ_VERBOSE)
3157     AddrNameMap::AddrNameMap(Allocator& a)
3158         : allocator(a), names(a)
3159     {}
3160
3161     void AddrNameMap::addAddrRange(const void *p, size_t size, size_t align, const char *name)
3162     {
3163         if (!this || names.containsKey(p))
3164             return;
3165         char* copy = new (allocator) char[VMPI_strlen(name)+1];
3166         VMPI_strcpy(copy, name);
3167         Entry *e = new (allocator) Entry(copy, size << align, align);
3168         names.put(p, e);
3169     }
3170
3171     void AddrNameMap::lookupAddr(void *p, char*& name, int32_t& offset)
3172     {
3173         const void *start = names.findNear(p);
3174         if (start) {
3175             Entry *e = names.get(start);
3176             const void *end = (const char*)start + e->size;
3177             if (p == start) {
3178                 name = e->name;
3179                 offset = 0;
3180             }
3181             else if (p > start && p < end) {
3182                 name = e->name;
3183                 offset = int32_t(intptr_t(p)-intptr_t(start)) >> e->align;
3184             }
3185             else {
3186                 name = NULL;
3187                 offset = 0;
3188             }
3189         } else {
3190             name = NULL;
3191             offset = 0;
3192         }
3193     }
3194
3195     // ---------------------------------------------------------------
3196     // START debug-logging definitions
3197     // ---------------------------------------------------------------
3198
3199     void LogControl::printf( const char* format, ... )
3200     {
3201         va_list vargs;
3202         va_start(vargs, format);
3203         vfprintf(stdout, format, vargs);
3204         va_end(vargs);
3205         // Flush every line immediately so that if crashes occur in generated
3206         // code we won't lose any output.
3207         fflush(stdout);
3208     }
3209
3210 #endif // NJ_VERBOSE
3211
3212
3213 #ifdef FEATURE_NANOJIT
3214 #ifdef DEBUG
3215     const char* ValidateWriter::type2string(LTy type)
3216     {
3217         switch (type) {
3218         case LTy_V:                     return "void";
3219         case LTy_I:                     return "int";
3220 #ifdef NANOJIT_64BIT
3221         case LTy_Q:                     return "quad";
3222 #endif
3223         case LTy_D:                     return "double";
3224         default:       NanoAssert(0);   return "???";
3225         }
3226     }
3227
3228     void ValidateWriter::typeCheckArgs(LOpcode op, int nArgs, LTy formals[], LIns* args[])
3229     {
3230         NanoAssert(nArgs >= 0);
3231
3232         // Type-check the arguments.
3233         for (int i = 0; i < nArgs; i++) {
3234             LTy formal = formals[i];
3235             LTy actual = args[i]->retType();
3236             if (formal != actual) {
3237                 // Assert on a type error.  The disadvantage of doing this (as
3238                 // opposed to printing a message and continuing) is that at
3239                 // most one type error will be detected per run.  But type
3240                 // errors should be rare, and assertion failures are certain
3241                 // to be caught by test suites whereas error messages may not
3242                 // be.
3243                 NanoAssertMsgf(0,
3244                     "LIR type error (%s): arg %d of '%s' is '%s' "
3245                     "which has type %s (expected %s)",
3246                     whereInPipeline, i+1, lirNames[op],
3247                     lirNames[args[i]->opcode()],
3248                     type2string(actual), type2string(formal));
3249             }
3250         }
3251     }
3252
3253     void ValidateWriter::errorStructureShouldBe(LOpcode op, const char* argDesc, int argN,
3254                                                 LIns* arg, const char* shouldBeDesc)
3255     {
3256         NanoAssertMsgf(0,
3257             "LIR structure error (%s): %s %d of '%s' is '%s' (expected %s)",
3258             whereInPipeline, argDesc, argN,
3259             lirNames[op], lirNames[arg->opcode()], shouldBeDesc);
3260     }
3261
3262     void ValidateWriter::errorAccSet(const char* what, AccSet accSet, const char* shouldDesc)
3263     {
3264         RefBuf b;
3265         NanoAssertMsgf(0,
3266             "LIR AccSet error (%s): '%s' AccSet is '%s'; %s",
3267             whereInPipeline, what, printer->formatAccSet(&b, accSet), shouldDesc);
3268     }
3269
3270     void ValidateWriter::errorLoadQual(const char* what, LoadQual loadQual)
3271     {
3272         NanoAssertMsgf(0,
3273             "LIR LoadQual error (%s): '%s' loadQual is '%d'",
3274             whereInPipeline, what, loadQual);
3275     }
3276
3277     void ValidateWriter::checkLInsIsACondOrConst(LOpcode op, int argN, LIns* ins)
3278     {
3279         // We could introduce a LTy_B32 type in the type system but that's a
3280         // bit weird because its representation is identical to LTy_I.  It's
3281         // easier to just do this check structurally.  Also, optimization can
3282         // cause the condition to become a LIR_immi.
3283         if (!ins->isCmp() && !ins->isImmI())
3284             errorStructureShouldBe(op, "argument", argN, ins, "a condition or 32-bit constant");
3285     }
3286
3287     void ValidateWriter::checkLInsIsNull(LOpcode op, int argN, LIns* ins)
3288     {
3289         if (ins)
3290             errorStructureShouldBe(op, "argument", argN, ins, NULL);
3291     }
3292
3293     void ValidateWriter::checkLInsHasOpcode(LOpcode op, int argN, LIns* ins, LOpcode op2)
3294     {
3295         if (!ins->isop(op2))
3296             errorStructureShouldBe(op, "argument", argN, ins, lirNames[op2]);
3297     }
3298
3299     ValidateWriter::ValidateWriter(LirWriter *out, LInsPrinter* printer, const char* where)
3300         : LirWriter(out), printer(printer), whereInPipeline(where),
3301           checkAccSetExtras(0)
3302     {}
3303
3304     LIns* ValidateWriter::insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet,
3305                                   LoadQual loadQual)
3306     {
3307         checkAccSet(op, base, d, accSet);
3308
3309         switch (loadQual) {
3310         case LOAD_CONST:
3311         case LOAD_NORMAL:
3312         case LOAD_VOLATILE:
3313             break;
3314         default:
3315             errorLoadQual(lirNames[op], loadQual);
3316             break;
3317         }
3318
3319
3320         int nArgs = 1;
3321         LTy formals[1] = { LTy_P };
3322         LIns* args[1] = { base };
3323
3324         switch (op) {
3325         case LIR_ldi:
3326         case LIR_ldd:
3327         case LIR_lduc2ui:
3328         case LIR_ldus2ui:
3329         case LIR_ldc2i:
3330         case LIR_lds2i:
3331         case LIR_ldf2d:
3332         CASE64(LIR_ldq:)
3333             break;
3334         default:
3335             NanoAssert(0);
3336         }
3337
3338         typeCheckArgs(op, nArgs, formals, args);
3339
3340         return out->insLoad(op, base, d, accSet, loadQual);
3341     }
3342
3343     LIns* ValidateWriter::insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet)
3344     {
3345         checkAccSet(op, base, d, accSet);
3346
3347         int nArgs = 2;
3348         LTy formals[2] = { LTy_V, LTy_P };     // LTy_V is overwritten shortly
3349         LIns* args[2] = { value, base };
3350
3351         switch (op) {
3352         case LIR_sti2c:
3353         case LIR_sti2s:
3354         case LIR_sti:
3355             formals[0] = LTy_I;
3356             break;
3357
3358 #ifdef NANOJIT_64BIT
3359         case LIR_stq:
3360             formals[0] = LTy_Q;
3361             break;
3362 #endif
3363
3364         case LIR_std:
3365         case LIR_std2f:
3366             formals[0] = LTy_D;
3367             break;
3368
3369         default:
3370             NanoAssert(0);
3371         }
3372
3373         typeCheckArgs(op, nArgs, formals, args);
3374
3375         return out->insStore(op, value, base, d, accSet);
3376     }
3377
3378     LIns* ValidateWriter::ins0(LOpcode op)
3379     {
3380         switch (op) {
3381         case LIR_start:
3382         case LIR_regfence:
3383         case LIR_label:
3384             break;
3385         default:
3386             NanoAssert(0);
3387         }
3388
3389         // No args to type-check.
3390
3391         return out->ins0(op);
3392     }
3393
3394     LIns* ValidateWriter::ins1(LOpcode op, LIns* a)
3395     {
3396         int nArgs = 1;
3397         LTy formals[1];
3398         LIns* args[1] = { a };
3399
3400         switch (op) {
3401         case LIR_negi:
3402         case LIR_noti:
3403         case LIR_i2d:
3404         case LIR_ui2d:
3405         case LIR_livei:
3406         case LIR_reti:
3407             formals[0] = LTy_I;
3408             break;
3409
3410 #ifdef NANOJIT_64BIT
3411         case LIR_i2q:
3412         case LIR_ui2uq:
3413             formals[0] = LTy_I;
3414             break;
3415
3416         case LIR_q2i:
3417         case LIR_qasd:
3418         case LIR_retq:
3419         case LIR_liveq:
3420             formals[0] = LTy_Q;
3421             break;
3422 #endif
3423
3424 #if defined NANOJIT_IA32 || defined NANOJIT_X64
3425         case LIR_modi:       // see LIRopcode.tbl for why 'mod' is unary
3426             checkLInsHasOpcode(op, 1, a, LIR_divi);
3427             formals[0] = LTy_I;
3428             break;
3429 #endif
3430
3431 #if NJ_SOFTFLOAT_SUPPORTED
3432         case LIR_dlo2i:
3433         case LIR_dhi2i:
3434             formals[0] = LTy_D;
3435             break;
3436
3437         case LIR_hcalli:
3438             // The operand of a LIR_hcalli is LIR_calli, even though the
3439             // function being called has a return type of LTy_D.
3440             checkLInsHasOpcode(op, 1, a, LIR_calli);
3441             formals[0] = LTy_I;
3442             break;
3443 #endif
3444
3445         case LIR_negd:
3446         case LIR_retd:
3447         case LIR_lived:
3448         case LIR_d2i:
3449         CASE64(LIR_dasq:)
3450             formals[0] = LTy_D;
3451             break;
3452
3453         case LIR_file:
3454         case LIR_line:
3455             // These will never get hit since VTUNE implies !DEBUG.  Ignore for the moment.
3456             nArgs = 0;
3457             break;
3458
3459         default:
3460             NanoAssertMsgf(0, "%s\n", lirNames[op]);
3461         }
3462
3463         typeCheckArgs(op, nArgs, formals, args);
3464
3465         return out->ins1(op, a);
3466     }
3467
3468     LIns* ValidateWriter::ins2(LOpcode op, LIns* a, LIns* b)
3469     {
3470         int nArgs = 2;
3471         LTy formals[2];
3472         LIns* args[2] = { a, b };
3473
3474         switch (op) {
3475         case LIR_addi:
3476         case LIR_subi:
3477         case LIR_muli:
3478         CASE86(LIR_divi:)
3479         case LIR_andi:
3480         case LIR_ori:
3481         case LIR_xori:
3482         case LIR_lshi:
3483         case LIR_rshi:
3484         case LIR_rshui:
3485         case LIR_eqi:
3486         case LIR_lti:
3487         case LIR_gti:
3488         case LIR_lei:
3489         case LIR_gei:
3490         case LIR_ltui:
3491         case LIR_gtui:
3492         case LIR_leui:
3493         case LIR_geui:
3494             formals[0] = LTy_I;
3495             formals[1] = LTy_I;
3496             break;
3497
3498 #if NJ_SOFTFLOAT_SUPPORTED
3499         case LIR_ii2d:
3500             formals[0] = LTy_I;
3501             formals[1] = LTy_I;
3502             break;
3503 #endif
3504
3505 #ifdef NANOJIT_64BIT
3506         case LIR_andq:
3507         case LIR_orq:
3508         case LIR_xorq:
3509         case LIR_addq:
3510         case LIR_subq:
3511         case LIR_eqq:
3512         case LIR_ltq:
3513         case LIR_gtq:
3514         case LIR_leq:
3515         case LIR_geq:
3516         case LIR_ltuq:
3517         case LIR_gtuq:
3518         case LIR_leuq:
3519         case LIR_geuq:
3520             formals[0] = LTy_Q;
3521             formals[1] = LTy_Q;
3522             break;
3523
3524         case LIR_lshq:
3525         case LIR_rshq:
3526         case LIR_rshuq:
3527             formals[0] = LTy_Q;
3528             formals[1] = LTy_I;
3529             break;
3530 #endif
3531
3532         case LIR_addd:
3533         case LIR_subd:
3534         case LIR_muld:
3535         case LIR_divd:
3536         case LIR_eqd:
3537         case LIR_gtd:
3538         case LIR_ltd:
3539         case LIR_led:
3540         case LIR_ged:
3541             formals[0] = LTy_D;
3542             formals[1] = LTy_D;
3543             break;
3544
3545         default:
3546             NanoAssert(0);
3547         }
3548
3549         typeCheckArgs(op, nArgs, formals, args);
3550
3551         return out->ins2(op, a, b);
3552     }
3553
3554     LIns* ValidateWriter::ins3(LOpcode op, LIns* a, LIns* b, LIns* c)
3555     {
3556         int nArgs = 3;
3557         LTy formals[3] = { LTy_I, LTy_V, LTy_V };   // LTy_V gets overwritten
3558         LIns* args[3] = { a, b, c };
3559
3560         switch (op) {
3561         case LIR_cmovi:
3562             checkLInsIsACondOrConst(op, 1, a);
3563             formals[1] = LTy_I;
3564             formals[2] = LTy_I;
3565             break;
3566
3567 #ifdef NANOJIT_64BIT
3568         case LIR_cmovq:
3569             checkLInsIsACondOrConst(op, 1, a);
3570             formals[1] = LTy_Q;
3571             formals[2] = LTy_Q;
3572             break;
3573 #endif
3574
3575         case LIR_cmovd:
3576             checkLInsIsACondOrConst(op, 1, a);
3577             formals[1] = LTy_D;
3578             formals[2] = LTy_D;
3579             break;
3580
3581         default:
3582             NanoAssert(0);
3583         }
3584
3585         typeCheckArgs(op, nArgs, formals, args);
3586
3587         return out->ins3(op, a, b, c);
3588     }
3589
3590     LIns* ValidateWriter::insParam(int32_t arg, int32_t kind)
3591     {
3592         return out->insParam(arg, kind);
3593     }
3594
3595     LIns* ValidateWriter::insImmI(int32_t imm)
3596     {
3597         return out->insImmI(imm);
3598     }
3599
3600 #ifdef NANOJIT_64BIT
3601     LIns* ValidateWriter::insImmQ(uint64_t imm)
3602     {
3603         return out->insImmQ(imm);
3604     }
3605 #endif
3606
3607     LIns* ValidateWriter::insImmD(double d)
3608     {
3609         return out->insImmD(d);
3610     }
3611
// Printable C type names, indexed by ArgType value, for use in call
// validation error messages.  The table is fully const so that entries
// cannot be reseated.
static const char* const argtypeNames[] = {
    "void",     // ARGTYPE_V  = 0
    "int32_t",  // ARGTYPE_I  = 1
    "uint32_t", // ARGTYPE_UI = 2
    "uint64_t", // ARGTYPE_Q  = 3
    "double"    // ARGTYPE_D  = 4
};
3619
3620     LIns* ValidateWriter::insCall(const CallInfo *ci, LIns* args0[])
3621     {
3622         ArgType argTypes[MAXARGS];
3623         uint32_t nArgs = ci->getArgTypes(argTypes);
3624         LTy formals[MAXARGS];
3625         LIns* args[MAXARGS];    // in left-to-right order, unlike args0[]
3626
3627         LOpcode op = getCallOpcode(ci);
3628         ArgType retType = ci->returnType();
3629
3630         if ((op == LIR_callv) != (retType == ARGTYPE_V) ||
3631             (op == LIR_calli) != (retType == ARGTYPE_UI ||
3632                                   retType == ARGTYPE_I) ||
3633 #ifdef NANOJIT_64BIT
3634             (op == LIR_callq) != (retType == ARGTYPE_Q) ||
3635 #endif
3636             (op == LIR_calld) != (retType == ARGTYPE_D)) {
3637             NanoAssertMsgf(0,
3638                 "LIR structure error (%s): return type mismatch: opcode %s with %s return type",
3639                 whereInPipeline, lirNames[op], argtypeNames[retType]);
3640         }
3641
3642         if (op == LIR_callv && ci->_isPure) {
3643             // Since nobody can use the result of a void call, any pure call
3644             // would just be dead.  This is probably a mistake.
3645             NanoAssertMsgf(0,
3646                 "LIR structure error (%s): LIR_callv must only be used with nonpure functions.",
3647                 whereInPipeline);
3648         }
3649
3650         if (ci->_isPure && ci->_storeAccSet != ACCSET_NONE)
3651             errorAccSet(ci->_name, ci->_storeAccSet, "it should be ACCSET_NONE for pure functions");
3652
3653         // This loop iterates over the args from right-to-left (because arg()
3654         // and getArgTypes() use right-to-left order), but puts the results
3655         // into formals[] and args[] in left-to-right order so that arg
3656         // numbers in error messages make sense to the user.
3657         for (uint32_t i = 0; i < nArgs; i++) {
3658             uint32_t i2 = nArgs - i - 1;    // converts right-to-left to left-to-right
3659             switch (argTypes[i]) {
3660             case ARGTYPE_I:
3661             case ARGTYPE_UI:         formals[i2] = LTy_I;   break;
3662 #ifdef NANOJIT_64BIT
3663             case ARGTYPE_Q:         formals[i2] = LTy_Q;   break;
3664 #endif
3665             case ARGTYPE_D:         formals[i2] = LTy_D;   break;
3666             default: NanoAssertMsgf(0, "%d %s\n", argTypes[i],ci->_name); formals[i2] = LTy_V;  break;
3667             }
3668             args[i2] = args0[i];
3669         }
3670
3671         typeCheckArgs(op, nArgs, formals, args);
3672
3673         return out->insCall(ci, args0);
3674     }
3675
3676     LIns* ValidateWriter::insGuard(LOpcode op, LIns *cond, GuardRecord *gr)
3677     {
3678         int nArgs = -1;     // init to shut compilers up
3679         LTy formals[1];
3680         LIns* args[1];
3681
3682         switch (op) {
3683         case LIR_x:
3684         case LIR_xbarrier:
3685             checkLInsIsNull(op, 1, cond);
3686             nArgs = 0;
3687             break;
3688
3689         case LIR_xt:
3690         case LIR_xf:
3691             checkLInsIsACondOrConst(op, 1, cond);
3692             nArgs = 1;
3693             formals[0] = LTy_I;
3694             args[0] = cond;
3695             break;
3696
3697         case LIR_xtbl:
3698             nArgs = 1;
3699             formals[0] = LTy_I;   // unlike xt/xf/jt/jf, this is an index, not a condition
3700             args[0] = cond;
3701             break;
3702
3703         default:
3704             NanoAssert(0);
3705         }
3706
3707         typeCheckArgs(op, nArgs, formals, args);
3708
3709         return out->insGuard(op, cond, gr);
3710     }
3711
3712     LIns* ValidateWriter::insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord* gr)
3713     {
3714         int nArgs = 2;
3715         LTy formals[2] = { LTy_I, LTy_I };
3716         LIns* args[2] = { a, b };
3717
3718         switch (op) {
3719         case LIR_addxovi:
3720         case LIR_subxovi:
3721         case LIR_mulxovi:
3722             break;
3723
3724         default:
3725             NanoAssert(0);
3726         }
3727
3728         typeCheckArgs(op, nArgs, formals, args);
3729
3730         return out->insGuardXov(op, a, b, gr);
3731     }
3732
3733     LIns* ValidateWriter::insBranch(LOpcode op, LIns* cond, LIns* to)
3734     {
3735         int nArgs = -1;     // init to shut compilers up
3736         LTy formals[1];
3737         LIns* args[1];
3738
3739         switch (op) {
3740         case LIR_j:
3741             checkLInsIsNull(op, 1, cond);
3742             nArgs = 0;
3743             break;
3744
3745         case LIR_jt:
3746         case LIR_jf:
3747             checkLInsIsACondOrConst(op, 1, cond);
3748             nArgs = 1;
3749             formals[0] = LTy_I;
3750             args[0] = cond;
3751             break;
3752
3753         default:
3754             NanoAssert(0);
3755         }
3756
3757         // We check that target is a label in ValidateReader because it may
3758         // not have been set here.
3759
3760         typeCheckArgs(op, nArgs, formals, args);
3761
3762         return out->insBranch(op, cond, to);
3763     }
3764
3765     LIns* ValidateWriter::insBranchJov(LOpcode op, LIns* a, LIns* b, LIns* to)
3766     {
3767         int nArgs = 2;
3768         LTy formals[2];
3769         LIns* args[2] = { a, b };
3770
3771         switch (op) {
3772         case LIR_addjovi:
3773         case LIR_subjovi:
3774         case LIR_muljovi:
3775             formals[0] = LTy_I;
3776             formals[1] = LTy_I;
3777             break;
3778
3779 #ifdef NANOJIT_64BIT
3780         case LIR_addjovq:
3781         case LIR_subjovq:
3782             formals[0] = LTy_Q;
3783             formals[1] = LTy_Q;
3784             break;
3785 #endif
3786         default:
3787             NanoAssert(0);
3788         }
3789
3790         // We check that target is a label in ValidateReader because it may
3791         // not have been set here.
3792
3793         typeCheckArgs(op, nArgs, formals, args);
3794
3795         return out->insBranchJov(op, a, b, to);
3796     }
3797
3798     LIns* ValidateWriter::insAlloc(int32_t size)
3799     {
3800         return out->insAlloc(size);
3801     }
3802
3803     LIns* ValidateWriter::insJtbl(LIns* index, uint32_t size)
3804     {
3805         int nArgs = 1;
3806         LTy formals[1] = { LTy_I };
3807         LIns* args[1] = { index };
3808
3809         typeCheckArgs(LIR_jtbl, nArgs, formals, args);
3810
3811         // We check that all jump table entries are labels in ValidateReader
3812         // because they won't have been set here.
3813
3814         return out->insJtbl(index, size);
3815     }
3816
3817     ValidateReader::ValidateReader(LirFilter* in) : LirFilter(in)
3818         {}
3819
3820     LIns* ValidateReader::read()
3821     {
3822         LIns *ins = in->read();
3823         switch (ins->opcode()) {
3824         case LIR_jt:
3825         case LIR_jf:
3826         case LIR_j:
3827             NanoAssert(ins->getTarget() && ins->oprnd2()->isop(LIR_label));
3828             break;
3829
3830         case LIR_addjovi:
3831         case LIR_subjovi:
3832         case LIR_muljovi:
3833         CASE64(LIR_addjovq:)
3834         CASE64(LIR_subjovq:)
3835             NanoAssert(ins->getTarget() && ins->oprnd3()->isop(LIR_label));
3836             break;
3837
3838         case LIR_jtbl: {
3839             uint32_t tableSize = ins->getTableSize();
3840             NanoAssert(tableSize > 0);
3841             for (uint32_t i = 0; i < tableSize; i++) {
3842                 LIns* target = ins->getTarget(i);
3843                 NanoAssert(target);
3844                 NanoAssert(target->isop(LIR_label));
3845             }
3846             break;
3847         }
3848         default:
3849             ;
3850         }
3851         return ins;
3852     }
3853
3854 #endif
3855 #endif
3856
3857 }