js/src/lirasm/lirasm.cpp

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
   2  * vim: set ts=4 sw=4 et tw=99:
   3  * ***** BEGIN LICENSE BLOCK *****
   4  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   5  *
   6  * The contents of this file are subject to the Mozilla Public License Version
   7  * 1.1 (the "License"); you may not use this file except in compliance with
   8  * the License. You may obtain a copy of the License at
   9  * http://www.mozilla.org/MPL/
  10  *
  11  * Software distributed under the License is distributed on an "AS IS" basis,
  12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  13  * for the specific language governing rights and limitations under the
  14  * License.
  15  *
  16  * The Original Code is LIR Assembler code, released 2009.
  17  *
  18  * The Initial Developer of the Original Code is
  19  * Mozilla Corporation.
  20  * Portions created by the Initial Developer are Copyright (C) 2009
  21  * the Initial Developer. All Rights Reserved.
  22  *
  23  * Contributor(s):
  24  *  Graydon Hoare <graydon@mozilla.com>
  25  *
  26  * Alternatively, the contents of this file may be used under the terms of
  27  * either of the GNU General Public License Version 2 or later (the "GPL"),
  28  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  29  * in which case the provisions of the GPL or the LGPL are applicable instead
  30  * of those above. If you wish to allow use of your version of this file only
  31  * under the terms of either the GPL or the LGPL, and not to allow others to
  32  * use your version of this file under the terms of the MPL, indicate your
  33  * decision by deleting the provisions above and replace them with the notice
  34  * and other provisions required by the GPL or the LGPL. If you do not delete
  35  * the provisions above, a recipient may use your version of this file under
  36  * the terms of any one of the MPL, the GPL or the LGPL.
  37  *
  38  * ***** END LICENSE BLOCK ***** */
  39
  40 #include <vector>
  41 #include <algorithm>
  42 #include <map>
  43 #include <string>
  44 #include <iostream>
  45 #include <sstream>
  46 #include <fstream>
  47
  48 #ifdef AVMPLUS_UNIX
  49 #include <sys/types.h>
  50 #include <sys/stat.h>
  51 #include <fcntl.h>
  52 #endif
  53
  54 #include <stdlib.h>
  55 #include <math.h>
  56 #include <ctype.h>
  57 #include <assert.h>
  58
  59 #include "nanojit/nanojit.h"
  60
  61 using namespace nanojit;
  62 using namespace std;
  63
  64 /* Allocator SPI implementation. */
  65
  66 void*
  67 nanojit::Allocator::allocChunk(size_t nbytes, bool /*fallible*/)
  68 {
  69     void *p = malloc(nbytes);
  70     if (!p)
  71         exit(1);
  72     return p;
  73 }
  74
// Allocator SPI hook: release a chunk by passing it to free().  This is the
// counterpart of allocChunk(), which obtains its chunks from malloc().
void
nanojit::Allocator::freeChunk(void *p) {
    free(p);
}
  79
// Allocator SPI hook: intentionally empty, lirasm does nothing here.
void
nanojit::Allocator::postReset() {
}
  83
  84
// A SideExit extended with a line number.  FragmentAssembler::createSideExit
// fills 'line' from the assembler's current line, and the guard formatters
// print it as "line=N".
struct LasmSideExit : public SideExit {
    size_t line;    // line number recorded when the exit was created
};
  88
  89
  90 /* LIR SPI implementation */
  91
// StackFilter SPI hook: lirasm always reports 0, whatever the instruction.
int
nanojit::StackFilter::getTop(LIns*)
{
    return 0;
}
  97
// We lump everything into a single access region for lirasm.
// ACCSET_OTHER is that one region (bit 0); it is the AccSet passed to every
// load emitted by assemble_load and the only value checkAccSet accepts.
static const AccSet ACCSET_OTHER = (1 << 0);
// Number of access regions in use; handed to the CseFilter constructor.
static const uint8_t LIRASM_NUM_USED_ACCS = 1;
 101
 102 #if defined NJ_VERBOSE
 103 void
 104 nanojit::LInsPrinter::formatGuard(InsBuf *buf, LIns *ins)
 105 {
 106     RefBuf b1, b2;
 107     LasmSideExit *x = (LasmSideExit *)ins->record()->exit;
 108     VMPI_snprintf(buf->buf, buf->len,
 109             "%s: %s %s -> line=%ld (GuardID=%03d)",
 110             formatRef(&b1, ins),
 111             lirNames[ins->opcode()],
 112             ins->oprnd1() ? formatRef(&b2, ins->oprnd1()) : "",
 113             (long)x->line,
 114             ins->record()->profGuardID);
 115 }
 116
 117 void
 118 nanojit::LInsPrinter::formatGuardXov(InsBuf *buf, LIns *ins)
 119 {
 120     RefBuf b1, b2, b3;
 121     LasmSideExit *x = (LasmSideExit *)ins->record()->exit;
 122     VMPI_snprintf(buf->buf, buf->len,
 123             "%s = %s %s, %s -> line=%ld (GuardID=%03d)",
 124             formatRef(&b1, ins),
 125             lirNames[ins->opcode()],
 126             formatRef(&b2, ins->oprnd1()),
 127             formatRef(&b3, ins->oprnd2()),
 128             (long)x->line,
 129             ins->record()->profGuardID);
 130 }
 131
// Printable names for the access-region bits, one entry per bit position
// (32 entries).  lirasm only uses bit 0 (ACCSET_OTHER, printed as "o"); the
// remaining positions are placeholders.
const char*
nanojit::LInsPrinter::accNames[] = {
    "o",    // (1 << 0) == ACCSET_OTHER
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",   //  1..10 (unused)
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",   // 11..20 (unused)
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",   // 21..30 (unused)
    "?"                                                 //     31 (unused)
};
 140 #endif
 141
 142 #ifdef DEBUG
 143 void ValidateWriter::checkAccSet(LOpcode op, LIns* base, int32_t disp, AccSet accSet)
 144 {
 145     (void)op;
 146     (void)base;
 147     (void)disp;
 148     NanoAssert(accSet == ACCSET_OTHER);
 149 }
 150 #endif
 151
// Function-pointer types for calling a compiled fragment with no arguments,
// one per result kind.  endFragment() casts the fragment's code address to
// the type matching the fragment's return type.
typedef int32_t (FASTCALL *RetInt)();
typedef int64_t (FASTCALL *RetQuad)();
typedef double (FASTCALL *RetDouble)();
typedef GuardRecord* (FASTCALL *RetGuard)();
 156
// One entry of the built-in function table ('functions[]'): the name used
// to refer to the function plus the CallInfo describing how to call it.
struct Function {
    const char *name;                   // stringified function name (see FN)
    struct nanojit::CallInfo callInfo;  // address, type signature, ABI, ...
};
 161
// Kinds of value a fragment can return.  Each enumerator is a distinct bit
// because FragmentAssembler ORs them into mReturnTypeBits as it sees return
// and guard instructions; endFragment() then warns unless exactly one bit
// ended up set.
enum ReturnType {
    RT_INT = 1,
#ifdef NANOJIT_64BIT
    RT_QUAD = 2,
#endif
    RT_DOUBLE = 4,
    RT_GUARD = 8
};
 170
// DEBUG_ONLY_NAME(name) expands to ", "name"" (a trailing initializer holding
// the stringified name) in DEBUG builds and to nothing otherwise.
#ifdef DEBUG
#define DEBUG_ONLY_NAME(name)   ,#name
#else
#define DEBUG_ONLY_NAME(name)
#endif

// CI(name, args) expands to a brace initializer for a CallInfo: the address
// of 'name', the type signature 'args', the cdecl ABI, isPure = 0,
// ACCSET_STORE_ANY, and (DEBUG builds only) the function's name.
#define CI(name, args) \
    {(uintptr_t) (&name), args, nanojit::ABI_CDECL, /*isPure*/0, ACCSET_STORE_ANY \
     DEBUG_ONLY_NAME(name)}

// FN(name, args) expands to a brace initializer for a 'Function' entry:
// the stringified name followed by the CI() initializer for its CallInfo.
#define FN(name, args) \
    {#name, CI(name, args)}
 183
// Token categories produced by LirTokenStream::get():
//   NAME    - a word that does not look like a number
//   NUMBER  - a word starting with "0x"/"0X", a digit, or '.' plus a digit
//   PUNCT   - "->" or one of ":,=[]()"
//   NEWLINE - end of line, or a ';' (the rest of the line is discarded)
enum LirTokenType {
    NAME, NUMBER, PUNCT, NEWLINE
};
 187
// A single token as returned by LirTokenStream::get().
struct LirToken {
    LirTokenType type;  // category of the token
    string data;        // token text; cleared for NEWLINE tokens
    int lineno;         // line counter value when the token was read
};
 193
 194 inline bool
 195 startsWith(const string &s, const string &prefix)
 196 {
 197     return s.size() >= prefix.size() && s.compare(0, prefix.length(), prefix) == 0;
 198 }
 199
 200 // LIR files must be ASCII, for simplicity.
 201 class LirTokenStream {
 202 public:
 203     LirTokenStream(istream &in) : mIn(in), mLineno(0) {}
 204
 205     bool get(LirToken &token) {
 206         if (mLine.empty()) {
 207             if (!getline(mIn, mLine))
 208                 return false;
 209             mLine += '\n';
 210             mLineno++;
 211         }
 212         mLine.erase(0, mLine.find_first_not_of(" \t\v\r"));
 213         char c = mLine[0];
 214         size_t e = mLine.find_first_not_of("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$.+-");
 215         if (startsWith(mLine, "->")) {
 216             mLine.erase(0, 2);
 217             token.type = PUNCT;
 218             token.data = "->";
 219         } else if (e > 0) {
 220             string s = mLine.substr(0, e);
 221             mLine.erase(0, e);
 222             if (e > 1 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
 223                 token.type = NUMBER;
 224             else if (isdigit(s[0]) || (e > 1 && s[0] == '.' && isdigit(s[1])))
 225                 token.type = NUMBER;
 226             else
 227                 token.type = NAME;
 228             token.data = s;
 229         } else if (strchr(":,=[]()", c)) {
 230             token.type = PUNCT;
 231             token.data = c;
 232             mLine.erase(0, 1);
 233         } else if (c == ';' || c == '\n') {
 234             token.type = NEWLINE;
 235             token.data.clear();
 236             mLine.clear();
 237         } else {
 238             cerr << "line " << mLineno << ": error: Unrecognized character in file." << endl;
 239             return false;
 240         }
 241
 242         token.lineno = mLineno;
 243         return true;
 244     }
 245
 246     bool eat(LirTokenType type, const char *exact = NULL) {
 247         LirToken token;
 248         return (get(token) && token.type == type && (exact == NULL || token.data == exact));
 249     }
 250
 251     bool getName(string &name) {
 252         LirToken t;
 253         if (get(t) && t.type == NAME) {
 254             name = t.data;
 255             return true;
 256         }
 257         return false;
 258     }
 259
 260 private:
 261     istream &mIn;
 262     string mLine;
 263     int mLineno;
 264 };
 265
// Bookkeeping for one assembled fragment, stored in Lirasm::mFragments
// under the fragment's name.
class LirasmFragment {
public:
    // Entry point of the compiled code, viewed through the function-pointer
    // type that matches mReturnType.  endFragment() sets exactly one member.
    union {
        RetInt rint;
#ifdef NANOJIT_64BIT
        RetQuad rquad;
#endif
        RetDouble rdouble;
        RetGuard rguard;
    };
    ReturnType mReturnType;         // selects the valid union member
    Fragment *fragptr;              // the underlying nanojit Fragment
    map<string, LIns*> mLabels;     // copy of the assembler's label table
};
 280
// Fragment name -> its LirasmFragment record.
typedef map<string, LirasmFragment> Fragments;
 282
// Top-level assembler state shared by all fragments: the LIR buffer, the
// allocators, the nanojit Assembler, and the table of assembled fragments.
// FragmentAssembler instances reach into these members through a reference
// to their parent Lirasm.
class Lirasm {
public:
    Lirasm(bool verbose);
    ~Lirasm();

    void assemble(istream &in, bool optimize);
    void assembleRandom(int nIns, bool optimize);
    bool lookupFunction(const string &name, CallInfo *&ci);

    // NOTE: members are constructed in declaration order; do not reorder.
    LirBuffer *mLirbuf;             // buffer every fragment's LIR is written to
    LogControl mLogc;               // logging flags (lcbits)
    avmplus::AvmCore mCore;
    Allocator mAlloc;               // arena used for CallInfos, exits, guards
    CodeAlloc mCodeAlloc;
    bool mVerbose;                  // adds a VerboseWriter in DEBUG builds
    Fragments mFragments;           // fragments by name
    Assembler mAssm;                // compiles fragments to native code
    map<string, LOpcode> mOpMap;    // instruction name -> opcode

    // Print "error: <msg>" to stderr and terminate with exit status 1.
    void bad(const string &msg) {
        cerr << "error: " << msg << endl;
        exit(1);
    }

private:
    void handlePatch(LirTokenStream &in);
};
 310
// Assembles a single named fragment: owns the LirWriter pipeline for the
// fragment, parses instructions line by line, tracks labels and pending
// forward jumps, and finally compiles the fragment via the parent Lirasm.
class FragmentAssembler {
public:
    FragmentAssembler(Lirasm &parent, const string &fragmentName, bool optimize);
    ~FragmentAssembler();

    void assembleFragment(LirTokenStream &in,
                          bool implicitBegin,
                          const LirToken *firstToken);

    void assembleRandomFragment(int nIns);

private:
    // Next profile id; consumed by the constructor when fragment profiling
    // (LC_FragProfile) is enabled.
    static uint32_t sProfId;
    // Prohibit copying.
    FragmentAssembler(const FragmentAssembler &);
    FragmentAssembler & operator=(const FragmentAssembler &);
    LasmSideExit *createSideExit();
    GuardRecord *createGuardRecord(LasmSideExit *exit);

    Lirasm &mParent;                    // shared assembler state
    const string mFragName;             // key into mParent.mFragments
    Fragment *mFragment;                // fragment being built
    bool optimize;                      // whether CSE/expr filters are installed
    vector<CallInfo*> mCallInfos;       // CallInfos allocated by assemble_call
    map<string, LIns*> mLabels;         // label / value name -> instruction
    LirWriter *mLir;                    // head of the writer pipeline
    // Individual pipeline stages, kept so the destructor can delete them;
    // a stage that was not installed stays NULL.
    LirBufWriter *mBufWriter;
    LirWriter *mCseFilter;
    LirWriter *mExprFilter;
    LirWriter *mSoftFloatFilter;
    LirWriter *mVerboseWriter;
    LirWriter *mValidateWriter1;
    LirWriter *mValidateWriter2;
    // Branches emitted before their target label was seen, keyed by label.
    multimap<string, LIns *> mFwdJumps;

    size_t mLineno;                     // line of the instruction being assembled
    LOpcode mOpcode;                    // opcode of the instruction being assembled
    size_t mOpcount;

    char mReturnTypeBits;               // OR of the ReturnType bits seen so far
    vector<string> mTokens;             // remaining tokens of the current line

    void tokenizeLine(LirTokenStream &in, LirToken &token);
    void need(size_t);
    LIns *ref(const string &);
    LIns *assemble_jump(bool isCond);
    LIns *assemble_load();
    LIns *assemble_call(const string &);
    LIns *assemble_ret(ReturnType rt);
    LIns *assemble_guard(bool isCond);
    LIns *assemble_guard_xov();
    LIns *assemble_jump_jov();
    void bad(const string &msg);
    void nyi(const string &opname);
    void extract_any_label(string &lab, char lab_delim);
    void resolve_forward_jumps(string &lab, LIns *ins);
    void endFragment();
};
 369
 370 // 'sin' is overloaded on some platforms, so taking its address
 371 // doesn't quite work. Provide a do-nothing function here
 372 // that's not overloaded.
 373 double sinFn(double d) {
 374     return sin(d);
 375 }
 376 #define sin sinFn
 377
 378 double calld1(double x, double i, double y, double l, double x1, double i1, double y1, double l1) {
 379     return x + i * y - l + x1 / i1 - y1 * l1;
 380 }
 381
 382 // The calling tests with mixed argument types are sensible for all platforms, but they highlight
 383 // the differences between the supported ABIs on ARM.
 384
 385 double callid1(int i, double x, double y, int j, int k, double z) {
 386     return (x + y + z) / (double)(i + j + k);
 387 }
 388
 389 double callid2(int i, int j, int k, double x) {
 390     return x / (double)(i + j + k);
 391 }
 392
 393 double callid3(int i, int j, double x, int k, double y, double z) {
 394     return (x + y + z) / (double)(i + j + k);
 395 }
 396
 397 // Simple print function for testing void calls.
 398 void printi(int x) {
 399     cout << x << endl;
 400 }
 401
// Table of built-in functions, each paired with its CallInfo via the FN
// macro.  In every typeSigN(...) call the first ARGTYPE is the return type
// and the rest are the parameter types.  Note that 'sin' here is the
// macro alias for sinFn defined above.
Function functions[] = {
    FN(puts,    CallInfo::typeSig1(ARGTYPE_I, ARGTYPE_P)),
    FN(sin,     CallInfo::typeSig1(ARGTYPE_D, ARGTYPE_D)),
    FN(malloc,  CallInfo::typeSig1(ARGTYPE_P, ARGTYPE_P)),
    FN(free,    CallInfo::typeSig1(ARGTYPE_V, ARGTYPE_P)),
    FN(calld1,  CallInfo::typeSig8(ARGTYPE_D, ARGTYPE_D, ARGTYPE_D, ARGTYPE_D,
                                   ARGTYPE_D, ARGTYPE_D, ARGTYPE_D, ARGTYPE_D, ARGTYPE_D)),
    FN(callid1, CallInfo::typeSig6(ARGTYPE_D, ARGTYPE_I, ARGTYPE_D, ARGTYPE_D,
                                   ARGTYPE_I, ARGTYPE_I, ARGTYPE_D)),
    FN(callid2, CallInfo::typeSig4(ARGTYPE_D, ARGTYPE_I, ARGTYPE_I, ARGTYPE_I, ARGTYPE_D)),
    FN(callid3, CallInfo::typeSig6(ARGTYPE_D, ARGTYPE_I, ARGTYPE_I, ARGTYPE_D,
                                   ARGTYPE_I, ARGTYPE_D, ARGTYPE_D)),
    FN(printi,  CallInfo::typeSig1(ARGTYPE_V, ARGTYPE_I)),
};
 416
 417 template<typename out, typename in> out
 418 lexical_cast(in arg)
 419 {
 420     stringstream tmp;
 421     out ret;
 422     if ((tmp << arg && tmp >> ret && tmp.eof()))
 423         return ret;
 424     cerr << "bad lexical cast from " << arg << endl;
 425     exit(1);
 426 }
 427
 428 int32_t
 429 immI(const string &s)
 430 {
 431     stringstream tmp(s);
 432     int32_t ret;
 433     if ((s.find("0x") == 0 || s.find("0X") == 0) &&
 434         (tmp >> hex >> ret && tmp.eof())) {
 435         return ret;
 436     }
 437     return lexical_cast<int32_t>(s);
 438 }
 439
 440 uint64_t
 441 immQ(const string &s)
 442 {
 443     stringstream tmp(s);
 444     uint64_t ret;
 445     if ((s.find("0x") == 0 || s.find("0X") == 0) &&
 446         (tmp >> hex >> ret && tmp.eof())) {
 447         return ret;
 448     }
 449     return lexical_cast<uint64_t>(s);
 450 }
 451
// Parse a double immediate via lexical_cast, which prints an error and
// exits the process when the text is not entirely a valid double.
double
immD(const string &s)
{
    return lexical_cast<double>(s);
}
 457
 458 template<typename t> t
 459 pop_front(vector<t> &vec)
 460 {
 461     if (vec.empty()) {
 462         cerr << "pop_front of empty vector" << endl;
 463         exit(1);
 464     }
 465    t tmp = vec[0];
 466    vec.erase(vec.begin());
 467    return tmp;
 468 }
 469
 470 void
 471 dep_u8(char *&buf, uint8_t byte, uint32_t &cksum)
 472 {
 473     sprintf(buf, "%2.2X", byte);
 474     cksum += byte;
 475     buf += 2;
 476 }
 477
 478 void
 479 dep_u32(char *&buf, uint32_t word, uint32_t &cksum)
 480 {
 481     dep_u8(buf, (uint8_t)((word >> 24) & 0xff), cksum);
 482     dep_u8(buf, (uint8_t)((word >> 16) & 0xff), cksum);
 483     dep_u8(buf, (uint8_t)((word >> 8) & 0xff), cksum);
 484     dep_u8(buf, (uint8_t)((word) & 0xff), cksum);
 485 }
 486
// Intended to write a fragment's code as S-records to the given stream.
// Currently a no-op: both parameters are unnamed and ignored, and the
// former implementation is kept below inside a comment for reference.
void
dump_srecords(ostream &, Fragment *)
{
    // FIXME: Disabled until we work out a sane way to walk through
    // code chunks under the new CodeAlloc regime.
/*
    // Write S-records. Can only do 4-byte addresses at the moment.

    // FIXME: this presently dumps out the entire set of code pages
    // written-to, which means it often dumps *some* bytes on the last
    // page that are not necessarily initialized at all; they're
    // beyond the last instruction written. Fix this to terminate
    // s-record writing early.

    assert(sizeof(uintptr_t) == 4);
    for (Page *page = frag->pages(); page; page = page->next) {
        size_t step = 32;
        uintptr_t p0 = (uintptr_t) &(page->code);
        for (uintptr_t p = p0; p < p0 + sizeof(page->code); p += step) {
            char buf[1024];

            // S-record type S3: 8-char / 4-byte address.
            //
            //     +2 char code 'S3'.
            //     +2 char / 1 byte count of remaining bytes (37 = addr, payload, cksum).
            //     +8 char / 4 byte addr.
            //    ---
            //    +64 char / 32 byte payload.
            //    ---
            //     +2 char / 1 byte checksum.

            uint32_t cksum = 0;
            size_t count = sizeof(p) + step + 1;

            sprintf(buf, "S3");

            char *b = buf + 2; // 2 chars for the "S3" code.

            dep_u8(b, (uint8_t) count, cksum); // Count of data bytes
            dep_u32(b, p, cksum); // Address of the data byte being emitted
            uint8_t *c = (uint8_t*) p;
            for (size_t i = 0; i < step; ++i) { // Actual object code being emitted
                dep_u8(b, c[i], cksum);
            }
            dep_u8(b, (uint8_t)((~cksum) & 0xff), cksum);
            out << string(buf) << endl;
        }
    }
*/
}
 537
 538
 539
// Next fragment profile id.  The FragmentAssembler constructor takes and
// increments it for each fragment created while LC_FragProfile logging is
// enabled.
uint32_t
FragmentAssembler::sProfId = 0;
 542
 543 FragmentAssembler::FragmentAssembler(Lirasm &parent, const string &fragmentName, bool optimize)
 544     : mParent(parent), mFragName(fragmentName), optimize(optimize),
 545       mBufWriter(NULL), mCseFilter(NULL), mExprFilter(NULL), mSoftFloatFilter(NULL), mVerboseWriter(NULL),
 546       mValidateWriter1(NULL), mValidateWriter2(NULL)
 547 {
 548     mFragment = new Fragment(NULL verbose_only(, (mParent.mLogc.lcbits &
 549                                                   nanojit::LC_FragProfile) ?
 550                                                   sProfId++ : 0));
 551     mFragment->lirbuf = mParent.mLirbuf;
 552     mParent.mFragments[mFragName].fragptr = mFragment;
 553
 554     mLir = mBufWriter  = new LirBufWriter(mParent.mLirbuf, nanojit::AvmCore::config);
 555 #ifdef DEBUG
 556     if (optimize) {     // don't re-validate if no optimization has taken place
 557         mLir = mValidateWriter2 =
 558             new ValidateWriter(mLir, mFragment->lirbuf->printer, "end of writer pipeline");
 559     }
 560 #endif
 561 #ifdef DEBUG
 562     if (mParent.mVerbose) {
 563         mLir = mVerboseWriter = new VerboseWriter(mParent.mAlloc, mLir,
 564                                                   mParent.mLirbuf->printer,
 565                                                   &mParent.mLogc);
 566     }
 567 #endif
 568     if (optimize) {
 569         mLir = mCseFilter = new CseFilter(mLir, LIRASM_NUM_USED_ACCS, mParent.mAlloc);
 570     }
 571 #if NJ_SOFTFLOAT_SUPPORTED
 572     if (avmplus::AvmCore::config.soft_float) {
 573         mLir = new SoftFloatFilter(mLir);
 574     }
 575 #endif
 576     if (optimize) {
 577         mLir = mExprFilter = new ExprFilter(mLir);
 578     }
 579 #ifdef DEBUG
 580     mLir = mValidateWriter1 =
 581             new ValidateWriter(mLir, mFragment->lirbuf->printer, "start of writer pipeline");
 582 #endif
 583
 584     mReturnTypeBits = 0;
 585     mLir->ins0(LIR_start);
 586     for (int i = 0; i < nanojit::NumSavedRegs; ++i)
 587         mLir->insParam(i, 1);
 588
 589     mLineno = 0;
 590 }
 591
// Delete the writer pipeline stages created by the constructor.  Stages
// that were never installed are still NULL, and deleting NULL is a no-op.
FragmentAssembler::~FragmentAssembler()
{
    delete mValidateWriter1;
    delete mValidateWriter2;
    delete mVerboseWriter;
    delete mExprFilter;
    delete mSoftFloatFilter;
    delete mCseFilter;
    delete mBufWriter;
}
 602
 603
// Report a fatal assembly error as "line <N>: <msg>" on stderr, where <N>
// is the current instruction's line, then exit with status 1.  Never
// returns.
void
FragmentAssembler::bad(const string &msg)
{
    cerr << "line " << mLineno << ": " << msg << endl;
    exit(1);
}
 610
// Report that instruction 'opname' is not yet implemented, prefixed with
// the current line number, then exit with status 1.  Never returns.
void
FragmentAssembler::nyi(const string &opname)
{
    cerr << "line " << mLineno << ": '" << opname << "' not yet implemented, sorry" << endl;
    exit(1);
}
 617
 618 void
 619 FragmentAssembler::need(size_t n)
 620 {
 621     if (mTokens.size() != n) {
 622         bad("need " + lexical_cast<string>(n)
 623             + " tokens, have " + lexical_cast<string>(mTokens.size()));
 624     }
 625 }
 626
 627 LIns *
 628 FragmentAssembler::ref(const string &lab)
 629 {
 630     if (mLabels.find(lab) == mLabels.end())
 631         bad("unknown label '" + lab + "'");
 632     return mLabels.find(lab)->second;
 633 }
 634
 635 LIns *
 636 FragmentAssembler::assemble_jump(bool isCond)
 637 {
 638     LIns *condition;
 639
 640     if (isCond) {
 641         need(2);
 642         string cond = pop_front(mTokens);
 643         condition = ref(cond);
 644     } else {
 645         need(1);
 646         condition = NULL;
 647     }
 648     string name = pop_front(mTokens);
 649     if (mLabels.find(name) != mLabels.end()) {
 650         LIns *target = ref(name);
 651         return mLir->insBranch(mOpcode, condition, target);
 652     } else {
 653         LIns *ins = mLir->insBranch(mOpcode, condition, NULL);
 654 #ifdef __SUNPRO_CC
 655         mFwdJumps.insert(make_pair<const string, LIns *>(name, ins));
 656 #else
 657         mFwdJumps.insert(make_pair(name, ins));
 658 #endif
 659         return ins;
 660     }
 661 }
 662
 663 LIns *
 664 FragmentAssembler::assemble_load()
 665 {
 666     // Support implicit immediate-as-second-operand modes
 667     // since, unlike sti/stqi, no immediate-displacement
 668     // load opcodes were defined in LIR.
 669     need(2);
 670     if (mTokens[1].find("0x") == 0 ||
 671         mTokens[1].find("0x") == 0 ||
 672         mTokens[1].find_first_of("0123456789") == 0) {
 673         return mLir->insLoad(mOpcode,
 674                              ref(mTokens[0]),
 675                              immI(mTokens[1]), ACCSET_OTHER);
 676     }
 677     bad("immediate offset required for load");
 678     return NULL;  // not reached
 679 }
 680
 681 LIns *
 682 FragmentAssembler::assemble_call(const string &op)
 683 {
 684     CallInfo *ci = new (mParent.mAlloc) CallInfo;
 685     mCallInfos.push_back(ci);
 686     LIns *args[MAXARGS];
 687     memset(&args[0], 0, sizeof(args));
 688
 689     // Assembler syntax for a call:
 690     //
 691     //   call 0x1234 fastcall a b c
 692     //
 693     // requires at least 2 args,
 694     // fn address immediate and ABI token.
 695
 696     if (mTokens.size() < 2)
 697         bad("need at least address and ABI code for " + op);
 698
 699     string func = pop_front(mTokens);
 700     string abi = pop_front(mTokens);
 701
 702     AbiKind _abi = ABI_CDECL;
 703     if (abi == "fastcall")
 704         _abi = ABI_FASTCALL;
 705     else if (abi == "stdcall")
 706         _abi = ABI_STDCALL;
 707     else if (abi == "thiscall")
 708         _abi = ABI_THISCALL;
 709     else if (abi == "cdecl")
 710         _abi = ABI_CDECL;
 711     else
 712         bad("call abi name '" + abi + "'");
 713
 714     if (mTokens.size() > MAXARGS)
 715     bad("too many args to " + op);
 716
 717     bool isBuiltin = mParent.lookupFunction(func, ci);
 718     if (isBuiltin) {
 719         // Built-in:  use its CallInfo.  Also check (some) CallInfo details
 720         // against those from the call site.
 721         if (_abi != ci->_abi)
 722             bad("invalid calling convention for " + func);
 723
 724         size_t i;
 725         for (i = 0; i < mTokens.size(); ++i) {
 726             args[i] = ref(mTokens[mTokens.size() - (i+1)]);
 727         }
 728         if (i != ci->count_args())
 729             bad("wrong number of arguments for " + func);
 730
 731     } else {
 732         // User-defined function:  infer CallInfo details (ABI, arg types, ret
 733         // type) from the call site.
 734         ci->_abi = _abi;
 735         size_t argc = mTokens.size();
 736         ArgType argTypes[MAXARGS];
 737         for (size_t i = 0; i < argc; ++i) {
 738             NanoAssert(i < MAXARGS);    // should give a useful error msg if this fails
 739             args[i] = ref(mTokens[mTokens.size() - (i+1)]);
 740             if      (args[i]->isD()) argTypes[i] = ARGTYPE_D;
 741 #ifdef NANOJIT_64BIT
 742             else if (args[i]->isQ()) argTypes[i] = ARGTYPE_Q;
 743 #endif
 744             else                     argTypes[i] = ARGTYPE_I;
 745         }
 746
 747         // Select return type from opcode.
 748         ArgType retType = ARGTYPE_P;
 749         if      (mOpcode == LIR_callv) retType = ARGTYPE_V;
 750         else if (mOpcode == LIR_calli) retType = ARGTYPE_I;
 751 #ifdef NANOJIT_64BIT
 752         else if (mOpcode == LIR_callq) retType = ARGTYPE_Q;
 753 #endif
 754         else if (mOpcode == LIR_calld) retType = ARGTYPE_D;
 755         else                           nyi("callh");
 756         ci->_typesig = CallInfo::typeSigN(retType, argc, argTypes);
 757     }
 758
 759     return mLir->insCall(ci, args);
 760 }
 761
 762 LIns *
 763 FragmentAssembler::assemble_ret(ReturnType rt)
 764 {
 765     need(1);
 766     mReturnTypeBits |= rt;
 767     return mLir->ins1(mOpcode, ref(mTokens[0]));
 768 }
 769
 770 LasmSideExit*
 771 FragmentAssembler::createSideExit()
 772 {
 773     LasmSideExit* exit = new (mParent.mAlloc) LasmSideExit();
 774     memset(exit, 0, sizeof(LasmSideExit));
 775     exit->from = mFragment;
 776     exit->target = NULL;
 777     exit->line = mLineno;
 778     return exit;
 779 }
 780
 781 GuardRecord*
 782 FragmentAssembler::createGuardRecord(LasmSideExit *exit)
 783 {
 784     GuardRecord *rec = new (mParent.mAlloc) GuardRecord;
 785     memset(rec, 0, sizeof(GuardRecord));
 786     rec->exit = exit;
 787     exit->addGuard(rec);
 788     return rec;
 789 }
 790
 791 LIns *
 792 FragmentAssembler::assemble_guard(bool isCond)
 793 {
 794     GuardRecord* guard = createGuardRecord(createSideExit());
 795
 796     LIns *ins_cond;
 797     if (isCond) {
 798         need(1);
 799         ins_cond = ref(pop_front(mTokens));
 800     } else {
 801         need(0);
 802         ins_cond = NULL;
 803     }
 804
 805     mReturnTypeBits |= RT_GUARD;
 806
 807     if (!mTokens.empty())
 808         bad("too many arguments");
 809
 810     return mLir->insGuard(mOpcode, ins_cond, guard);
 811 }
 812
 813 LIns*
 814 FragmentAssembler::assemble_guard_xov()
 815 {
 816     GuardRecord* guard = createGuardRecord(createSideExit());
 817
 818     need(2);
 819
 820     mReturnTypeBits |= RT_GUARD;
 821
 822     return mLir->insGuardXov(mOpcode, ref(mTokens[0]), ref(mTokens[1]), guard);
 823 }
 824
 825 LIns *
 826 FragmentAssembler::assemble_jump_jov()
 827 {
 828     need(3);
 829
 830     LIns *a = ref(mTokens[0]);
 831     LIns *b = ref(mTokens[1]);
 832     string name = mTokens[2];
 833
 834     if (mLabels.find(name) != mLabels.end()) {
 835         LIns *target = ref(name);
 836         return mLir->insBranchJov(mOpcode, a, b, target);
 837     } else {
 838         LIns *ins = mLir->insBranchJov(mOpcode, a, b, NULL);
 839 #ifdef __SUNPRO_CC
 840         mFwdJumps.insert(make_pair<const string, LIns *>(name, ins));
 841 #else
 842         mFwdJumps.insert(make_pair(name, ins));
 843 #endif
 844         return ins;
 845     }
 846 }
 847
 848 void
 849 FragmentAssembler::endFragment()
 850 {
 851     if (mReturnTypeBits == 0) {
 852         cerr << "warning: no return type in fragment '"
 853              << mFragName << "'" << endl;
 854
 855     } else if (mReturnTypeBits != RT_INT &&
 856 #ifdef NANOJIT_64BIT
 857                mReturnTypeBits != RT_QUAD &&
 858 #endif
 859                mReturnTypeBits != RT_DOUBLE &&
 860                mReturnTypeBits != RT_GUARD)
 861     {
 862         cerr << "warning: multiple return types in fragment '"
 863              << mFragName << "'" << endl;
 864     }
 865
 866     mFragment->lastIns =
 867         mLir->insGuard(LIR_x, NULL, createGuardRecord(createSideExit()));
 868
 869     mParent.mAssm.compile(mFragment, mParent.mAlloc, optimize
 870               verbose_only(, mParent.mLirbuf->printer));
 871
 872     if (mParent.mAssm.error() != nanojit::None) {
 873         cerr << "error during assembly: ";
 874         switch (mParent.mAssm.error()) {
 875           case nanojit::BranchTooFar: cerr << "BranchTooFar"; break;
 876           case nanojit::StackFull: cerr << "StackFull"; break;
 877           case nanojit::UnknownBranch:  cerr << "UnknownBranch"; break;
 878           case nanojit::None: cerr << "None"; break;
 879           default: NanoAssert(0); break;
 880         }
 881         cerr << endl;
 882         std::exit(1);
 883     }
 884
 885     LirasmFragment *f;
 886     f = &mParent.mFragments[mFragName];
 887
 888     switch (mReturnTypeBits) {
 889     case RT_INT:
 890         f->rint = (RetInt)((uintptr_t)mFragment->code());
 891         f->mReturnType = RT_INT;
 892         break;
 893 #ifdef NANOJIT_64BIT
 894     case RT_QUAD:
 895         f->rquad = (RetQuad)((uintptr_t)mFragment->code());
 896         f->mReturnType = RT_QUAD;
 897         break;
 898 #endif
 899     case RT_DOUBLE:
 900         f->rdouble = (RetDouble)((uintptr_t)mFragment->code());
 901         f->mReturnType = RT_DOUBLE;
 902         break;
 903     case RT_GUARD:
 904         f->rguard = (RetGuard)((uintptr_t)mFragment->code());
 905         f->mReturnType = RT_GUARD;
 906         break;
 907     default:
 908         NanoAssert(0);
 909         break;
 910     }
 911
 912     mParent.mFragments[mFragName].mLabels = mLabels;
 913 }
 914
 915 void
 916 FragmentAssembler::tokenizeLine(LirTokenStream &in, LirToken &token)
 917 {
 918     mTokens.clear();
 919     mTokens.push_back(token.data);
 920
 921     while (in.get(token)) {
 922         if (token.type == NEWLINE)
 923             break;
 924         mTokens.push_back(token.data);
 925     }
 926 }
 927
 928 void
 929 FragmentAssembler::extract_any_label(string &lab, char lab_delim)
 930 {
 931     if (mTokens.size() > 2 && mTokens[1].size() == 1 && mTokens[1][0] == lab_delim) {
 932         lab = pop_front(mTokens);
 933         pop_front(mTokens);  // remove punctuation
 934
 935         if (mLabels.find(lab) != mLabels.end())
 936             bad("duplicate label");
 937     }
 938 }
 939
 940 void
 941 FragmentAssembler::resolve_forward_jumps(string &lab, LIns *ins)
 942 {
 943     typedef multimap<string, LIns *> mulmap;
 944 #ifdef __SUNPRO_CC
 945     typedef mulmap::iterator ci;
 946 #else
 947     typedef mulmap::const_iterator ci;
 948 #endif
 949     pair<ci, ci> range = mFwdJumps.equal_range(lab);
 950     for (ci i = range.first; i != range.second; ++i) {
 951         i->second->setTarget(ins);
 952     }
 953     mFwdJumps.erase(lab);
 954 }
 955
     // Assembles a single fragment by reading instruction lines from 'in' until
     // the fragment ends, then calls endFragment().
     //
     // - firstToken: if non-NULL, it is used as the first token instead of
     //   reading one from 'in' (it is consumed once and then reset to NULL).
     // - implicitBegin: when true, end of input terminates the fragment normally
     //   and a '.end' line is an error; when false, '.end' terminates it and end
     //   of input is an error.
     //
     // Each non-blank line has the shape:  [label ':'] [name '='] opcode operands...
     //
     // NOTE(review): several paths continue after calling bad()/nyi() (eg. the
     // LIR_start case reaches assert(ins) with ins still NULL), so those helpers
     // are presumably fatal and never return -- confirm.
 956 void
 957 FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, const LirToken *firstToken)
 958 {
 959     LirToken token;
 960     while (true) {
             // Fetch the next token: the caller-supplied one first, then the stream.
 961         if (firstToken) {
 962             token = *firstToken;
 963             firstToken = NULL;
 964         } else if (!in.get(token)) {
                 // Out of input: only acceptable when no '.end' is expected.
 965             if (!implicitBegin)
 966                 bad("unexpected end of file in fragment '" + mFragName + "'");
 967             break;
 968         }
             // Skip blank lines; anything else must start with a NAME token.
 969         if (token.type == NEWLINE)
 970             continue;
 971         if (token.type != NAME)
 972             bad("unexpected token '" + token.data + "'");
 973
             // Fragment directives are handled before ordinary instructions.
 974         string op = token.data;
 975         if (op == ".begin")
 976             bad("nested fragments are not supported");
 977         if (op == ".end") {
 978             if (implicitBegin)
 979                 bad(".end without .begin");
 980             if (!in.eat(NEWLINE))
 981                 bad("extra junk after .end");
 982             break;
 983         }
 984
             // Remember the line number and collect this line's tokens in mTokens.
 985         mLineno = token.lineno;
 986         tokenizeLine(in, token);
 987
             // Optional 'label:' prefix.
 988         string lab;
 989         LIns *ins = NULL;
 990         extract_any_label(lab, ':');
 991
 992         /* Save label and do any back-patching of deferred forward-jumps. */
             // NOTE(review): the ':' label name is cleared below without being
             // added to mLabels anywhere in this function; only entries already
             // in mFwdJumps get bound to the emitted LIR_label.  Confirm that
             // this is intended.
 993         if (!lab.empty()) {
 994             ins = mLir->ins0(LIR_label);
 995             resolve_forward_jumps(lab, ins);
 996             lab.clear();
 997         }
             // Optional 'name =' prefix.  Unlike the ':' form, 'lab' is kept so
             // the name can be bound to the instruction emitted below.
 998         extract_any_label(lab, '=');
 999
             // The next token is the opcode name; it must be a key of the
             // parent's opcode map.
1000         assert(!mTokens.empty());
1001         op = pop_front(mTokens);
1002         if (mParent.mOpMap.find(op) == mParent.mOpMap.end())
1003             bad("unknown instruction '" + op + "'");
1004
1005         mOpcode = mParent.mOpMap[op];
1006
             // Dispatch on the opcode.  Operands are read from mTokens: ref()
             // is applied to operand tokens and immI()/immQ()/immD() to
             // immediate tokens.
             // NOTE(review): need(n) presumably checks that n operand tokens
             // are present, and CASE64()/CASESF() presumably emit their case
             // label only on 64-bit / soft-float builds -- confirm against
             // their definitions.
1007         switch (mOpcode) {
1008           case LIR_start:
1009             bad("start instructions cannot be specified explicitly");
1010             break;
1011
               // No operands.
1012           case LIR_regfence:
1013             need(0);
1014             ins = mLir->ins0(mOpcode);
1015             break;
1016
               // One operand.
1017           case LIR_livei:
1018           CASE64(LIR_liveq:)
1019           case LIR_lived:
1020           case LIR_negi:
1021           case LIR_negd:
1022           case LIR_noti:
1023           CASESF(LIR_dlo2i:)
1024           CASESF(LIR_dhi2i:)
1025           CASE64(LIR_q2i:)
1026           CASE64(LIR_i2q:)
1027           CASE64(LIR_ui2uq:)
1028           CASE64(LIR_dasq:)
1029           CASE64(LIR_qasd:)
1030           case LIR_i2d:
1031           case LIR_ui2d:
1032           case LIR_d2i:
1033 #if defined NANOJIT_IA32 || defined NANOJIT_X64
1034           case LIR_modi:
1035 #endif
1036             need(1);
1037             ins = mLir->ins1(mOpcode,
1038                              ref(mTokens[0]));
1039             break;
1040
               // Two operands.
1041           case LIR_addi:
1042           case LIR_subi:
1043           case LIR_muli:
1044 #if defined NANOJIT_IA32 || defined NANOJIT_X64
1045           case LIR_divi:
1046 #endif
1047           case LIR_addd:
1048           case LIR_subd:
1049           case LIR_muld:
1050           case LIR_divd:
1051           CASE64(LIR_addq:)
1052           CASE64(LIR_subq:)
1053           case LIR_andi:
1054           case LIR_ori:
1055           case LIR_xori:
1056           CASE64(LIR_andq:)
1057           CASE64(LIR_orq:)
1058           CASE64(LIR_xorq:)
1059           case LIR_lshi:
1060           case LIR_rshi:
1061           case LIR_rshui:
1062           CASE64(LIR_lshq:)
1063           CASE64(LIR_rshq:)
1064           CASE64(LIR_rshuq:)
1065           case LIR_eqi:
1066           case LIR_lti:
1067           case LIR_gti:
1068           case LIR_lei:
1069           case LIR_gei:
1070           case LIR_ltui:
1071           case LIR_gtui:
1072           case LIR_leui:
1073           case LIR_geui:
1074           case LIR_eqd:
1075           case LIR_ltd:
1076           case LIR_gtd:
1077           case LIR_led:
1078           case LIR_ged:
1079           CASE64(LIR_eqq:)
1080           CASE64(LIR_ltq:)
1081           CASE64(LIR_gtq:)
1082           CASE64(LIR_leq:)
1083           CASE64(LIR_geq:)
1084           CASE64(LIR_ltuq:)
1085           CASE64(LIR_gtuq:)
1086           CASE64(LIR_leuq:)
1087           CASE64(LIR_geuq:)
1088           CASESF(LIR_ii2d:)
1089             need(2);
1090             ins = mLir->ins2(mOpcode,
1091                              ref(mTokens[0]),
1092                              ref(mTokens[1]));
1093             break;
1094
               // Three operands.
1095           case LIR_cmovi:
1096           CASE64(LIR_cmovq:)
1097           case LIR_cmovd:
1098             need(3);
1099             ins = mLir->ins3(mOpcode,
1100                              ref(mTokens[0]),
1101                              ref(mTokens[1]),
1102                              ref(mTokens[2]));
1103             break;
1104
               // Jumps are delegated to assemble_jump().
1105           case LIR_j:
1106             ins = assemble_jump(/*isCond*/false);
1107             break;
1108
1109           case LIR_jt:
1110           case LIR_jf:
1111             ins = assemble_jump(/*isCond*/true);
1112             break;
1113
               // Immediates: the single token is converted with immI/immQ/immD
               // rather than looked up with ref().
1114           case LIR_immi:
1115             need(1);
1116             ins = mLir->insImmI(immI(mTokens[0]));
1117             break;
1118
1119 #ifdef NANOJIT_64BIT
1120           case LIR_immq:
1121             need(1);
1122             ins = mLir->insImmQ(immQ(mTokens[0]));
1123             break;
1124 #endif
1125
1126           case LIR_immd:
1127             need(1);
1128             ins = mLir->insImmD(immD(mTokens[0]));
1129             break;
1130
               // Stores: two operand references followed by an immediate, always
               // emitted with ACCSET_OTHER.
1131 #if NJ_EXPANDED_LOADSTORE_SUPPORTED
1132           case LIR_sti2c:
1133           case LIR_sti2s:
1134           case LIR_std2f:
1135 #endif
1136           case LIR_sti:
1137           CASE64(LIR_stq:)
1138           case LIR_std:
1139             need(3);
1140             ins = mLir->insStore(mOpcode, ref(mTokens[0]),
1141                                   ref(mTokens[1]),
1142                                   immI(mTokens[2]), ACCSET_OTHER);
1143             break;
1144
               // Loads are delegated to assemble_load().
1145 #if NJ_EXPANDED_LOADSTORE_SUPPORTED
1146           case LIR_ldc2i:
1147           case LIR_lds2i:
1148           case LIR_ldf2d:
1149 #endif
1150           case LIR_lduc2ui:
1151           case LIR_ldus2ui:
1152           case LIR_ldi:
1153           CASE64(LIR_ldq:)
1154           case LIR_ldd:
1155             ins = assemble_load();
1156             break;
1157
1158           // XXX: insParam gives the one appropriate for the platform.  Eg. if
1159           // you specify qparam on x86 you'll end up with iparam anyway.  Fix
1160           // this.
1161           case LIR_paramp:
1162             need(2);
1163             ins = mLir->insParam(immI(mTokens[0]),
1164                                  immI(mTokens[1]));
1165             break;
1166
1167           // XXX: similar to iparam/qparam above.
1168           case LIR_allocp:
1169             need(1);
1170             ins = mLir->insAlloc(immI(mTokens[0]));
1171             break;
1172
1173           case LIR_skip:
1174             bad("skip instruction is deprecated");
1175             break;
1176
               // Guards, overflow-checking arithmetic, calls and returns are all
               // delegated to dedicated assemble_*() helpers.
1177           case LIR_x:
1178           case LIR_xbarrier:
1179             ins = assemble_guard(/*isCond*/false);
1180             break;
1181
1182           case LIR_xt:
1183           case LIR_xf:
1184             ins = assemble_guard(/*isCond*/true);
1185             break;
1186
1187           case LIR_addxovi:
1188           case LIR_subxovi:
1189           case LIR_mulxovi:
1190             ins = assemble_guard_xov();
1191             break;
1192
1193           case LIR_addjovi:
1194           case LIR_subjovi:
1195           case LIR_muljovi:
1196           CASE64(LIR_addjovq:)
1197           CASE64(LIR_subjovq:)
1198             ins = assemble_jump_jov();
1199             break;
1200
1201           case LIR_callv:
1202           case LIR_calli:
1203           CASESF(LIR_hcalli:)
1204           CASE64(LIR_callq:)
1205           case LIR_calld:
1206             ins = assemble_call(op);
1207             break;
1208
1209           case LIR_reti:
1210             ins = assemble_ret(RT_INT);
1211             break;
1212
1213 #ifdef NANOJIT_64BIT
1214           case LIR_retq:
1215             ins = assemble_ret(RT_QUAD);
1216             break;
1217 #endif
1218
1219           case LIR_retd:
1220             ins = assemble_ret(RT_DOUBLE);
1221             break;
1222
               // An explicit 'label' instruction.  With a 'name =' prefix it also
               // resolves pending forward jumps to that name; the name itself is
               // recorded in mLabels after the switch.
1223           case LIR_label:
1224             ins = mLir->ins0(LIR_label);
1225             if (!lab.empty()) {
1226                 resolve_forward_jumps(lab, ins);
1227             }
1228             break;
1229
               // These opcodes, and any opcode not listed above, are reported
               // through nyi().
1230           case LIR_file:
1231           case LIR_line:
1232           case LIR_xtbl:
1233           case LIR_jtbl:
1234             nyi(op);
1235             break;
1236
1237           default:
1238             nyi(op);
1239             break;
1240         }
1241
             // Every path that gets here is expected to have produced an
             // instruction; bind the 'name =' label (if any) to it.
1242         assert(ins);
1243         if (!lab.empty())
1244             mLabels.insert(make_pair(lab, ins));
1245
1246     }
1247     endFragment();
1248 }
1249
1250 /* ------------------ Support for --random -------------------------- */
1251
// Returns a pseudo-random integer in the range 0..(lim-1), ie. the result may
// be zero.  The value is rand() reduced modulo 'lim'.
//
// 'lim' must be non-zero: a zero limit would be a modulo by zero, so it is
// rejected with an assertion instead of being left as undefined behaviour.
static inline size_t
rnd(size_t lim)
{
    assert(lim > 0);
    return size_t(rand()) % lim;
}
1259
1260 // Returns an int32_t in the range -RAND_MAX..RAND_MAX.
1261 static inline int32_t
1262 rndI32()
1263 {
1264     return (rnd(2) ? 1 : -1) * rand();
1265 }
1266
1267 // The maximum number of live values (per type, ie. B/I/Q/D) that are
1268 // available to be used as operands.  If we make it too high we're prone to
1269 // run out of stack space due to spilling.  Needs to be set in consideration
1270 // with spillStackSzB.
     // Note: addOrReplace() only starts overwriting entries once a vector's size
     // is strictly greater than this value, so a vector can actually reach
     // maxLiveValuesPerType + 1 elements.
1271 const size_t maxLiveValuesPerType = 20;
1272
1273 // Returns a uint32_t in the range 0..(RAND_MAX*2).
1274 static inline uint32_t
1275 rndU32()
1276 {
1277     return uint32_t(rnd(2) ? 0 : RAND_MAX) + uint32_t(rand());
1278 }
1279
1280 template<typename t> t
1281 rndPick(vector<t> &v)
1282 {
1283     assert(!v.empty());
1284     return v[rnd(v.size())];
1285 }
1286
1287 // Add the operand, and retire an old one if we have too many.
1288 template<typename t> void
1289 addOrReplace(vector<t> &v, t x)
1290 {
1291     if (v.size() > maxLiveValuesPerType) {
1292         v[rnd(v.size())] = x;    // we're full:  overwrite an existing element
1293     } else {
1294         v.push_back(x);             // add to end
1295     }
1296 }
1297
1298 // Returns a 4-aligned address within the given size.
1299 static int32_t rndOffset32(size_t szB)
1300 {
1301     return int32_t(rnd(szB)) & ~3;
1302 }
1303
1304 // Returns an 8-aligned address within the give size.
1305 static int32_t rndOffset64(size_t szB)
1306 {
1307     return int32_t(rnd(szB)) & ~7;
1308 }
1309
// Call target described by ci_I_I1: takes one int32_t and returns it unchanged.
static int32_t f_I_I1(int32_t a)
{
    const int32_t result = a;
    return result;
}
1314
// Call target described by ci_I_I6: returns the sum of its six int32_t
// arguments, accumulated left to right.
static int32_t f_I_I6(int32_t a, int32_t b, int32_t c, int32_t d, int32_t e, int32_t f)
{
    int32_t sum = a;
    sum += b;
    sum += c;
    sum += d;
    sum += e;
    sum += f;
    return sum;
}
1319
1320 #ifdef NANOJIT_64BIT
// Call target described by ci_Q_Q2: returns the (wrapping) sum of its two
// uint64_t arguments.
static uint64_t f_Q_Q2(uint64_t a, uint64_t b)
{
    uint64_t sum = a;
    sum += b;
    return sum;
}
1325
// Call target described by ci_Q_Q7: returns the (wrapping) sum of its seven
// uint64_t arguments.
static uint64_t f_Q_Q7(uint64_t a, uint64_t b, uint64_t c, uint64_t d,
                       uint64_t e, uint64_t f, uint64_t g)
{
    uint64_t sum = a;
    sum += b;
    sum += c;
    sum += d;
    sum += e;
    sum += f;
    sum += g;
    return sum;
}
1331 #endif
1332
// Call target described by ci_F_F3: returns the sum of its three double
// arguments, added left to right.
static double f_F_F3(double a, double b, double c)
{
    double sum = a;
    sum += b;
    sum += c;
    return sum;
}
1337
// Call target described by ci_F_F8: returns the sum of its eight double
// arguments, added left to right.
static double f_F_F8(double a, double b, double c, double d,
                     double e, double f, double g, double h)
{
    double sum = a;
    sum += b;
    sum += c;
    sum += d;
    sum += e;
    sum += f;
    sum += g;
    sum += h;
    return sum;
}
1343
1344 #ifdef NANOJIT_64BIT
// Call target described by ci_V_IQF: accepts an int32_t, a uint64_t and a
// double, ignores all three and returns nothing.
static void f_V_IQF(int32_t, uint64_t, double)
{
    // Intentionally empty: there is nothing to compute.
}
1349 #endif
1350
     // CallInfo descriptors for the f_* helper functions above, one per helper,
     // each built with the CI macro from the function and a CallInfo::typeSigN
     // signature.  In every typeSigN call here the first ARGTYPE_* matches the
     // helper's return type and the remaining N match its parameters in order
     // (ARGTYPE_I for int32_t, ARGTYPE_Q for uint64_t, ARGTYPE_D for double,
     // ARGTYPE_V for void).  The descriptors whose signatures involve ARGTYPE_Q
     // exist only when NANOJIT_64BIT is defined, like the helpers they describe.
1351 const CallInfo ci_I_I1 = CI(f_I_I1, CallInfo::typeSig1(ARGTYPE_I, ARGTYPE_I));
1352 const CallInfo ci_I_I6 = CI(f_I_I6, CallInfo::typeSig6(ARGTYPE_I, ARGTYPE_I, ARGTYPE_I, ARGTYPE_I,
1353                                                        ARGTYPE_I, ARGTYPE_I, ARGTYPE_I));
1354
1355 #ifdef NANOJIT_64BIT
1356 const CallInfo ci_Q_Q2 = CI(f_Q_Q2, CallInfo::typeSig2(ARGTYPE_Q, ARGTYPE_Q, ARGTYPE_Q));
1357 const CallInfo ci_Q_Q7 = CI(f_Q_Q7, CallInfo::typeSig7(ARGTYPE_Q, ARGTYPE_Q, ARGTYPE_Q, ARGTYPE_Q,
1358                                                        ARGTYPE_Q, ARGTYPE_Q, ARGTYPE_Q, ARGTYPE_Q));
1359 #endif
1360
1361 const CallInfo ci_F_F3 = CI(f_F_F3, CallInfo::typeSig3(ARGTYPE_D, ARGTYPE_D, ARGTYPE_D, ARGTYPE_D));
1362 const CallInfo ci_F_F8 = CI(f_F_F8, CallInfo::typeSig8(ARGTYPE_D, ARGTYPE_D, ARGTYPE_D, ARGTYPE_D,
1363                                                        ARGTYPE_D, ARGTYPE_D, ARGTYPE_D, ARGTYPE_D,
1364                                                        ARGTYPE_D));
1365
1366 #ifdef NANOJIT_64BIT
1367 const CallInfo ci_V_IQF = CI(f_V_IQF, CallInfo::typeSig3(ARGTYPE_V, ARGTYPE_I, ARGTYPE_Q, ARGTYPE_D));
1368 #endif
1369
1370 // Generate a random block containing nIns instructions, plus a few more
1371 // setup/shutdown ones at the start and end.
1372 //
1373 // Basic operation:
1374 // - We divide LIR into numerous classes, mostly according to their type.
1375 //   (See LInsClasses.tbl for details.) Each time around the loop we choose
1376 //   the class randomly, but there is weighting so that some classes are more
1377 //   common than others, in an attempt to reflect the structure of real code.
1378 // - Each instruction that produces a value is put in a buffer of the
1379 //   appropriate type, for possible use as an operand of a later instruction.
1380 //   This buffer is trimmed when its size exceeds 'maxLiveValuesPerType'.
1381 // - If not enough operands are present in a buffer for the particular
1382 //   instruction, we don't add it.
1383 // - Skips aren't explicitly generated, but they do occur if the fragment is
1384 //   sufficiently big that it's spread across multiple chunks.
1385 //
1386 // The following instructions aren't generated yet:
1387 // - LIR_parami/LIR_paramq (hard to test beyond what is auto-generated in fragment
1388 //   prologues)
1389 // - LIR_livei/LIR_liveq/LIR_lived
1390 // - LIR_hcalli
1391 // - LIR_x/LIR_xt/LIR_xf/LIR_xtbl/LIR_addxovi/LIR_subxovi/LIR_mulxovi (hard to
1392 //   test without having multiple fragments;  when we only have one fragment
1393 //   we don't really want to leave it early)
1394 // - LIR_reti/LIR_retq/LIR_retd (hard to test without having multiple fragments)
1395 // - LIR_j/LIR_jt/LIR_jf/LIR_jtbl/LIR_label
1396 // - LIR_file/LIR_line (#ifdef VTUNE only)
1397 // - LIR_modd (not implemented in NJ backends)
1398 //
1399 // Other limitations:
1400 // - Loads always use accSet==ACCSET_OTHER
1401 // - Stores always use accSet==ACCSET_OTHER
1402 //
1403 void
1404 FragmentAssembler::assembleRandomFragment(int nIns)
1405 {
1406     vector<LIns*> Bs;       // boolean values, ie. 32-bit int values produced by tests
1407     vector<LIns*> Is;       // 32-bit int values
1408     vector<LIns*> Qs;       // 64-bit int values
1409     vector<LIns*> Ds;       // 64-bit double values
1410     vector<LIns*> M4s;      // 4 byte allocs
1411     vector<LIns*> M8ps;     // 8+ byte allocs
1412
1413     vector<LOpcode> I_I_ops;
1414     I_I_ops.push_back(LIR_negi);
1415     I_I_ops.push_back(LIR_noti);
1416
1417     // Nb: there are no Q_Q_ops.
1418
1419     vector<LOpcode> D_D_ops;
1420     D_D_ops.push_back(LIR_negd);
1421
1422     vector<LOpcode> I_II_ops;
1423     I_II_ops.push_back(LIR_addi);
1424     I_II_ops.push_back(LIR_subi);
1425     I_II_ops.push_back(LIR_muli);
1426 #if defined NANOJIT_IA32 || defined NANOJIT_X64
1427     I_II_ops.push_back(LIR_divi);
1428     I_II_ops.push_back(LIR_modi);
1429 #endif
1430     I_II_ops.push_back(LIR_andi);
1431     I_II_ops.push_back(LIR_ori);
1432     I_II_ops.push_back(LIR_xori);
1433     I_II_ops.push_back(LIR_lshi);
1434     I_II_ops.push_back(LIR_rshi);
1435     I_II_ops.push_back(LIR_rshui);
1436
1437 #ifdef NANOJIT_64BIT
1438     vector<LOpcode> Q_QQ_ops;
1439     Q_QQ_ops.push_back(LIR_addq);
1440     Q_QQ_ops.push_back(LIR_andq);
1441     Q_QQ_ops.push_back(LIR_orq);
1442     Q_QQ_ops.push_back(LIR_xorq);
1443
1444     vector<LOpcode> Q_QI_ops;
1445     Q_QI_ops.push_back(LIR_lshq);
1446     Q_QI_ops.push_back(LIR_rshq);
1447     Q_QI_ops.push_back(LIR_rshuq);
1448 #endif
1449
1450     vector<LOpcode> D_DD_ops;
1451     D_DD_ops.push_back(LIR_addd);
1452     D_DD_ops.push_back(LIR_subd);
1453     D_DD_ops.push_back(LIR_muld);
1454     D_DD_ops.push_back(LIR_divd);
1455
1456     vector<LOpcode> I_BII_ops;
1457     I_BII_ops.push_back(LIR_cmovi);
1458
1459 #ifdef NANOJIT_64BIT
1460     vector<LOpcode> Q_BQQ_ops;
1461     Q_BQQ_ops.push_back(LIR_cmovq);
1462 #endif
1463
1464     vector<LOpcode> D_BDD_ops;
1465     D_BDD_ops.push_back(LIR_cmovd);
1466
1467     vector<LOpcode> B_II_ops;
1468     B_II_ops.push_back(LIR_eqi);
1469     B_II_ops.push_back(LIR_lti);
1470     B_II_ops.push_back(LIR_gti);
1471     B_II_ops.push_back(LIR_lei);
1472     B_II_ops.push_back(LIR_gei);
1473     B_II_ops.push_back(LIR_ltui);
1474     B_II_ops.push_back(LIR_gtui);
1475     B_II_ops.push_back(LIR_leui);
1476     B_II_ops.push_back(LIR_geui);
1477
1478 #ifdef NANOJIT_64BIT
1479     vector<LOpcode> B_QQ_ops;
1480     B_QQ_ops.push_back(LIR_eqq);
1481     B_QQ_ops.push_back(LIR_ltq);
1482     B_QQ_ops.push_back(LIR_gtq);
1483     B_QQ_ops.push_back(LIR_leq);
1484     B_QQ_ops.push_back(LIR_geq);
1485     B_QQ_ops.push_back(LIR_ltuq);
1486     B_QQ_ops.push_back(LIR_gtuq);
1487     B_QQ_ops.push_back(LIR_leuq);
1488     B_QQ_ops.push_back(LIR_geuq);
1489 #endif
1490
1491     vector<LOpcode> B_DD_ops;
1492     B_DD_ops.push_back(LIR_eqd);
1493     B_DD_ops.push_back(LIR_ltd);
1494     B_DD_ops.push_back(LIR_gtd);
1495     B_DD_ops.push_back(LIR_led);
1496     B_DD_ops.push_back(LIR_ged);
1497
1498 #ifdef NANOJIT_64BIT
1499     vector<LOpcode> Q_I_ops;
1500     Q_I_ops.push_back(LIR_i2q);
1501     Q_I_ops.push_back(LIR_ui2uq);
1502
1503     vector<LOpcode> I_Q_ops;
1504     I_Q_ops.push_back(LIR_q2i);
1505 #endif
1506
1507     vector<LOpcode> D_I_ops;
1508 #if !NJ_SOFTFLOAT_SUPPORTED
1509     // Don't emit LIR_{ui,i}2d for soft-float platforms because the soft-float filter removes them.
1510     D_I_ops.push_back(LIR_i2d);
1511     D_I_ops.push_back(LIR_ui2d);
1512 #elif defined(NANOJIT_ARM)
1513     // The ARM back-end can detect FP support at run-time.
1514     if (avmplus::AvmCore::config.arm_vfp) {
1515         D_I_ops.push_back(LIR_i2d);
1516         D_I_ops.push_back(LIR_ui2d);
1517     }
1518 #endif
1519
1520     vector<LOpcode> I_D_ops;
1521 #if NJ_SOFTFLOAT_SUPPORTED
1522     I_D_ops.push_back(LIR_dlo2i);
1523     I_D_ops.push_back(LIR_dhi2i);
1524 #endif
1525 #if !NJ_SOFTFLOAT_SUPPORTED
1526     // Don't emit LIR_d2i for soft-float platforms because the soft-float filter removes it.
1527     I_D_ops.push_back(LIR_d2i);
1528 #elif defined(NANOJIT_ARM)
1529     // The ARM back-end can detect FP support at run-time.
1530     if (avmplus::AvmCore::config.arm_vfp) {
1531         I_D_ops.push_back(LIR_d2i);
1532     }
1533 #endif
1534
1535 #ifdef NANOJIT_64BIT
1536     vector<LOpcode> Q_D_ops;
1537     Q_D_ops.push_back(LIR_dasq);
1538
1539     vector<LOpcode> D_Q_ops;
1540     D_Q_ops.push_back(LIR_qasd);
1541 #endif
1542
1543     vector<LOpcode> D_II_ops;
1544 #if NJ_SOFTFLOAT_SUPPORTED
1545     D_II_ops.push_back(LIR_ii2d);
1546 #endif
1547
1548     vector<LOpcode> I_loads;
1549     I_loads.push_back(LIR_ldi);          // weight LIR_ldi more heavily
1550     I_loads.push_back(LIR_ldi);
1551     I_loads.push_back(LIR_ldi);
1552     I_loads.push_back(LIR_lduc2ui);
1553     I_loads.push_back(LIR_ldus2ui);
1554 #if NJ_EXPANDED_LOADSTORE_SUPPORTED
1555     I_loads.push_back(LIR_ldc2i);
1556     I_loads.push_back(LIR_lds2i);
1557 #endif
1558
1559 #ifdef NANOJIT_64BIT
1560     vector<LOpcode> Q_loads;
1561     Q_loads.push_back(LIR_ldq);
1562 #endif
1563
1564     vector<LOpcode> D_loads;
1565     D_loads.push_back(LIR_ldd);
1566 #if NJ_EXPANDED_LOADSTORE_SUPPORTED
1567     // this loads a 32-bit float and expands it to 64-bit float
1568     D_loads.push_back(LIR_ldf2d);
1569 #endif
1570
1571     enum LInsClass {
1572 #define CL___(name, relFreq)     name,
1573 #include "LInsClasses.tbl"
1574 #undef CL___
1575         LLAST
1576     };
1577
1578     int relFreqs[LLAST];
1579     memset(relFreqs, 0, sizeof(relFreqs));
1580 #define CL___(name, relFreq)     relFreqs[name] = relFreq;
1581 #include "LInsClasses.tbl"
1582 #undef CL___
1583
1584     int relFreqsSum = 0;    // the sum of the individual relative frequencies
1585     for (int c = 0; c < LLAST; c++) {
1586         relFreqsSum += relFreqs[c];
1587     }
1588
1589     // The number of times each LInsClass value appears in classGenerator[]
1590     // matches 'relFreqs' (see LInsClasses.tbl).  Eg. if relFreqs[LIMM_I] ==
1591     // 10, then LIMM_I appears in classGenerator[] 10 times.
1592     LInsClass* classGenerator = new LInsClass[relFreqsSum];
1593     int j = 0;
1594     for (int c = 0; c < LLAST; c++) {
1595         for (int i = 0; i < relFreqs[c]; i++) {
1596             classGenerator[j++] = LInsClass(c);
1597         }
1598     }
1599
1600     // Used to keep track of how much stack we've explicitly used via
1601     // LIR_allocp.  We then need to keep some reserve for spills as well.
1602     const size_t stackSzB = NJ_MAX_STACK_ENTRY * 4;
1603     const size_t spillStackSzB = 1024;
1604     const size_t maxExplicitlyUsedStackSzB = stackSzB - spillStackSzB;
1605     size_t explicitlyUsedStackSzB = 0;
1606
1607     // Do an 8-byte stack alloc right at the start so that loads and stores
1608     // can be done immediately.
1609     addOrReplace(M8ps, mLir->insAlloc(8));
1610
1611     int n = 0;
1612     while (n < nIns) {
1613
1614         LIns *ins;
1615
1616         switch (classGenerator[rnd(relFreqsSum)]) {
1617
1618         case LFENCE:
1619             if (rnd(2)) {
1620                 mLir->ins0(LIR_regfence);
1621             } else {
1622                 mLir->insGuard(LIR_xbarrier, NULL, createGuardRecord(createSideExit()));
1623             }
1624             n++;
1625             break;
1626
1627         case LALLOC: {
1628             // The stack has a limited size, so we (a) don't want chunks to be
1629             // too big, and (b) have to stop allocating them after a while.
1630             size_t szB = 0;
1631             switch (rnd(3)) {
1632             case 0: szB = 4;                break;
1633             case 1: szB = 8;                break;
1634             case 2: szB = 4 * (rnd(6) + 3); break;  // 12, 16, ..., 32
1635             }
1636             if (explicitlyUsedStackSzB + szB <= maxExplicitlyUsedStackSzB) {
1637                 ins = mLir->insAlloc(szB);
1638                 // We add the result to Is/Qs so it can be used as an ordinary
1639                 // operand, and to M4s/M8ps so that loads/stores can be done from
1640                 // it.
1641 #if defined NANOJIT_64BIT
1642                 addOrReplace(Qs, ins);
1643 #else
1644                 addOrReplace(Is, ins);
1645 #endif
1646                 if (szB == 4)
1647                     addOrReplace(M4s, ins);
1648                 else
1649                     addOrReplace(M8ps, ins);
1650
1651                 // It's possible that we will exceed maxExplicitlyUsedStackSzB
1652                 // by up to 28 bytes.  Doesn't matter.
1653                 explicitlyUsedStackSzB += szB;
1654                 n++;
1655             }
1656             break;
1657         }
1658
1659         // For the immediates, we bias towards smaller numbers, especially 0
1660         // and 1 and small multiples of 4 which are common due to memory
1661         // addressing.  This puts some realistic stress on CseFilter.
1662         case LIMM_I: {
1663             int32_t immI = 0;      // shut gcc up
1664             switch (rnd(5)) {
1665             case 0: immI = 0;                  break;
1666             case 1: immI = 1;                  break;
1667             case 2: immI = 4 * (rnd(256) + 1); break;  // 4, 8, ..., 1024
1668             case 3: immI = rnd(19999) - 9999;  break;  // -9999..9999
1669             case 4: immI = rndI32();           break;  // -RAND_MAX..RAND_MAX
1670             }
1671             ins = mLir->insImmI(immI);
1672             addOrReplace(Is, ins);
1673             n++;
1674             break;
1675         }
1676
1677 #ifdef NANOJIT_64BIT
1678         case LIMM_Q: {
1679             uint64_t imm64 = 0;
1680             switch (rnd(5)) {
1681             case 0: imm64 = 0;                                      break;
1682             case 1: imm64 = 1;                                      break;
1683             case 2: imm64 = 4 * (rnd(256) + 1);                     break;  // 4, 8, ..., 1024
1684             case 3: imm64 = rnd(19999) - 9999;                      break;  // -9999..9999
1685             case 4: imm64 = uint64_t(rndU32()) << 32 | rndU32();    break;  // possibly big!
1686             }
1687             ins = mLir->insImmQ(imm64);
1688             addOrReplace(Qs, ins);
1689             n++;
1690             break;
1691         }
1692 #endif
1693
1694         case LIMM_D: {
1695             // We don't explicitly generate infinities and NaNs here, but they
1696             // end up occurring due to ExprFilter evaluating expressions like
1697             // divd(1,0) and divd(Infinity,Infinity).
1698             double imm64f = 0;
1699             switch (rnd(5)) {
1700             case 0: imm64f = 0.0;                                           break;
1701             case 1: imm64f = 1.0;                                           break;
1702             case 2:
1703             case 3: imm64f = double(rnd(1000));                             break;  // 0.0..9999.0
1704             case 4:
1705                 union {
1706                     double d;
1707                     uint64_t q;
1708                 } u;
1709                 u.q = uint64_t(rndU32()) << 32 | rndU32();
1710                 imm64f = u.d;
1711                 break;
1712             }
1713             ins = mLir->insImmD(imm64f);
1714             addOrReplace(Ds, ins);
1715             n++;
1716             break;
1717         }
1718
1719         case LOP_I_I:
1720             if (!Is.empty()) {
1721                 ins = mLir->ins1(rndPick(I_I_ops), rndPick(Is));
1722                 addOrReplace(Is, ins);
1723                 n++;
1724             }
1725             break;
1726
1727         // case LOP_Q_Q:  no instruction in this category
1728
1729         case LOP_D_D:
1730             if (!Ds.empty()) {
1731                 ins = mLir->ins1(rndPick(D_D_ops), rndPick(Ds));
1732                 addOrReplace(Ds, ins);
1733                 n++;
1734             }
1735             break;
1736
1737         case LOP_I_II:
1738             if (!Is.empty()) {
1739                 LOpcode op = rndPick(I_II_ops);
1740                 LIns* lhs = rndPick(Is);
1741                 LIns* rhs = rndPick(Is);
1742 #if defined NANOJIT_IA32 || defined NANOJIT_X64
1743                 if (op == LIR_divi || op == LIR_modi) {
1744                     // XXX: ExprFilter can't fold a div/mod with constant
1745                     // args, due to the horrible semantics of LIR_modi.  So we
1746                     // just don't generate anything if we hit that case.
1747                     if (!lhs->isImmI() || !rhs->isImmI()) {
1748                         // If the divisor is positive, no problems.  If it's zero, we get an
1749                         // exception.  If it's -1 and the dividend is -2147483648 (-2^31) we get
1750                         // an exception (and this has been encountered in practice).  So we only
1751                         // allow positive divisors, ie. compute:  lhs / (rhs > 0 ? rhs : -k),
1752                         // where k is a random number in the range 2..100 (this ensures we have
1753                         // some negative divisors).
1754                         LIns* gt0  = mLir->ins2ImmI(LIR_gti, rhs, 0);
1755                         LIns* rhs2 = mLir->ins3(LIR_cmovi, gt0, rhs, mLir->insImmI(-((int32_t)rnd(99)) - 2));
1756                         LIns* div  = mLir->ins2(LIR_divi, lhs, rhs2);
1757                         if (op == LIR_divi) {
1758                             ins = div;
1759                             addOrReplace(Is, ins);
1760                             n += 5;
1761                         } else {
1762                             ins = mLir->ins1(LIR_modi, div);
1763                             // Add 'div' to the operands too so it might be used again, because
1764                             // the code generated is different as compared to the case where 'div'
1765                             // isn't used again.
1766                             addOrReplace(Is, div);
1767                             addOrReplace(Is, ins);
1768                             n += 6;
1769                         }
1770                     }
1771                 } else
1772 #endif
1773                 {
1774                     ins = mLir->ins2(op, lhs, rhs);
1775                     addOrReplace(Is, ins);
1776                     n++;
1777                 }
1778             }
1779             break;
1780
1781 #ifdef NANOJIT_64BIT
1782         case LOP_Q_QQ:
1783             if (!Qs.empty()) {
1784                 ins = mLir->ins2(rndPick(Q_QQ_ops), rndPick(Qs), rndPick(Qs));
1785                 addOrReplace(Qs, ins);
1786                 n++;
1787             }
1788             break;
1789
1790         case LOP_Q_QI:
1791             if (!Qs.empty() && !Is.empty()) {
1792                 ins = mLir->ins2(rndPick(Q_QI_ops), rndPick(Qs), rndPick(Is));
1793                 addOrReplace(Qs, ins);
1794                 n++;
1795             }
1796             break;
1797 #endif
1798
1799         case LOP_D_DD:
1800             if (!Ds.empty()) {
1801                 ins = mLir->ins2(rndPick(D_DD_ops), rndPick(Ds), rndPick(Ds));
1802                 addOrReplace(Ds, ins);
1803                 n++;
1804             }
1805             break;
1806
1807         case LOP_I_BII:
1808             if (!Bs.empty() && !Is.empty()) {
1809                 ins = mLir->ins3(rndPick(I_BII_ops), rndPick(Bs), rndPick(Is), rndPick(Is));
1810                 addOrReplace(Is, ins);
1811                 n++;
1812             }
1813             break;
1814
1815 #ifdef NANOJIT_64BIT
1816         case LOP_Q_BQQ:
1817             if (!Bs.empty() && !Qs.empty()) {
1818                 ins = mLir->ins3(rndPick(Q_BQQ_ops), rndPick(Bs), rndPick(Qs), rndPick(Qs));
1819                 addOrReplace(Qs, ins);
1820                 n++;
1821             }
1822             break;
1823 #endif
1824
1825         case LOP_D_BDD:
1826             if (!Bs.empty() && !Ds.empty()) {
1827                 ins = mLir->ins3(rndPick(D_BDD_ops), rndPick(Bs), rndPick(Ds), rndPick(Ds));
1828                 addOrReplace(Ds, ins);
1829                 n++;
1830             }
1831             break;
1832
1833         case LOP_B_II:
1834            if (!Is.empty()) {
1835                ins = mLir->ins2(rndPick(B_II_ops), rndPick(Is), rndPick(Is));
1836                addOrReplace(Bs, ins);
1837                n++;
1838            }
1839             break;
1840
1841 #ifdef NANOJIT_64BIT
1842         case LOP_B_QQ:
1843             if (!Qs.empty()) {
1844                 ins = mLir->ins2(rndPick(B_QQ_ops), rndPick(Qs), rndPick(Qs));
1845                 addOrReplace(Bs, ins);
1846                 n++;
1847             }
1848             break;
1849 #endif
1850
1851         case LOP_B_DD:
1852             if (!Ds.empty()) {
1853                 ins = mLir->ins2(rndPick(B_DD_ops), rndPick(Ds), rndPick(Ds));
1854                 // XXX: we don't push the result, because most (all?) of the
1855                 // backends currently can't handle cmovs/qcmovs that take
1856                 // float comparisons for the test (see bug 520944).  This means
1857                 // that all B_DD values are dead, unfortunately.
1858                 //addOrReplace(Bs, ins);
1859                 n++;
1860             }
1861             break;
1862
1863 #ifdef NANOJIT_64BIT
1864         case LOP_Q_I:
1865             if (!Is.empty()) {
1866                 ins = mLir->ins1(rndPick(Q_I_ops), rndPick(Is));
1867                 addOrReplace(Qs, ins);
1868                 n++;
1869             }
1870             break;
1871 #endif
1872
1873         case LOP_D_I:
1874             if (!Is.empty() && !D_I_ops.empty()) {
1875                 ins = mLir->ins1(rndPick(D_I_ops), rndPick(Is));
1876                 addOrReplace(Ds, ins);
1877                 n++;
1878             }
1879             break;
1880
1881 #ifdef NANOJIT_64BIT
1882         case LOP_I_Q:
1883             if (!Qs.empty()) {
1884                 ins = mLir->ins1(rndPick(I_Q_ops), rndPick(Qs));
1885                 addOrReplace(Is, ins);
1886                 n++;
1887             }
1888             break;
1889 #endif
1890
1891         case LOP_I_D:
1892 // XXX: NativeX64 doesn't implement qhi yet (and it may not need to).
1893 #if !defined NANOJIT_X64
1894             if (!Ds.empty()) {
1895                 ins = mLir->ins1(rndPick(I_D_ops), rndPick(Ds));
1896                 addOrReplace(Is, ins);
1897                 n++;
1898             }
1899 #endif
1900             break;
1901
1902 #if defined NANOJIT_X64
1903         case LOP_Q_D:
1904             if (!Ds.empty()) {
1905                 ins = mLir->ins1(rndPick(Q_D_ops), rndPick(Ds));
1906                 addOrReplace(Qs, ins);
1907                 n++;
1908             }
1909             break;
1910
1911         case LOP_D_Q:
1912             if (!Qs.empty()) {
1913                 ins = mLir->ins1(rndPick(D_Q_ops), rndPick(Qs));
1914                 addOrReplace(Ds, ins);
1915                 n++;
1916             }
1917             break;
1918 #endif
1919
1920         case LOP_D_II:
1921             if (!Is.empty() && !D_II_ops.empty()) {
1922                 ins = mLir->ins2(rndPick(D_II_ops), rndPick(Is), rndPick(Is));
1923                 addOrReplace(Ds, ins);
1924                 n++;
1925             }
1926             break;
1927
1928         case LLD_I: {
1929             vector<LIns*> Ms = rnd(2) ? M4s : M8ps;
1930             if (!Ms.empty()) {
1931                 LIns* base = rndPick(Ms);
1932                 ins = mLir->insLoad(rndPick(I_loads), base, rndOffset32(base->size()), ACCSET_OTHER);
1933                 addOrReplace(Is, ins);
1934                 n++;
1935             }
1936             break;
1937         }
1938
1939 #ifdef NANOJIT_64BIT
1940         case LLD_Q:
1941             if (!M8ps.empty()) {
1942                 LIns* base = rndPick(M8ps);
1943                 ins = mLir->insLoad(rndPick(Q_loads), base, rndOffset64(base->size()), ACCSET_OTHER);
1944                 addOrReplace(Qs, ins);
1945                 n++;
1946             }
1947             break;
1948 #endif
1949
1950         case LLD_D:
1951             if (!M8ps.empty()) {
1952                 LIns* base = rndPick(M8ps);
1953                 ins = mLir->insLoad(rndPick(D_loads), base, rndOffset64(base->size()), ACCSET_OTHER);
1954                 addOrReplace(Ds, ins);
1955                 n++;
1956             }
1957             break;
1958
1959         case LST_I: {
1960             vector<LIns*> Ms = rnd(2) ? M4s : M8ps;
1961             if (!Ms.empty() && !Is.empty()) {
1962                 LIns* base = rndPick(Ms);
1963                 mLir->insStore(rndPick(Is), base, rndOffset32(base->size()), ACCSET_OTHER);
1964                 n++;
1965             }
1966             break;
1967         }
1968
1969 #ifdef NANOJIT_64BIT
1970         case LST_Q:
1971             if (!M8ps.empty() && !Qs.empty()) {
1972                 LIns* base = rndPick(M8ps);
1973                 mLir->insStore(rndPick(Qs), base, rndOffset64(base->size()), ACCSET_OTHER);
1974                 n++;
1975             }
1976             break;
1977 #endif
1978
1979         case LST_D:
1980             if (!M8ps.empty() && !Ds.empty()) {
1981                 LIns* base = rndPick(M8ps);
1982                 mLir->insStore(rndPick(Ds), base, rndOffset64(base->size()), ACCSET_OTHER);
1983                 n++;
1984             }
1985             break;
1986
1987         case LCALL_I_I1:
1988             if (!Is.empty()) {
1989                 LIns* args[1] = { rndPick(Is) };
1990                 ins = mLir->insCall(&ci_I_I1, args);
1991                 addOrReplace(Is, ins);
1992                 n++;
1993             }
1994             break;
1995
1996         case LCALL_I_I6:
1997             if (!Is.empty()) {
1998                 LIns* args[6] = { rndPick(Is), rndPick(Is), rndPick(Is),
1999                                   rndPick(Is), rndPick(Is), rndPick(Is) };
2000                 ins = mLir->insCall(&ci_I_I6, args);
2001                 addOrReplace(Is, ins);
2002                 n++;
2003             }
2004             break;
2005
2006 #ifdef NANOJIT_64BIT
2007         case LCALL_Q_Q2:
2008             if (!Qs.empty()) {
2009                 LIns* args[2] = { rndPick(Qs), rndPick(Qs) };
2010                 ins = mLir->insCall(&ci_Q_Q2, args);
2011                 addOrReplace(Qs, ins);
2012                 n++;
2013             }
2014             break;
2015
2016         case LCALL_Q_Q7:
2017             if (!Qs.empty()) {
2018                 LIns* args[7] = { rndPick(Qs), rndPick(Qs), rndPick(Qs), rndPick(Qs),
2019                                   rndPick(Qs), rndPick(Qs), rndPick(Qs) };
2020                 ins = mLir->insCall(&ci_Q_Q7, args);
2021                 addOrReplace(Qs, ins);
2022                 n++;
2023             }
2024             break;
2025 #endif
2026
2027         case LCALL_D_D3:
2028             if (!Ds.empty()) {
2029                 LIns* args[3] = { rndPick(Ds), rndPick(Ds), rndPick(Ds) };
2030                 ins = mLir->insCall(&ci_F_F3, args);
2031                 addOrReplace(Ds, ins);
2032                 n++;
2033             }
2034             break;
2035
2036         case LCALL_D_D8:
2037             if (!Ds.empty()) {
2038                 LIns* args[8] = { rndPick(Ds), rndPick(Ds), rndPick(Ds), rndPick(Ds),
2039                                   rndPick(Ds), rndPick(Ds), rndPick(Ds), rndPick(Ds) };
2040                 ins = mLir->insCall(&ci_F_F8, args);
2041                 addOrReplace(Ds, ins);
2042                 n++;
2043             }
2044             break;
2045
2046 #ifdef NANOJIT_64BIT
2047         case LCALL_V_IQD:
2048             if (!Is.empty() && !Qs.empty() && !Ds.empty()) {
2049                 // Nb: args[] holds the args in reverse order... sigh.
2050                 LIns* args[3] = { rndPick(Ds), rndPick(Qs), rndPick(Is) };
2051                 ins = mLir->insCall(&ci_V_IQF, args);
2052                 n++;
2053             }
2054             break;
2055 #endif
2056
2057         case LLABEL:
2058             // Although no jumps are generated yet, labels are important
2059             // because they delimit areas where CSE can be applied.  Without
2060             // them, CSE can be applied over very long regions, which leads to
2061             // values that have very large live ranges, which leads to stack
2062             // overflows.
2063             mLir->ins0(LIR_label);
2064             n++;
2065             break;
2066
2067         default:
2068             NanoAssert(0);
2069             break;
2070         }
2071     }
2072
2073     delete[] classGenerator;
2074
2075     // Return 0.
2076     mReturnTypeBits |= RT_INT;
2077     mLir->ins1(LIR_reti, mLir->insImmI(0));
2078
2079     endFragment();
2080 }
2081
2082 Lirasm::Lirasm(bool verbose) :
2083     mAssm(mCodeAlloc, mAlloc, mAlloc, &mCore, &mLogc, nanojit::AvmCore::config)
2084 {
2085     mVerbose = verbose;
2086     mLogc.lcbits = 0;
2087
2088     mLirbuf = new (mAlloc) LirBuffer(mAlloc);
2089 #ifdef DEBUG
2090     if (mVerbose) {
2091         mLogc.lcbits = LC_ReadLIR | LC_AfterDCE | LC_Native | LC_RegAlloc | LC_Activation;
2092         mLirbuf->printer = new (mAlloc) LInsPrinter(mAlloc, LIRASM_NUM_USED_ACCS);
2093     }
2094 #endif
2095
2096     // Populate the mOpMap table.
2097 #define OP___(op, number, repKind, retType, isCse) \
2098     mOpMap[#op] = LIR_##op;
2099 #include "nanojit/LIRopcode.tbl"
2100 #undef OP___
2101
2102     // XXX: could add more pointer-sized synonyms here
2103     mOpMap["paramp"] = mOpMap[PTR_SIZE("parami", "paramq")];
2104     mOpMap["livep"]  = mOpMap[PTR_SIZE("livei", "liveq")];
2105 }
2106
2107 Lirasm::~Lirasm()
2108 {
2109     Fragments::iterator i;
2110     for (i = mFragments.begin(); i != mFragments.end(); ++i) {
2111         delete i->second.fragptr;
2112     }
2113 }
2114
2115
2116 bool
2117 Lirasm::lookupFunction(const string &name, CallInfo *&ci)
2118 {
2119     const size_t nfuns = sizeof(functions) / sizeof(functions[0]);
2120     for (size_t i = 0; i < nfuns; i++) {
2121         if (name == functions[i].name) {
2122             *ci = functions[i].callInfo;
2123             return true;
2124         }
2125     }
2126
2127     Fragments::const_iterator func = mFragments.find(name);
2128     if (func != mFragments.end()) {
2129         // The ABI, arg types and ret type will be overridden by the caller.
2130         if (func->second.mReturnType == RT_DOUBLE) {
2131             CallInfo target = {(uintptr_t) func->second.rdouble,
2132                                0, ABI_FASTCALL, /*isPure*/0, ACCSET_STORE_ANY
2133                                verbose_only(, func->first.c_str()) };
2134             *ci = target;
2135
2136         } else {
2137             CallInfo target = {(uintptr_t) func->second.rint,
2138                                0, ABI_FASTCALL, /*isPure*/0, ACCSET_STORE_ANY
2139                                verbose_only(, func->first.c_str()) };
2140             *ci = target;
2141         }
2142         return false;
2143
2144     } else {
2145         bad("invalid function reference " + name);
2146         return false;
2147     }
2148 }
2149
2150 void
2151 Lirasm::assemble(istream &in, bool optimize)
2152 {
2153     LirTokenStream ts(in);
2154     bool first = true;
2155
2156     LirToken token;
2157     while (ts.get(token)) {
2158
2159         if (token.type == NEWLINE)
2160             continue;
2161         if (token.type != NAME)
2162             bad("unexpected token '" + token.data + "'");
2163
2164         const string &op = token.data;
2165         if (op == ".patch") {
2166             handlePatch(ts);
2167         } else if (op == ".begin") {
2168             string name;
2169             if (!ts.getName(name))
2170                 bad("expected fragment name after .begin");
2171             if (!ts.eat(NEWLINE))
2172                 bad("extra junk after .begin " + name);
2173
2174             FragmentAssembler assembler(*this, name, optimize);
2175             assembler.assembleFragment(ts, false, NULL);
2176             first = false;
2177         } else if (op == ".end") {
2178             bad(".end without .begin");
2179         } else if (first) {
2180             FragmentAssembler assembler(*this, "main", optimize);
2181             assembler.assembleFragment(ts, true, &token);
2182             break;
2183         } else {
2184             bad("unexpected stray opcode '" + op + "'");
2185         }
2186     }
2187 }
2188
2189 void
2190 Lirasm::assembleRandom(int nIns, bool optimize)
2191 {
2192     string name = "main";
2193     FragmentAssembler assembler(*this, name, optimize);
2194     assembler.assembleRandomFragment(nIns);
2195 }
2196
2197 void
2198 Lirasm::handlePatch(LirTokenStream &in)
2199 {
2200     string src, fragName, guardName, destName;
2201
2202     if (!in.getName(src) || !in.eat(PUNCT, "->") || !in.getName(destName))
2203         bad("incorrect syntax");
2204
2205     // Break the src at '.'. This is awkward but the syntax looks nice.
2206     size_t j = src.find('.');
2207     if (j == string::npos || j == 0 || j == src.size() - 1)
2208         bad("incorrect syntax");
2209     fragName = src.substr(0, j);
2210     guardName = src.substr(j + 1);
2211
2212     Fragments::iterator i;
2213     if ((i=mFragments.find(fragName)) == mFragments.end())
2214         bad("invalid fragment reference");
2215     LirasmFragment *frag = &i->second;
2216     if (frag->mLabels.find(guardName) == frag->mLabels.end())
2217         bad("invalid guard reference");
2218     LIns *ins = frag->mLabels.find(guardName)->second;
2219     if ((i=mFragments.find(destName)) == mFragments.end())
2220         bad("invalid guard reference");
2221     ins->record()->exit->target = i->second.fragptr;
2222
2223     mAssm.patch(ins->record()->exit);
2224 }
2225
// Print the command-line help text to standard output and exit with status 0.
//
// progname: program name shown on the "usage:" line.
//
// Keep the defaults quoted here in step with processCmdLine(); in particular
// the --random default must match that function's `defaultSize`.
void
usageAndQuit(const string& progname)
{
    cout <<
        "usage: " << progname << " [options] [filename]\n"
        "Options:\n"
        "  -h --help         print this message\n"
        "  -v --verbose      print LIR and assembly code\n"
        "  --execute         execute LIR\n"
        "  --[no-]optimize   enable or disable optimization of the LIR (default=off)\n"
        "  --random [N]      generate a random LIR block of size N (default=100)\n"
        "\n"
        "Build query options (these print a value for this build of lirasm and exit)\n"
        "  --show-arch       show the architecture ('i386', 'X64', 'arm', 'ppc',\n"
        "                    'sparc', 'mips', or 'sh4')\n"
        "  --show-word-size  show the word size ('32' or '64')\n"
        "  --show-endianness show the endianness ('little-endian' or 'big-endian')\n"
        "\n"
        "i386-specific options:\n"
        "  --[no]sse         use SSE2 instructions (default=on)\n"
        "\n"
        "ARM-specific options:\n"
        "  --arch N          use ARM architecture version N instructions (default=7)\n"
        "  --[no]vfp         use ARM VFP instructions (default=on)\n"
        "\n"
        ;
    exit(0);
}
2254
// Report a fatal error as "<progname>: <msg>" on standard error, then exit
// with status 1.
//
// progname: prefix identifying the program.
// msg:      text of the error.
void
errMsgAndQuit(const string& progname, const string& msg)
{
    cerr << progname;
    cerr << ": ";
    cerr << msg;
    cerr << endl;
    exit(1);
}
2261
// Settings gathered from the command line by processCmdLine() and consumed
// by main().
struct CmdLineOptions {
    // argv[0]; used as the prefix of usage and error messages.
    string progname;

    // Set by -v / --verbose.
    bool verbose;

    // Set by --execute.
    bool execute;

    // Set by --optimize, cleared by --no-optimize.
    bool optimize;

    // Size requested with --random; 0 means --random was not given.
    int random;

    // Input file name; empty when none was given.
    string filename;
};
2270
2271 static void
2272 processCmdLine(int argc, char **argv, CmdLineOptions& opts)
2273 {
2274     opts.progname = argv[0];
2275     opts.verbose  = false;
2276     opts.execute  = false;
2277     opts.random   = 0;
2278     opts.optimize = false;
2279
2280     // Architecture-specific options.
2281 #if defined NANOJIT_IA32
2282     bool            i386_sse = true;
2283 #elif defined NANOJIT_ARM
2284     unsigned int    arm_arch = 7;
2285     bool            arm_vfp = true;
2286 #endif
2287
2288     for (int i = 1; i < argc; i++) {
2289         string arg = argv[i];
2290
2291         // Common flags for every architecture.
2292         if (arg == "-h" || arg == "--help")
2293             usageAndQuit(opts.progname);
2294         else if (arg == "-v" || arg == "--verbose")
2295             opts.verbose = true;
2296         else if (arg == "--execute")
2297             opts.execute = true;
2298         else if (arg == "--optimize")
2299             opts.optimize = true;
2300         else if (arg == "--no-optimize")
2301             opts.optimize = false;
2302         else if (arg == "--random") {
2303             const int defaultSize = 100;
2304             if (i == argc - 1) {
2305                 opts.random = defaultSize;      // no numeric argument, use default
2306             } else {
2307                 char* endptr;
2308                 int res = strtol(argv[i+1], &endptr, 10);
2309                 if ('\0' == *endptr) {
2310                     // We don't bother checking for overflow.
2311                     if (res <= 0)
2312                         errMsgAndQuit(opts.progname, "--random argument must be greater than zero");
2313                     opts.random = res;          // next arg is a number, use that for the size
2314                     i++;
2315                 } else {
2316                     opts.random = defaultSize;  // next arg is not a number
2317                 }
2318             }
2319         }
2320         else if (arg == "--show-arch") {
2321             const char* str =
2322 #if defined NANOJIT_IA32
2323                 "i386";
2324 #elif defined NANOJIT_X64
2325                 "X64";
2326 #elif defined NANOJIT_ARM
2327                 "arm";
2328 #elif defined NANOJIT_PPC
2329                 "ppc";
2330 #elif defined NANOJIT_SPARC
2331                 "sparc";
2332 #elif defined NANOJIT_MIPS
2333                 "mips";
2334 #elif defined NANOJIT_SH4
2335                 "sh4";
2336 #else
2337 #               error "unknown arch"
2338 #endif
2339             cout << str << "\n";
2340             exit(0);
2341         }
2342         else if (arg == "--show-word-size") {
2343             cout << sizeof(void*) * 8 << "\n";
2344             exit(0);
2345         }
2346         else if (arg == "--show-endianness") {
2347             int32_t x = 0x01020304;
2348             if (*(char*)&x == 0x1) {
2349               cout << "big-endian" << "\n";
2350             } else {
2351               cout << "little-endian" << "\n";
2352             }
2353             exit(0);
2354         }
2355
2356         // Architecture-specific flags.
2357 #if defined NANOJIT_IA32
2358         else if (arg == "--sse") {
2359             i386_sse = true;
2360         }
2361         else if (arg == "--nosse") {
2362             i386_sse = false;
2363         }
2364 #elif defined NANOJIT_ARM
2365         else if ((arg == "--arch") && (i < argc-1)) {
2366             char* endptr;
2367             arm_arch = strtoul(argv[i+1], &endptr, 10);
2368             // Check that the argument was a number.
2369             if ('\0' == *endptr) {
2370                 if ((arm_arch < 4) || (arm_arch > 7)) {
2371                     errMsgAndQuit(opts.progname, "Unsupported argument to --arch.\n");
2372                 }
2373             } else {
2374                 errMsgAndQuit(opts.progname, "Unrecognized argument to --arch.\n");
2375             }
2376             i++;
2377         } else if (arg == "--vfp") {
2378             arm_vfp = true;
2379         } else if (arg == "--novfp") {
2380             arm_vfp = false;
2381         }
2382 #endif
2383         // Input file names.
2384         else if (arg[0] != '-') {
2385             if (opts.filename.empty())
2386                 opts.filename = arg;
2387             else
2388                 errMsgAndQuit(opts.progname, "you can only specify one filename");
2389         }
2390         // No matching flag found, so report the error.
2391         else
2392             errMsgAndQuit(opts.progname, "bad option: " + arg);
2393     }
2394
2395     if ((!opts.random && opts.filename.empty()) || (opts.random && !opts.filename.empty()))
2396         errMsgAndQuit(opts.progname,
2397                       "you must specify either a filename or --random (but not both)");
2398
2399     // Handle the architecture-specific options.
2400 #if defined NANOJIT_IA32
2401     avmplus::AvmCore::config.i386_use_cmov = avmplus::AvmCore::config.i386_sse2 = i386_sse;
2402     avmplus::AvmCore::config.i386_fixed_esp = true;
2403 #elif defined NANOJIT_ARM
2404     // Warn about untested configurations.
2405     if ( ((arm_arch == 5) && (arm_vfp)) || ((arm_arch >= 6) && (!arm_vfp)) ) {
2406         char const * vfp_string = (arm_vfp) ? ("VFP") : ("no VFP");
2407         cerr << "Warning: This configuration (ARMv" << arm_arch << ", " << vfp_string << ") " <<
2408                 "is not regularly tested." << endl;
2409     }
2410
2411     avmplus::AvmCore::config.arm_arch = arm_arch;
2412     avmplus::AvmCore::config.arm_vfp = arm_vfp;
2413     avmplus::AvmCore::config.soft_float = !arm_vfp;
2414 #endif
2415 }
2416
2417 int
2418 main(int argc, char **argv)
2419 {
2420     CmdLineOptions opts;
2421     processCmdLine(argc, argv, opts);
2422
2423     Lirasm lasm(opts.verbose);
2424     if (opts.random) {
2425         lasm.assembleRandom(opts.random, opts.optimize);
2426     } else {
2427         ifstream in(opts.filename.c_str());
2428         if (!in)
2429             errMsgAndQuit(opts.progname, "unable to open file " + opts.filename);
2430         lasm.assemble(in, opts.optimize);
2431     }
2432
2433     Fragments::const_iterator i;
2434     if (opts.execute) {
2435         i = lasm.mFragments.find("main");
2436         if (i == lasm.mFragments.end())
2437             errMsgAndQuit(opts.progname, "error: at least one fragment must be named 'main'");
2438         switch (i->second.mReturnType) {
2439           case RT_INT: {
2440             int res = i->second.rint();
2441             cout << "Output is: " << res << endl;
2442             break;
2443           }
2444 #ifdef NANOJIT_64BIT
2445           case RT_QUAD: {
2446             int res = i->second.rquad();
2447             cout << "Output is: " << res << endl;
2448             break;
2449           }
2450 #endif
2451           case RT_DOUBLE: {
2452             double res = i->second.rdouble();
2453             cout << "Output is: " << res << endl;
2454             break;
2455           }
2456           case RT_GUARD: {
2457             LasmSideExit *ls = (LasmSideExit*) i->second.rguard()->exit;
2458             cout << "Exited block on line: " << ls->line << endl;
2459             break;
2460           }
2461         }
2462     } else {
2463         for (i = lasm.mFragments.begin(); i != lasm.mFragments.end(); i++)
2464             dump_srecords(cout, i->second.fragptr);
2465     }
2466 }