js/src/nanojit/LIR.h

   1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
   2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
   3 /* ***** BEGIN LICENSE BLOCK *****
   4  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   5  *
   6  * The contents of this file are subject to the Mozilla Public License Version
   7  * 1.1 (the "License"); you may not use this file except in compliance with
   8  * the License. You may obtain a copy of the License at
   9  * http://www.mozilla.org/MPL/
  10  *
  11  * Software distributed under the License is distributed on an "AS IS" basis,
  12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  13  * for the specific language governing rights and limitations under the
  14  * License.
  15  *
  16  * The Original Code is [Open Source Virtual Machine].
  17  *
  18  * The Initial Developer of the Original Code is
  19  * Adobe System Incorporated.
  20  * Portions created by the Initial Developer are Copyright (C) 2004-2007
  21  * the Initial Developer. All Rights Reserved.
  22  *
  23  * Contributor(s):
  24  *   Adobe AS3 Team
  25  *
  26  * Alternatively, the contents of this file may be used under the terms of
  27  * either the GNU General Public License Version 2 or later (the "GPL"), or
  28  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  29  * in which case the provisions of the GPL or the LGPL are applicable instead
  30  * of those above. If you wish to allow use of your version of this file only
  31  * under the terms of either the GPL or the LGPL, and not to allow others to
  32  * use your version of this file under the terms of the MPL, indicate your
  33  * decision by deleting the provisions above and replace them with the notice
  34  * and other provisions required by the GPL or the LGPL. If you do not delete
  35  * the provisions above, a recipient may use your version of this file under
  36  * the terms of any one of the MPL, the GPL or the LGPL.
  37  *
  38  * ***** END LICENSE BLOCK ***** */
  39
  40 #ifndef __nanojit_LIR__
  41 #define __nanojit_LIR__
  42
  43 namespace nanojit
  44 {
  45     enum LOpcode
  46 #if defined(_MSC_VER) && _MSC_VER >= 1400
  47 #pragma warning(disable:4480) // nonstandard extension used: specifying underlying type for enum
  48           : unsigned
  49 #endif
  50     {
  51 #define OP___(op, number, repKind, retType, isCse) \
  52         LIR_##op = (number),
  53 #include "LIRopcode.tbl"
  54         LIR_sentinel,
  55 #undef OP___
  56
  57 #ifdef NANOJIT_64BIT
  58 #  define PTR_SIZE(a,b)  b
  59 #else
  60 #  define PTR_SIZE(a,b)  a
  61 #endif
  62
  63         // Pointer-sized synonyms.
  64
  65         LIR_paramp  = PTR_SIZE(LIR_parami,  LIR_paramq),
  66
  67         LIR_retp    = PTR_SIZE(LIR_reti,    LIR_retq),
  68
  69         LIR_livep   = PTR_SIZE(LIR_livei,   LIR_liveq),
  70
  71         LIR_ldp     = PTR_SIZE(LIR_ldi,     LIR_ldq),
  72
  73         LIR_stp     = PTR_SIZE(LIR_sti,     LIR_stq),
  74
  75         LIR_callp   = PTR_SIZE(LIR_calli,   LIR_callq),
  76
  77         LIR_eqp     = PTR_SIZE(LIR_eqi,     LIR_eqq),
  78         LIR_ltp     = PTR_SIZE(LIR_lti,     LIR_ltq),
  79         LIR_gtp     = PTR_SIZE(LIR_gti,     LIR_gtq),
  80         LIR_lep     = PTR_SIZE(LIR_lei,     LIR_leq),
  81         LIR_gep     = PTR_SIZE(LIR_gei,     LIR_geq),
  82         LIR_ltup    = PTR_SIZE(LIR_ltui,    LIR_ltuq),
  83         LIR_gtup    = PTR_SIZE(LIR_gtui,    LIR_gtuq),
  84         LIR_leup    = PTR_SIZE(LIR_leui,    LIR_leuq),
  85         LIR_geup    = PTR_SIZE(LIR_geui,    LIR_geuq),
  86
  87         LIR_addp    = PTR_SIZE(LIR_addi,    LIR_addq),
  88         LIR_subp    = PTR_SIZE(LIR_subi,    LIR_subq),
  89         LIR_addjovp = PTR_SIZE(LIR_addjovi, LIR_addjovq),
  90
  91         LIR_andp    = PTR_SIZE(LIR_andi,    LIR_andq),
  92         LIR_orp     = PTR_SIZE(LIR_ori,     LIR_orq),
  93         LIR_xorp    = PTR_SIZE(LIR_xori,    LIR_xorq),
  94
  95         LIR_lshp    = PTR_SIZE(LIR_lshi,    LIR_lshq),
  96         LIR_rshp    = PTR_SIZE(LIR_rshi,    LIR_rshq),
  97         LIR_rshup   = PTR_SIZE(LIR_rshui,   LIR_rshuq),
  98
  99         LIR_cmovp   = PTR_SIZE(LIR_cmovi,   LIR_cmovq)
 100     };
 101
 102     // 32-bit integer comparisons must be contiguous, as must 64-bit integer
 103     // comparisons and 64-bit float comparisons.
 104     NanoStaticAssert(LIR_eqi + 1 == LIR_lti  &&
 105                      LIR_eqi + 2 == LIR_gti  &&
 106                      LIR_eqi + 3 == LIR_lei  &&
 107                      LIR_eqi + 4 == LIR_gei  &&
 108                      LIR_eqi + 5 == LIR_ltui &&
 109                      LIR_eqi + 6 == LIR_gtui &&
 110                      LIR_eqi + 7 == LIR_leui &&
 111                      LIR_eqi + 8 == LIR_geui);
 112 #ifdef NANOJIT_64BIT
 113     NanoStaticAssert(LIR_eqq + 1 == LIR_ltq  &&
 114                      LIR_eqq + 2 == LIR_gtq  &&
 115                      LIR_eqq + 3 == LIR_leq  &&
 116                      LIR_eqq + 4 == LIR_geq  &&
 117                      LIR_eqq + 5 == LIR_ltuq &&
 118                      LIR_eqq + 6 == LIR_gtuq &&
 119                      LIR_eqq + 7 == LIR_leuq &&
 120                      LIR_eqq + 8 == LIR_geuq);
 121 #endif
 122     NanoStaticAssert(LIR_eqd + 1 == LIR_ltd &&
 123                      LIR_eqd + 2 == LIR_gtd &&
 124                      LIR_eqd + 3 == LIR_led &&
 125                      LIR_eqd + 4 == LIR_ged);
 126
 127     // Various opcodes must be changeable to their opposite with op^1
 128     // (although we use invertXyz() when possible, ie. outside static
 129     // assertions).
 130     NanoStaticAssert((LIR_jt^1) == LIR_jf && (LIR_jf^1) == LIR_jt);
 131
 132     NanoStaticAssert((LIR_xt^1) == LIR_xf && (LIR_xf^1) == LIR_xt);
 133
 134     NanoStaticAssert((LIR_lti^1)  == LIR_gti  && (LIR_gti^1)  == LIR_lti);
 135     NanoStaticAssert((LIR_lei^1)  == LIR_gei  && (LIR_gei^1)  == LIR_lei);
 136     NanoStaticAssert((LIR_ltui^1) == LIR_gtui && (LIR_gtui^1) == LIR_ltui);
 137     NanoStaticAssert((LIR_leui^1) == LIR_geui && (LIR_geui^1) == LIR_leui);
 138
 139 #ifdef NANOJIT_64BIT
 140     NanoStaticAssert((LIR_ltq^1)  == LIR_gtq  && (LIR_gtq^1)  == LIR_ltq);
 141     NanoStaticAssert((LIR_leq^1)  == LIR_geq  && (LIR_geq^1)  == LIR_leq);
 142     NanoStaticAssert((LIR_ltuq^1) == LIR_gtuq && (LIR_gtuq^1) == LIR_ltuq);
 143     NanoStaticAssert((LIR_leuq^1) == LIR_geuq && (LIR_geuq^1) == LIR_leuq);
 144 #endif
 145
 146     NanoStaticAssert((LIR_ltd^1) == LIR_gtd && (LIR_gtd^1) == LIR_ltd);
 147     NanoStaticAssert((LIR_led^1) == LIR_ged && (LIR_ged^1) == LIR_led);
 148
 149
 150     struct GuardRecord;
 151     struct SideExit;
 152
 153     enum AbiKind {
 154         ABI_FASTCALL,
 155         ABI_THISCALL,
 156         ABI_STDCALL,
 157         ABI_CDECL
 158     };
 159
 160     // This is much the same as LTy, but we need to distinguish signed and
 161     // unsigned 32-bit ints so that they will be extended to 64-bits correctly
 162     // on 64-bit platforms.
 163     //
 164     // All values must fit into three bits.  See CallInfo for details.
 165     enum ArgType {
 166         ARGTYPE_V  = 0,     // void
 167         ARGTYPE_I  = 1,     // int32_t
 168         ARGTYPE_UI = 2,     // uint32_t
 169 #ifdef NANOJIT_64BIT
 170         ARGTYPE_Q  = 3,     // uint64_t
 171 #endif
 172         ARGTYPE_D  = 4,     // double
 173
 174         // aliases
 175         ARGTYPE_P = PTR_SIZE(ARGTYPE_I, ARGTYPE_Q), // pointer
 176         ARGTYPE_B = ARGTYPE_I                       // bool
 177     };
 178
 179     enum IndirectCall {
 180         CALL_INDIRECT = 0
 181     };
 182
 183     //-----------------------------------------------------------------------
 184     // Aliasing
 185     // --------
 186     // *Aliasing* occurs when a single memory location can be accessed through
 187     // multiple names.  For example, consider this code:
 188     //
 189     //   ld a[0]
 190     //   sti b[0]
 191     //   ld a[0]
 192     //
 193     // In general, it's possible that a[0] and b[0] may refer to the same
 194     // memory location.  This means, for example, that you cannot safely
 195     // perform CSE on the two loads.  However, if you know that 'a' cannot be
 196     // an alias of 'b' (ie. the two loads do not alias with the store) then
 197     // you can safely perform CSE.
 198     //
 199     // Access regions
 200     // --------------
 201     // Doing alias analysis precisely is difficult.  But it turns out that
 202     // keeping track of aliasing at a coarse level is enough to help with many
 203     // optimisations.  So we conceptually divide the memory that is accessible
 204     // from LIR into a small number of "access regions" (aka. "Acc").  An
 205     // access region may be non-contiguous.  No two access regions can
 206     // overlap.  The union of all access regions covers all memory accessible
 207     // from LIR.
 208     //
 209     // In general a (static) load or store may be executed more than once, and
 210     // thus may access multiple regions;  however, in practice almost all
 211     // loads and stores will obviously access only a single region.  A
 212     // function called from LIR may load and/or store multiple access regions
 213     // (even if executed only once).
 214     //
 215     // If two loads/stores/calls are known to not access the same region(s),
 216     // then they do not alias.
 217     //
 218     // All regions are defined by the embedding.  It makes sense to add new
 219     // embedding-specific access regions when doing so will help with one or
 220     // more optimisations.
 221     //
 222     // Access region sets and instruction markings
 223     // -------------------------------------------
 224     // Each load/store is marked with an "access region set" (aka. "AccSet"),
 225     // which is a set of one or more access regions.  This indicates which
 226     // parts of LIR-accessible memory the load/store may touch.
 227     //
 228     // Each function called from LIR is also marked with an access region set
 229     // for memory stored to by the function.  (We could also have a marking
 230     // for memory loads done by the function, but there's no need at the
 231     // moment.)  These markings apply to the function itself, not the call
 232     // site, ie. they're not context-sensitive.
 233     //
 234     // These load/store/call markings MUST BE ACCURATE -- if not then invalid
 235     // optimisations might occur that change the meaning of the code.
 236     // However, they can safely be imprecise (ie. conservative), ie. a
 237     // load/store/call can be marked with an access region set that is a
 238     // superset of the actual access region set.  Such imprecision is safe but
 239     // may reduce optimisation opportunities.
 240     //
 241     // Optimisations that use access region info
 242     // -----------------------------------------
 243     // Currently only CseFilter uses this, and only for determining whether
 244     // loads can be CSE'd.  Note that CseFilter treats loads that are marked
 245     // with a single access region precisely, but all loads marked with
 246     // multiple access regions get lumped together.  So if you can't mark a
  247     // load with a single access region, you might as well use ACCSET_LOAD_ANY.
 248     //-----------------------------------------------------------------------
 249
 250     // An access region set is represented as a bitset.  Using a uint32_t
 251     // restricts us to at most 32 alias regions for the moment.  This could be
 252     // expanded to a uint64_t easily if needed.
 253     typedef uint32_t AccSet;
 254     static const int NUM_ACCS = sizeof(AccSet) * 8;
 255
 256     // Some common (non-singleton) access region sets.  ACCSET_NONE does not make
 257     // sense for loads or stores (which must access at least one region), it
 258     // only makes sense for calls.
 259     //
 260     static const AccSet ACCSET_NONE      = 0x0;
 261     static const AccSet ACCSET_ALL       = 0xffffffff;
 262     static const AccSet ACCSET_LOAD_ANY  = ACCSET_ALL;      // synonym
 263     static const AccSet ACCSET_STORE_ANY = ACCSET_ALL;      // synonym
 264
 265     inline bool isSingletonAccSet(AccSet accSet) {
 266         // This is a neat way of testing if a value has only one bit set.
 267         return (accSet & (accSet - 1)) == 0;
 268     }
 269
 270     // Full AccSets don't fit into load and store instructions.  But
 271     // load/store AccSets almost always contain a single access region.  We
 272     // take advantage of this to create a compressed AccSet, MiniAccSet, that
 273     // does fit.
 274     //
 275     // The 32 single-region AccSets get compressed into a number in the range
 276     // 0..31 (according to the position of the set bit), and all other
 277     // (multi-region) AccSets get converted into MINI_ACCSET_MULTIPLE.  So the
 278     // representation is lossy in the latter case, but that case is rare for
 279     // loads/stores.  We use a full AccSet for the storeAccSets of calls, for
 280     // which multi-region AccSets are common.
 281     //
  282     // We wrap the uint8_t inside a struct to avoid the possibility of subtle
 283     // bugs caused by mixing up AccSet and MiniAccSet, which is easy to do.
 284     // However, the struct gets padded inside LInsLd in an inconsistent way on
 285     // Windows, so we actually store a MiniAccSetVal inside LInsLd.  Sigh.
 286     // But we use MiniAccSet everywhere else.
 287     //
 288     typedef uint8_t MiniAccSetVal;
 289     struct MiniAccSet { MiniAccSetVal val; };
 290     static const MiniAccSet MINI_ACCSET_MULTIPLE = { 99 };
 291
 292     static MiniAccSet compressAccSet(AccSet accSet) {
 293         if (isSingletonAccSet(accSet)) {
 294             MiniAccSet ret = { uint8_t(msbSet32(accSet)) };
 295             return ret;
 296         }
 297
 298         // If we got here, it must be a multi-region AccSet.
 299         return MINI_ACCSET_MULTIPLE;
 300     }
 301
 302     static AccSet decompressMiniAccSet(MiniAccSet miniAccSet) {
 303         return (miniAccSet.val == MINI_ACCSET_MULTIPLE.val) ? ACCSET_ALL : (1 << miniAccSet.val);
 304     }
 305
 306     // The LoadQual affects how a load can be optimised:
 307     //
 308     // - CONST: These loads are guaranteed to always return the same value
 309     //   during a single execution of a fragment (but the value is allowed to
 310     //   change between executions of the fragment).  This means that the
 311     //   location is never stored to by the LIR, and is never modified by an
 312     //   external entity while the fragment is running.
 313     //
 314     // - NORMAL: These loads may be stored to by the LIR, but are never
 315     //   modified by an external entity while the fragment is running.
 316     //
 317     // - VOLATILE: These loads may be stored to by the LIR, and may be
 318     //   modified by an external entity while the fragment is running.
 319     //
 320     // This gives a lattice with the ordering:  CONST < NORMAL < VOLATILE.
 321     // As usual, it's safe to mark a load with a value higher (less precise)
  322     // than actual, but it may result in fewer optimisations occurring.
 323     //
 324     // Generally CONST loads are highly amenable to optimisation (eg. CSE),
 325     // VOLATILE loads are entirely unoptimisable, and NORMAL loads are in
 326     // between and require some alias analysis to optimise.
 327     //
 328     // Note that CONST has a stronger meaning to "const" in C and C++;  in C
 329     // and C++ a "const" variable may be modified by an external entity, such
 330     // as hardware.  Hence "const volatile" makes sense in C and C++, but
 331     // CONST+VOLATILE doesn't make sense in LIR.
 332     //
 333     // Note also that a 2-bit bitfield in LInsLd is used to hold LoadQual
  334     // values, so you can only add one more value without expanding it.
 335     //
 336     enum LoadQual {
 337         LOAD_CONST    = 0,
 338         LOAD_NORMAL   = 1,
 339         LOAD_VOLATILE = 2
 340     };
 341
 342     struct CallInfo
 343     {
 344     private:
 345         // In CallInfo::_typesig, each entry is three bits.
 346         static const int TYPESIG_FIELDSZB = 3;
 347         static const int TYPESIG_FIELDMASK = 7;
 348
 349     public:
 350         uintptr_t   _address;
 351         uint32_t    _typesig:27;     // 9 3-bit fields indicating arg type, by ARGTYPE above (including ret type): a1 a2 a3 a4 a5 ret
 352         AbiKind     _abi:3;
 353         uint32_t    _isPure:1;      // _isPure=1 means no side-effects, result only depends on args
 354         AccSet      _storeAccSet;   // access regions stored by the function
 355         verbose_only ( const char* _name; )
 356
 357         // The following encode 'r func()' through to 'r func(a1, a2, a3, a4, a5, a6, a7, a8)'.
 358         static inline uint32_t typeSig0(ArgType r) {
 359             return r;
 360         }
 361         static inline uint32_t typeSig1(ArgType r, ArgType a1) {
 362             return a1 << TYPESIG_FIELDSZB*1 | typeSig0(r);
 363         }
 364         static inline uint32_t typeSig2(ArgType r, ArgType a1, ArgType a2) {
 365             return a1 << TYPESIG_FIELDSZB*2 | typeSig1(r, a2);
 366         }
 367         static inline uint32_t typeSig3(ArgType r, ArgType a1, ArgType a2, ArgType a3) {
 368             return a1 << TYPESIG_FIELDSZB*3 | typeSig2(r, a2, a3);
 369         }
 370         static inline uint32_t typeSig4(ArgType r, ArgType a1, ArgType a2, ArgType a3, ArgType a4) {
 371             return a1 << TYPESIG_FIELDSZB*4 | typeSig3(r, a2, a3, a4);
 372         }
 373         static inline uint32_t typeSig5(ArgType r,  ArgType a1, ArgType a2, ArgType a3,
 374                                  ArgType a4, ArgType a5) {
 375             return a1 << TYPESIG_FIELDSZB*5 | typeSig4(r, a2, a3, a4, a5);
 376         }
 377         static inline uint32_t typeSig6(ArgType r, ArgType a1, ArgType a2, ArgType a3,
 378                                  ArgType a4, ArgType a5, ArgType a6) {
 379             return a1 << TYPESIG_FIELDSZB*6 | typeSig5(r, a2, a3, a4, a5, a6);
 380         }
 381         static inline uint32_t typeSig7(ArgType r,  ArgType a1, ArgType a2, ArgType a3,
 382                                  ArgType a4, ArgType a5, ArgType a6, ArgType a7) {
 383             return a1 << TYPESIG_FIELDSZB*7 | typeSig6(r, a2, a3, a4, a5, a6, a7);
 384         }
 385         static inline uint32_t typeSig8(ArgType r,  ArgType a1, ArgType a2, ArgType a3, ArgType a4,
 386                                  ArgType a5, ArgType a6, ArgType a7, ArgType a8) {
 387             return a1 << TYPESIG_FIELDSZB*8 | typeSig7(r, a2, a3, a4, a5, a6, a7, a8);
 388         }
 389         // Encode 'r func(a1, ..., aN))'
 390         static inline uint32_t typeSigN(ArgType r, int N, ArgType a[]) {
 391             uint32_t typesig = r;
 392             for (int i = 0; i < N; i++) {
 393                 typesig |= a[i] << TYPESIG_FIELDSZB*(N-i);
 394             }
 395             return typesig;
 396         }
 397
 398         uint32_t count_args() const;
 399         uint32_t count_int32_args() const;
 400         // Nb: uses right-to-left order, eg. sizes[0] is the size of the right-most arg.
 401         // XXX: See bug 525815 for fixing this.
 402         uint32_t getArgTypes(ArgType* types) const;
 403
 404         inline ArgType returnType() const {
 405             return ArgType(_typesig & TYPESIG_FIELDMASK);
 406         }
 407
 408         inline bool isIndirect() const {
 409             return _address < 256;
 410         }
 411     };
 412
 413     /*
 414      * Record for extra data used to compile switches as jump tables.
 415      */
 416     struct SwitchInfo
 417     {
 418         NIns**      table;       // Jump table; a jump address is NIns*
 419         uint32_t    count;       // Number of table entries
 420         // Index value at last execution of the switch. The index value
 421         // is the offset into the jump table. Thus it is computed as
 422         // (switch expression) - (lowest case value).
 423         uint32_t    index;
 424     };
 425
 426     // Array holding the 'isCse' field from LIRopcode.tbl.
 427     extern const int8_t isCses[];       // cannot be uint8_t, some values are negative
 428
 429     inline bool isCseOpcode(LOpcode op) {
 430         NanoAssert(isCses[op] != -1);   // see LIRopcode.tbl to understand this
 431         return isCses[op] == 1;
 432     }
 433     inline bool isLiveOpcode(LOpcode op) {
 434         return
 435 #if defined NANOJIT_64BIT
 436                op == LIR_liveq ||
 437 #endif
 438                op == LIR_livei || op == LIR_lived;
 439     }
 440     inline bool isRetOpcode(LOpcode op) {
 441         return
 442 #if defined NANOJIT_64BIT
 443             op == LIR_retq ||
 444 #endif
 445             op == LIR_reti || op == LIR_retd;
 446     }
 447     inline bool isCmovOpcode(LOpcode op) {
 448         return
 449 #if defined NANOJIT_64BIT
 450             op == LIR_cmovq ||
 451 #endif
 452             op == LIR_cmovi ||
 453             op == LIR_cmovd;
 454     }
 455     inline bool isCmpIOpcode(LOpcode op) {
 456         return LIR_eqi <= op && op <= LIR_geui;
 457     }
 458     inline bool isCmpSIOpcode(LOpcode op) {
 459         return LIR_eqi <= op && op <= LIR_gei;
 460     }
 461     inline bool isCmpUIOpcode(LOpcode op) {
 462         return LIR_eqi == op || (LIR_ltui <= op && op <= LIR_geui);
 463     }
 464 #ifdef NANOJIT_64BIT
 465     inline bool isCmpQOpcode(LOpcode op) {
 466         return LIR_eqq <= op && op <= LIR_geuq;
 467     }
 468     inline bool isCmpSQOpcode(LOpcode op) {
 469         return LIR_eqq <= op && op <= LIR_geq;
 470     }
 471     inline bool isCmpUQOpcode(LOpcode op) {
 472         return LIR_eqq == op || (LIR_ltuq <= op && op <= LIR_geuq);
 473     }
 474 #endif
 475     inline bool isCmpDOpcode(LOpcode op) {
 476         return LIR_eqd <= op && op <= LIR_ged;
 477     }
 478     inline bool isCmpOpcode(LOpcode op) {
 479         return isCmpIOpcode(op) ||
 480 #if defined NANOJIT_64BIT
 481                isCmpQOpcode(op) ||
 482 #endif
 483                isCmpDOpcode(op);
 484     }
 485
 486     inline LOpcode invertCondJmpOpcode(LOpcode op) {
 487         NanoAssert(op == LIR_jt || op == LIR_jf);
 488         return LOpcode(op ^ 1);
 489     }
 490     inline LOpcode invertCondGuardOpcode(LOpcode op) {
 491         NanoAssert(op == LIR_xt || op == LIR_xf);
 492         return LOpcode(op ^ 1);
 493     }
 494     inline LOpcode invertCmpOpcode(LOpcode op) {
 495         NanoAssert(isCmpOpcode(op));
 496         return LOpcode(op ^ 1);
 497     }
 498
 499     inline LOpcode getCallOpcode(const CallInfo* ci) {
 500         LOpcode op = LIR_callp;
 501         switch (ci->returnType()) {
 502         case ARGTYPE_V: op = LIR_callv; break;
 503         case ARGTYPE_I:
 504         case ARGTYPE_UI: op = LIR_calli; break;
 505 #ifdef NANOJIT_64BIT
 506         case ARGTYPE_Q: op = LIR_callq; break;
 507 #endif
 508         case ARGTYPE_D: op = LIR_calld; break;
 509         default:        NanoAssert(0);  break;
 510         }
 511         return op;
 512     }
 513
 514     LOpcode arithOpcodeD2I(LOpcode op);
 515 #ifdef NANOJIT_64BIT
 516     LOpcode cmpOpcodeI2Q(LOpcode op);
 517 #endif
 518     LOpcode cmpOpcodeD2I(LOpcode op);
 519     LOpcode cmpOpcodeD2UI(LOpcode op);
 520
 521     // Array holding the 'repKind' field from LIRopcode.tbl.
 522     extern const uint8_t repKinds[];
 523
 524     enum LTy {
 525         LTy_V,  // void: no value/no type
 526         LTy_I,  // int:  32-bit integer
 527 #ifdef NANOJIT_64BIT
 528         LTy_Q,  // quad: 64-bit integer
 529 #endif
 530         LTy_D,  // double: 64-bit float
 531
 532         LTy_P  = PTR_SIZE(LTy_I, LTy_Q)   // word-sized integer
 533     };
 534
 535     // Array holding the 'retType' field from LIRopcode.tbl.
 536     extern const LTy retTypes[];
 537
 538     inline RegisterMask rmask(Register r)
 539     {
 540         return RegisterMask(1) << REGNUM(r);
 541     }
 542
 543     //-----------------------------------------------------------------------
 544     // Low-level instructions.  This is a bit complicated, because we have a
 545     // variable-width representation to minimise space usage.
 546     //
 547     // - Instruction size is always an integral multiple of word size.
 548     //
 549     // - Every instruction has at least one word, holding the opcode and the
 550     //   reservation info ("SharedFields").  That word is in class LIns.
 551     //
 552     // - Beyond that, most instructions have 1, 2 or 3 extra words.  These
 553     //   extra words are in classes LInsOp1, LInsOp2, etc (collectively called
 554     //   "LInsXYZ" in what follows).  Each LInsXYZ class also contains an LIns,
 555     //   accessible by the 'ins' member, which holds the LIns data.
 556     //
 557     // - LIR is written forward, but read backwards.  When reading backwards,
 558     //   in order to find the opcode, it must be in a predictable place in the
  559     //   LInsXYZ that isn't affected by instruction width.  Therefore, the LIns
 560     //   word (which contains the opcode) is always the *last* word in an
 561     //   instruction.
 562     //
 563     // - Each instruction is created by casting pre-allocated bytes from a
 564     //   LirBuffer to the LInsXYZ type.  Therefore there are no constructors
 565     //   for LIns or LInsXYZ.
 566     //
 567     // - The standard handle for an instruction is a LIns*.  This actually
 568     //   points to the LIns word, ie. to the final word in the instruction.
 569     //   This is a bit odd, but it allows the instruction's opcode to be
 570     //   easily accessed.  Once you've looked at the opcode and know what kind
 571     //   of instruction it is, if you want to access any of the other words,
 572     //   you need to use toLInsXYZ(), which takes the LIns* and gives you an
 573     //   LInsXYZ*, ie. the pointer to the actual start of the instruction's
 574     //   bytes.  From there you can access the instruction-specific extra
 575     //   words.
 576     //
 577     // - However, from outside class LIns, LInsXYZ isn't visible, nor is
 578     //   toLInsXYZ() -- from outside LIns, all LIR instructions are handled
 579     //   via LIns pointers and get/set methods are used for all LIns/LInsXYZ
 580     //   accesses.  In fact, all data members in LInsXYZ are private and can
 581     //   only be accessed by LIns, which is a friend class.  The only thing
 582     //   anyone outside LIns can do with a LInsXYZ is call getLIns().
 583     //
 584     // - An example Op2 instruction and the likely pointers to it (each line
 585     //   represents a word, and pointers to a line point to the start of the
 586     //   word on that line):
 587     //
 588     //      [ oprnd_2         <-- LInsOp2* insOp2 == toLInsOp2(ins)
 589     //        oprnd_1
 590     //        opcode + resv ] <-- LIns* ins
 591     //
 592     // - LIR_skip instructions are used to link code chunks.  If the first
 593     //   instruction on a chunk isn't a LIR_start, it will be a skip, and the
 594     //   skip's operand will point to the last LIns on the preceding chunk.
 595     //   LInsSk has the same layout as LInsOp1, but we represent it as a
 596     //   different class because there are some places where we treat
 597     //   skips specially and so having it separate seems like a good idea.
 598     //
 599     // - Various things about the size and layout of LIns and LInsXYZ are
 600     //   statically checked in staticSanityCheck().  In particular, this is
 601     //   worthwhile because there's nothing that guarantees that all the
 602     //   LInsXYZ classes have a size that is a multiple of word size (but in
 603     //   practice all sane compilers use a layout that results in this).  We
 604     //   also check that every LInsXYZ is word-aligned in
 605     //   LirBuffer::makeRoom();  this seems sensible to avoid potential
 606     //   slowdowns due to misalignment.  It relies on chunks themselves being
 607     //   word-aligned, which is extremely likely.
 608     //
 609     // - There is an enum, LInsRepKind, with one member for each of the
 610     //   LInsXYZ kinds.  Each opcode is categorised with its LInsRepKind value
 611     //   in LIRopcode.tbl, and this is used in various places.
 612     //-----------------------------------------------------------------------
 613
 614     enum LInsRepKind {
 615         // LRK_XYZ corresponds to class LInsXYZ.
 616         LRK_Op0,
 617         LRK_Op1,
 618         LRK_Op2,
 619         LRK_Op3,
 620         LRK_Ld,
 621         LRK_St,
 622         LRK_Sk,
 623         LRK_C,
 624         LRK_P,
 625         LRK_I,
 626         LRK_QorD,
 627         LRK_Jtbl,
 628         LRK_None    // this one is used for unused opcode numbers
 629     };
 630
 631     class LInsOp0;
 632     class LInsOp1;
 633     class LInsOp2;
 634     class LInsOp3;
 635     class LInsLd;
 636     class LInsSt;
 637     class LInsSk;
 638     class LInsC;
 639     class LInsP;
 640     class LInsI;
 641     class LInsQorD;
 642     class LInsJtbl;
 643
 644     class LIns
 645     {
 646     private:
 647         // SharedFields: fields shared by all LIns kinds.
 648         //
 649         // The .inReg, .regnum, .inAr and .arIndex fields form a "reservation"
 650         // that is used temporarily during assembly to record information
 651         // relating to register allocation.  See class RegAlloc for more
 652         // details.  Note: all combinations of .inReg/.inAr are possible, ie.
 653         // 0/0, 0/1, 1/0, 1/1.
 654         //
 655         // The .isResultLive field is only used for instructions that return
 656         // results.  It indicates if the result is live.  It's set (if
 657         // appropriate) and used only during the codegen pass.
 658         //
 659         struct SharedFields {
 660             uint32_t inReg:1;           // if 1, 'reg' is active
 661             uint32_t regnum:7;
 662             uint32_t inAr:1;            // if 1, 'arIndex' is active
 663             uint32_t isResultLive:1;    // if 1, the instruction's result is live
 664
 665             uint32_t arIndex:14;        // index into stack frame;  displ is -4*arIndex
 666
 667             LOpcode  opcode:8;          // instruction's opcode
 668         };
 669
 670         union {
 671             SharedFields sharedFields;
 672             // Force sizeof(LIns)==8 and 8-byte alignment on 64-bit machines.
 673             // This is necessary because sizeof(SharedFields)==4 and we want all
 674             // instances of LIns to be pointer-aligned.
 675             void* wholeWord;
 676         };
 677
 678         inline void initSharedFields(LOpcode opcode)
 679         {
 680             // We must zero .inReg, .inAR and .isResultLive, but zeroing the
 681             // whole word is easier.  Then we set the opcode.
 682             wholeWord = 0;
 683             sharedFields.opcode = opcode;
 684         }
 685
 686         // LIns-to-LInsXYZ converters.
 687         inline LInsOp0* toLInsOp0() const;
 688         inline LInsOp1* toLInsOp1() const;
 689         inline LInsOp2* toLInsOp2() const;
 690         inline LInsOp3* toLInsOp3() const;
 691         inline LInsLd*  toLInsLd()  const;
 692         inline LInsSt*  toLInsSt()  const;
 693         inline LInsSk*  toLInsSk()  const;
 694         inline LInsC*   toLInsC()   const;
 695         inline LInsP*   toLInsP()   const;
 696         inline LInsI*   toLInsI()   const;
 697         inline LInsQorD* toLInsQorD() const;
 698         inline LInsJtbl*toLInsJtbl()const;
 699
 700         void staticSanityCheck();
 701
 702     public:
 703         // LIns initializers.
 704         inline void initLInsOp0(LOpcode opcode);
 705         inline void initLInsOp1(LOpcode opcode, LIns* oprnd1);
 706         inline void initLInsOp2(LOpcode opcode, LIns* oprnd1, LIns* oprnd2);
 707         inline void initLInsOp3(LOpcode opcode, LIns* oprnd1, LIns* oprnd2, LIns* oprnd3);
 708         inline void initLInsLd(LOpcode opcode, LIns* val, int32_t d, AccSet accSet, LoadQual loadQual);
 709         inline void initLInsSt(LOpcode opcode, LIns* val, LIns* base, int32_t d, AccSet accSet);
 710         inline void initLInsSk(LIns* prevLIns);
 711         // Nb: args[] must be allocated and initialised before being passed in;
 712         // initLInsC() just copies the pointer into the LInsC.
 713         inline void initLInsC(LOpcode opcode, LIns** args, const CallInfo* ci);
 714         inline void initLInsP(int32_t arg, int32_t kind);
 715         inline void initLInsI(LOpcode opcode, int32_t immI);
 716         inline void initLInsQorD(LOpcode opcode, uint64_t immQorD);
 717         inline void initLInsJtbl(LIns* index, uint32_t size, LIns** table);
 718
 719         LOpcode opcode() const { return sharedFields.opcode; }
 720
 721         // Generally, void instructions (statements) are always live and
 722         // non-void instructions (expressions) are live if used by another
 723         // live instruction.  But there are some trickier cases.
 724         // Any non-void instruction can be marked isResultLive=1 even
 725         // when it is unreachable, e.g. due to an always-taken branch.
 726         // The assembler marks it live if it sees any uses, regardless of
 727         // whether those uses are in reachable code or not.
 728         bool isLive() const {
 729             return isV() ||
 730                    sharedFields.isResultLive ||
 731                    (isCall() && !callInfo()->_isPure) ||    // impure calls are always live
 732                    isop(LIR_paramp);                        // LIR_paramp is always live
 733         }
 734         void setResultLive() {
 735             NanoAssert(!isV());
 736             sharedFields.isResultLive = 1;
 737         }
 738
 739         // XXX: old reservation manipulating functions.  See bug 538924.
 740         // Replacement strategy:
 741         // - deprecated_markAsClear() --> clearReg() and/or clearArIndex()
 742         // - deprecated_hasKnownReg() --> isInReg()
 743         // - deprecated_getReg() --> getReg() after checking isInReg()
 744         //
 745         void deprecated_markAsClear() {
 746             sharedFields.inReg = 0;
 747             sharedFields.inAr = 0;
 748         }
 749         bool deprecated_hasKnownReg() {
 750             NanoAssert(isExtant());
 751             return isInReg();
 752         }
 753         Register deprecated_getReg() {
 754             NanoAssert(isExtant());
 755             if (isInReg()) {
 756                 Register r = { sharedFields.regnum };
 757                 return r;
 758             } else {
 759                 return deprecated_UnknownReg;
 760             }
 761         }
 762         uint32_t deprecated_getArIndex() {
 763             NanoAssert(isExtant());
 764             return ( isInAr() ? sharedFields.arIndex : 0 );
 765         }
 766
 767         // Reservation manipulation.
 768         //
 769         // "Extant" mean "in existence, still existing, surviving".  In other
 770         // words, has the value been computed explicitly (not folded into
 771         // something else) and is it still available (in a register or spill
 772         // slot) for use?
 773         bool isExtant() {
 774             return isInReg() || isInAr();
 775         }
 776         bool isInReg() {
 777             return sharedFields.inReg;
 778         }
 779         bool isInRegMask(RegisterMask allow) {
 780             return isInReg() && (rmask(getReg()) & allow);
 781         }
 782         Register getReg() {
 783             NanoAssert(isInReg());
 784             Register r = { sharedFields.regnum };
 785             return r;
 786         }
 787         void setReg(Register r) {
 788             sharedFields.inReg = 1;
 789             sharedFields.regnum = REGNUM(r);
 790         }
 791         void clearReg() {
 792             sharedFields.inReg = 0;
 793         }
 794         bool isInAr() {
 795             return sharedFields.inAr;
 796         }
 797         uint32_t getArIndex() {
 798             NanoAssert(isInAr());
 799             return sharedFields.arIndex;
 800         }
 801         void setArIndex(uint32_t arIndex) {
 802             sharedFields.inAr = 1;
 803             sharedFields.arIndex = arIndex;
 804         }
 805         void clearArIndex() {
 806             sharedFields.inAr = 0;
 807         }
 808
 809         // For various instruction kinds.
 810         inline LIns*    oprnd1() const;
 811         inline LIns*    oprnd2() const;
 812         inline LIns*    oprnd3() const;
 813
 814         // For branches.
 815         inline LIns*    getTarget() const;
 816         inline void     setTarget(LIns* label);
 817
 818         // For guards.
 819         inline GuardRecord* record() const;
 820
 821         // For loads.
 822         inline LoadQual loadQual() const;
 823
 824         // For loads/stores.
 825         inline int32_t  disp() const;
 826         inline MiniAccSet miniAccSet() const;
 827         inline AccSet   accSet() const;
 828
 829         // For LInsSk.
 830         inline LIns*    prevLIns() const;
 831
 832         // For LInsP.
 833         inline uint8_t  paramArg()  const;
 834         inline uint8_t  paramKind() const;
 835
 836         // For LInsI.
 837         inline int32_t  immI() const;
 838
 839         // For LInsQorD.
 840 #ifdef NANOJIT_64BIT
 841         inline int32_t  immQlo() const;
 842         inline uint64_t immQ() const;
 843 #endif
 844         inline int32_t  immDlo() const;
 845         inline int32_t  immDhi() const;
 846         inline double   immD() const;
 847         inline uint64_t immDasQ() const;
 848
 849         // For LIR_allocp.
 850         inline int32_t  size()    const;
 851         inline void     setSize(int32_t nbytes);
 852
 853         // For LInsC.
 854         inline LIns*    arg(uint32_t i)         const;  // right-to-left-order: arg(0) is rightmost
 855         inline uint32_t argc()                  const;
 856         inline LIns*    callArgN(uint32_t n)    const;
 857         inline const CallInfo* callInfo()       const;
 858
 859         // For LIR_jtbl
 860         inline uint32_t getTableSize() const;
 861         inline LIns* getTarget(uint32_t index) const;
 862         inline void setTarget(uint32_t index, LIns* label) const;
 863
 864         // isLInsXYZ() returns true if the instruction has the LInsXYZ form.
 865         // Note that there is some overlap with other predicates, eg.
 866         // isStore()==isLInsSt(), isCall()==isLInsC(), but that's ok;  these
 867         // ones are used mostly to check that opcodes are appropriate for
 868         // instruction layouts, the others are used for non-debugging
 869         // purposes.
 870         bool isLInsOp0() const {
 871             NanoAssert(LRK_None != repKinds[opcode()]);
 872             return LRK_Op0 == repKinds[opcode()];
 873         }
 874         bool isLInsOp1() const {
 875             NanoAssert(LRK_None != repKinds[opcode()]);
 876             return LRK_Op1 == repKinds[opcode()];
 877         }
 878         bool isLInsOp2() const {
 879             NanoAssert(LRK_None != repKinds[opcode()]);
 880             return LRK_Op2 == repKinds[opcode()];
 881         }
 882         bool isLInsOp3() const {
 883             NanoAssert(LRK_None != repKinds[opcode()]);
 884             return LRK_Op3 == repKinds[opcode()];
 885         }
 886         bool isLInsLd() const {
 887             NanoAssert(LRK_None != repKinds[opcode()]);
 888             return LRK_Ld == repKinds[opcode()];
 889         }
 890         bool isLInsSt() const {
 891             NanoAssert(LRK_None != repKinds[opcode()]);
 892             return LRK_St == repKinds[opcode()];
 893         }
 894         bool isLInsSk() const {
 895             NanoAssert(LRK_None != repKinds[opcode()]);
 896             return LRK_Sk == repKinds[opcode()];
 897         }
 898         bool isLInsC() const {
 899             NanoAssert(LRK_None != repKinds[opcode()]);
 900             return LRK_C == repKinds[opcode()];
 901         }
 902         bool isLInsP() const {
 903             NanoAssert(LRK_None != repKinds[opcode()]);
 904             return LRK_P == repKinds[opcode()];
 905         }
 906         bool isLInsI() const {
 907             NanoAssert(LRK_None != repKinds[opcode()]);
 908             return LRK_I == repKinds[opcode()];
 909         }
 910         bool isLInsQorD() const {
 911             NanoAssert(LRK_None != repKinds[opcode()]);
 912             return LRK_QorD == repKinds[opcode()];
 913         }
 914         bool isLInsJtbl() const {
 915             NanoAssert(LRK_None != repKinds[opcode()]);
 916             return LRK_Jtbl == repKinds[opcode()];
 917         }
 918
 919         // LIns predicates.
 920         bool isop(LOpcode o) const {
 921             return opcode() == o;
 922         }
 923         bool isRet() const {
 924             return isRetOpcode(opcode());
 925         }
 926         bool isCmp() const {
 927             return isCmpOpcode(opcode());
 928         }
 929         bool isCall() const {
 930             return isop(LIR_callv) ||
 931                    isop(LIR_calli) ||
 932 #if defined NANOJIT_64BIT
 933                    isop(LIR_callq) ||
 934 #endif
 935                    isop(LIR_calld);
 936         }
 937         bool isCmov() const {
 938             return isCmovOpcode(opcode());
 939         }
 940         bool isStore() const {
 941             return isLInsSt();
 942         }
 943         bool isLoad() const {
 944             return isLInsLd();
 945         }
 946         bool isGuard() const {
 947             return isop(LIR_x) || isop(LIR_xf) || isop(LIR_xt) ||
 948                    isop(LIR_xbarrier) || isop(LIR_xtbl) ||
 949                    isop(LIR_addxovi) || isop(LIR_subxovi) || isop(LIR_mulxovi);
 950         }
 951         bool isJov() const {
 952             return
 953 #ifdef NANOJIT_64BIT
 954                 isop(LIR_addjovq) || isop(LIR_subjovq) ||
 955 #endif
 956                 isop(LIR_addjovi) || isop(LIR_subjovi) || isop(LIR_muljovi);
 957         }
 958         // True if the instruction is a 32-bit integer immediate.
 959         bool isImmI() const {
 960             return isop(LIR_immi);
 961         }
 962         // True if the instruction is a 32-bit integer immediate and
 963         // has the value 'val' when treated as a 32-bit signed integer.
 964         bool isImmI(int32_t val) const {
 965             return isImmI() && immI()==val;
 966         }
 967 #ifdef NANOJIT_64BIT
 968         // True if the instruction is a 64-bit integer immediate.
 969         bool isImmQ() const {
 970             return isop(LIR_immq);
 971         }
 972 #endif
 973         // True if the instruction is a pointer-sized integer immediate.
 974         bool isImmP() const
 975         {
 976 #ifdef NANOJIT_64BIT
 977             return isImmQ();
 978 #else
 979             return isImmI();
 980 #endif
 981         }
 982         // True if the instruction is a 64-bit float immediate.
 983         bool isImmD() const {
 984             return isop(LIR_immd);
 985         }
 986         // True if the instruction is a 64-bit integer or float immediate.
 987         bool isImmQorD() const {
 988             return
 989 #ifdef NANOJIT_64BIT
 990                 isImmQ() ||
 991 #endif
 992                 isImmD();
 993         }
 994         // True if the instruction an any type of immediate.
 995         bool isImmAny() const {
 996             return isImmI() || isImmQorD();
 997         }
 998
 999         bool isBranch() const {
1000             return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j) || isop(LIR_jtbl) || isJov();
1001         }
1002
1003         LTy retType() const {
1004             return retTypes[opcode()];
1005         }
1006         bool isV() const {
1007             return retType() == LTy_V;
1008         }
1009         bool isI() const {
1010             return retType() == LTy_I;
1011         }
1012 #ifdef NANOJIT_64BIT
1013         bool isQ() const {
1014             return retType() == LTy_Q;
1015         }
1016 #endif
1017         bool isD() const {
1018             return retType() == LTy_D;
1019         }
1020         bool isQorD() const {
1021             return
1022 #ifdef NANOJIT_64BIT
1023                 isQ() ||
1024 #endif
1025                 isD();
1026         }
1027         bool isP() const {
1028 #ifdef NANOJIT_64BIT
1029             return isQ();
1030 #else
1031             return isI();
1032 #endif
1033         }
1034
1035         inline void* immP() const
1036         {
1037         #ifdef NANOJIT_64BIT
1038             return (void*)immQ();
1039         #else
1040             return (void*)immI();
1041         #endif
1042         }
1043     };
1044
1045     typedef SeqBuilder<LIns*> InsList;
1046     typedef SeqBuilder<char*> StringList;
1047
1048
    // 0-operand form.  Used for LIR_start and LIR_label.
    //
    // Layout note: 'ins' must stay the last member, because
    // LIns::toLInsOp0() finds this object by stepping back
    // sizeof(LInsOp0) from the address just past the LIns.
    class LInsOp0
    {
    private:
        friend class LIns;

        LIns        ins;

    public:
        // Returns the embedded LIns.
        LIns* getLIns() { return &ins; };
    };
1060
    // 1-operand form.  Used for LIR_reti, unary arithmetic/logic ops, etc.
    //
    // Layout note: 'ins' must stay the last member (LIns::toLInsOp1()
    // steps back sizeof(LInsOp1) from the end of the LIns), and 'oprnd_1'
    // must stay directly before it because LIns::oprnd1() reads the
    // operand through the LInsOp2 layout.
    class LInsOp1
    {
    private:
        friend class LIns;

        LIns*       oprnd_1;

        LIns        ins;

    public:
        // Returns the embedded LIns.
        LIns* getLIns() { return &ins; };
    };
1074
    // 2-operand form.  Used for guards, branches, comparisons, binary
    // arithmetic/logic ops, etc.
    //
    // Layout note: the member order (oprnd_2, oprnd_1, ins) is relied upon.
    // LIns::oprnd1() and LIns::oprnd2() read other instruction forms through
    // this layout, and 'ins' must be last for LIns::toLInsOp2().
    class LInsOp2
    {
    private:
        friend class LIns;

        LIns*       oprnd_2;

        LIns*       oprnd_1;

        LIns        ins;

    public:
        // Returns the embedded LIns.
        LIns* getLIns() { return &ins; };
    };
1091
    // 3-operand form.  Used for conditional moves, jov branches, and xov guards.
    //
    // Layout note: 'oprnd_2', 'oprnd_1' and 'ins' must keep the same
    // trailing order as in LInsOp2, because LIns::oprnd1() and
    // LIns::oprnd2() read them through the LInsOp2 layout.
    class LInsOp3
    {
    private:
        friend class LIns;

        LIns*       oprnd_3;

        LIns*       oprnd_2;

        LIns*       oprnd_1;

        LIns        ins;

    public:
        // Returns the embedded LIns.
        LIns* getLIns() { return &ins; };
    };
1109
    // Used for all loads.
    //
    // Layout note: 'oprnd_1' must stay directly before 'ins', and 'ins'
    // must stay last; LIns::oprnd1() and LIns::toLInsLd() depend on it.
    class LInsLd
    {
    private:
        friend class LIns;

        // Nb: the LIR writer pipeline handles things if a displacement
        // exceeds 16 bits.  This is rare, but does happen occasionally.  We
        // could go to 24 bits but then it would happen so rarely that the
        // handler code would be difficult to test and thus untrustworthy.
        //
        // Nb: the types of these bitfields are all 32-bit integers to ensure
        // they are fully packed on Windows, sigh.  Also, 'loadQual' is
        // unsigned to ensure the values 0, 1, and 2 all fit in 2 bits.
        //
        // Nb: explicit signed keyword for bitfield types is required,
        // some compilers may treat them as unsigned without it.
        // See Bugzilla 584219 comment #18
        signed int  disp:16;            // displacement, see LIns::disp()
        signed int  miniAccSetVal:8;    // compressed AccSet, see LIns::miniAccSet()
        uint32_t    loadQual:2;         // LoadQual value, see LIns::loadQual()

        LIns*       oprnd_1;

        LIns        ins;

    public:
        // Returns the embedded LIns.
        LIns* getLIns() { return &ins; };
    };
1139
    // Used for all stores.
    //
    // Layout note: 'oprnd_2', 'oprnd_1' and 'ins' must keep the same
    // trailing order as in LInsOp2, because LIns::oprnd1() and
    // LIns::oprnd2() read them through the LInsOp2 layout.
    class LInsSt
    {
    private:
        friend class LIns;

        int16_t     disp;               // displacement, see LIns::disp()
        MiniAccSetVal miniAccSetVal;    // compressed AccSet, see LIns::miniAccSet()

        LIns*       oprnd_2;

        LIns*       oprnd_1;

        LIns        ins;

    public:
        // Returns the embedded LIns.
        LIns* getLIns() { return &ins; };
    };
1158
    // Used for LIR_skip.
    //
    // Layout note: 'ins' must stay the last member; LIns::toLInsSk()
    // steps back sizeof(LInsSk) from the end of the LIns.
    class LInsSk
    {
    private:
        friend class LIns;

        LIns*       prevLIns;   // returned by LIns::prevLIns()

        LIns        ins;

    public:
        // Returns the embedded LIns.
        LIns* getLIns() { return &ins; };
    };
1172
    // Used for all variants of LIR_call.
    //
    // Layout note: 'ins' must stay the last member; LIns::toLInsC()
    // steps back sizeof(LInsC) from the end of the LIns.
    class LInsC
    {
    private:
        friend class LIns;

        // Arguments in reverse order, just like insCall() (ie. args[0] holds
        // the rightmost arg).  The array should be allocated by the same
        // allocator as the LIR buffers, because it has the same lifetime.
        LIns**      args;

        // Description of the callee; returned by LIns::callInfo().
        const CallInfo* ci;

        LIns        ins;

    public:
        // Returns the embedded LIns.
        LIns* getLIns() { return &ins; };
    };
1191
    // Used for LIR_paramp.
    //
    // Layout note: 'ins' must stay the last member; LIns::toLInsP()
    // steps back sizeof(LInsP) from the end of the LIns.
    class LInsP
    {
    private:
        friend class LIns;

        uintptr_t   arg:8;      // returned by LIns::paramArg()
        uintptr_t   kind:8;     // returned by LIns::paramKind()

        LIns        ins;

    public:
        // Returns the embedded LIns.
        LIns* getLIns() { return &ins; };
    };
1206
    // Used for LIR_immi and LIR_allocp.
    //
    // For LIR_allocp, 'immI' holds the allocation size in 32-bit words
    // (see LIns::size() and LIns::setSize()).
    //
    // Layout note: 'ins' must stay the last member; LIns::toLInsI()
    // steps back sizeof(LInsI) from the end of the LIns.
    class LInsI
    {
    private:
        friend class LIns;

        int32_t     immI;

        LIns        ins;

    public:
        // Returns the embedded LIns.
        LIns* getLIns() { return &ins; };
    };
1220
    // Used for LIR_immq and LIR_immd.
    //
    // The 64-bit immediate is stored as two 32-bit halves; see
    // LIns::initLInsQorD() for how they are split.
    //
    // Layout note: 'ins' must stay the last member; LIns::toLInsQorD()
    // steps back sizeof(LInsQorD) from the end of the LIns.
    class LInsQorD
    {
    private:
        friend class LIns;

        int32_t     immQorDlo;  // low 32 bits

        int32_t     immQorDhi;  // high 32 bits

        LIns        ins;

    public:
        // Returns the embedded LIns.
        LIns* getLIns() { return &ins; };
    };
1236
    // Used for LIR_jtbl.  'oprnd_1' must be a uint32_t index in
    // the range 0 <= index < size; no range check is performed.
    // 'table' is an array of labels.
    //
    // Layout note: 'oprnd_1' must stay directly before 'ins', and 'ins'
    // must stay last; LIns::oprnd1() and LIns::toLInsJtbl() depend on it.
    class LInsJtbl
    {
    private:
        friend class LIns;

        uint32_t    size;     // number of entries in table
        LIns**      table;    // pointer to table[size] with same lifetime as this LInsJtbl
        LIns*       oprnd_1;  // uint32_t index expression

        LIns        ins;

    public:
        // Returns the embedded LIns.
        LIns* getLIns() { return &ins; }
    };
1254
    // Used only as a placeholder for OP___ macros for unused opcodes in
    // LIRopcode.tbl.  Deliberately empty: it embeds no LIns.
    class LInsNone
    {
    };
1260
1261     LInsOp0*  LIns::toLInsOp0()  const { return (LInsOp0* )(uintptr_t(this+1) - sizeof(LInsOp0 )); }
1262     LInsOp1*  LIns::toLInsOp1()  const { return (LInsOp1* )(uintptr_t(this+1) - sizeof(LInsOp1 )); }
1263     LInsOp2*  LIns::toLInsOp2()  const { return (LInsOp2* )(uintptr_t(this+1) - sizeof(LInsOp2 )); }
1264     LInsOp3*  LIns::toLInsOp3()  const { return (LInsOp3* )(uintptr_t(this+1) - sizeof(LInsOp3 )); }
1265     LInsLd*   LIns::toLInsLd()   const { return (LInsLd*  )(uintptr_t(this+1) - sizeof(LInsLd  )); }
1266     LInsSt*   LIns::toLInsSt()   const { return (LInsSt*  )(uintptr_t(this+1) - sizeof(LInsSt  )); }
1267     LInsSk*   LIns::toLInsSk()   const { return (LInsSk*  )(uintptr_t(this+1) - sizeof(LInsSk  )); }
1268     LInsC*    LIns::toLInsC()    const { return (LInsC*   )(uintptr_t(this+1) - sizeof(LInsC   )); }
1269     LInsP*    LIns::toLInsP()    const { return (LInsP*   )(uintptr_t(this+1) - sizeof(LInsP   )); }
1270     LInsI*    LIns::toLInsI()    const { return (LInsI*   )(uintptr_t(this+1) - sizeof(LInsI   )); }
1271     LInsQorD* LIns::toLInsQorD() const { return (LInsQorD*)(uintptr_t(this+1) - sizeof(LInsQorD)); }
1272     LInsJtbl* LIns::toLInsJtbl() const { return (LInsJtbl*)(uintptr_t(this+1) - sizeof(LInsJtbl)); }
1273
1274     void LIns::initLInsOp0(LOpcode opcode) {
1275         initSharedFields(opcode);
1276         NanoAssert(isLInsOp0());
1277     }
1278     void LIns::initLInsOp1(LOpcode opcode, LIns* oprnd1) {
1279         initSharedFields(opcode);
1280         toLInsOp1()->oprnd_1 = oprnd1;
1281         NanoAssert(isLInsOp1());
1282     }
1283     void LIns::initLInsOp2(LOpcode opcode, LIns* oprnd1, LIns* oprnd2) {
1284         initSharedFields(opcode);
1285         toLInsOp2()->oprnd_1 = oprnd1;
1286         toLInsOp2()->oprnd_2 = oprnd2;
1287         NanoAssert(isLInsOp2());
1288     }
1289     void LIns::initLInsOp3(LOpcode opcode, LIns* oprnd1, LIns* oprnd2, LIns* oprnd3) {
1290         initSharedFields(opcode);
1291         toLInsOp3()->oprnd_1 = oprnd1;
1292         toLInsOp3()->oprnd_2 = oprnd2;
1293         toLInsOp3()->oprnd_3 = oprnd3;
1294         NanoAssert(isLInsOp3());
1295     }
1296     void LIns::initLInsLd(LOpcode opcode, LIns* val, int32_t d, AccSet accSet, LoadQual loadQual) {
1297         initSharedFields(opcode);
1298         toLInsLd()->oprnd_1 = val;
1299         NanoAssert(d == int16_t(d));
1300         toLInsLd()->disp = int16_t(d);
1301         toLInsLd()->miniAccSetVal = compressAccSet(accSet).val;
1302         toLInsLd()->loadQual = loadQual;
1303         NanoAssert(isLInsLd());
1304     }
1305     void LIns::initLInsSt(LOpcode opcode, LIns* val, LIns* base, int32_t d, AccSet accSet) {
1306         initSharedFields(opcode);
1307         toLInsSt()->oprnd_1 = val;
1308         toLInsSt()->oprnd_2 = base;
1309         NanoAssert(d == int16_t(d));
1310         toLInsSt()->disp = int16_t(d);
1311         toLInsSt()->miniAccSetVal = compressAccSet(accSet).val;
1312         NanoAssert(isLInsSt());
1313     }
1314     void LIns::initLInsSk(LIns* prevLIns) {
1315         initSharedFields(LIR_skip);
1316         toLInsSk()->prevLIns = prevLIns;
1317         NanoAssert(isLInsSk());
1318     }
1319     void LIns::initLInsC(LOpcode opcode, LIns** args, const CallInfo* ci) {
1320         initSharedFields(opcode);
1321         toLInsC()->args = args;
1322         toLInsC()->ci = ci;
1323         NanoAssert(isLInsC());
1324     }
1325     void LIns::initLInsP(int32_t arg, int32_t kind) {
1326         initSharedFields(LIR_paramp);
1327         NanoAssert(isU8(arg) && isU8(kind));
1328         toLInsP()->arg = arg;
1329         toLInsP()->kind = kind;
1330         NanoAssert(isLInsP());
1331     }
1332     void LIns::initLInsI(LOpcode opcode, int32_t immI) {
1333         initSharedFields(opcode);
1334         toLInsI()->immI = immI;
1335         NanoAssert(isLInsI());
1336     }
1337     void LIns::initLInsQorD(LOpcode opcode, uint64_t immQorD) {
1338         initSharedFields(opcode);
1339         toLInsQorD()->immQorDlo = int32_t(immQorD);
1340         toLInsQorD()->immQorDhi = int32_t(immQorD >> 32);
1341         NanoAssert(isLInsQorD());
1342     }
1343     void LIns::initLInsJtbl(LIns* index, uint32_t size, LIns** table) {
1344         initSharedFields(LIR_jtbl);
1345         toLInsJtbl()->oprnd_1 = index;
1346         toLInsJtbl()->table = table;
1347         toLInsJtbl()->size = size;
1348         NanoAssert(isLInsJtbl());
1349     }
1350
1351     LIns* LIns::oprnd1() const {
1352         NanoAssert(isLInsOp1() || isLInsOp2() || isLInsOp3() || isLInsLd() || isLInsSt() || isLInsJtbl());
1353         return toLInsOp2()->oprnd_1;
1354     }
1355     LIns* LIns::oprnd2() const {
1356         NanoAssert(isLInsOp2() || isLInsOp3() || isLInsSt());
1357         return toLInsOp2()->oprnd_2;
1358     }
1359     LIns* LIns::oprnd3() const {
1360         NanoAssert(isLInsOp3());
1361         return toLInsOp3()->oprnd_3;
1362     }
1363
1364     LIns* LIns::getTarget() const {
1365         NanoAssert(isBranch() && !isop(LIR_jtbl));
1366         if (isJov())
1367             return oprnd3();
1368         else
1369             return oprnd2();
1370     }
1371
1372     void LIns::setTarget(LIns* label) {
1373         NanoAssert(label && label->isop(LIR_label));
1374         NanoAssert(isBranch() && !isop(LIR_jtbl));
1375         if (isJov())
1376             toLInsOp3()->oprnd_3 = label;
1377         else
1378             toLInsOp2()->oprnd_2 = label;
1379     }
1380
1381     LIns* LIns::getTarget(uint32_t index) const {
1382         NanoAssert(isop(LIR_jtbl));
1383         NanoAssert(index < toLInsJtbl()->size);
1384         return toLInsJtbl()->table[index];
1385     }
1386
1387     void LIns::setTarget(uint32_t index, LIns* label) const {
1388         NanoAssert(label && label->isop(LIR_label));
1389         NanoAssert(isop(LIR_jtbl));
1390         NanoAssert(index < toLInsJtbl()->size);
1391         toLInsJtbl()->table[index] = label;
1392     }
1393
1394     GuardRecord *LIns::record() const {
1395         NanoAssert(isGuard());
1396         switch (opcode()) {
1397         case LIR_x:
1398         case LIR_xt:
1399         case LIR_xf:
1400         case LIR_xtbl:
1401         case LIR_xbarrier:
1402             return (GuardRecord*)oprnd2();
1403
1404         case LIR_addxovi:
1405         case LIR_subxovi:
1406         case LIR_mulxovi:
1407             return (GuardRecord*)oprnd3();
1408
1409         default:
1410             NanoAssert(0);
1411             return NULL;
1412         }
1413     }
1414
1415     LoadQual LIns::loadQual() const {
1416         NanoAssert(isLInsLd());
1417         return (LoadQual)toLInsLd()->loadQual;
1418     }
1419
1420     int32_t LIns::disp() const {
1421         if (isLInsSt()) {
1422             return toLInsSt()->disp;
1423         } else {
1424             NanoAssert(isLInsLd());
1425             return toLInsLd()->disp;
1426         }
1427     }
1428
1429     MiniAccSet LIns::miniAccSet() const {
1430         MiniAccSet miniAccSet;
1431         if (isLInsSt()) {
1432             miniAccSet.val = toLInsSt()->miniAccSetVal;
1433         } else {
1434             NanoAssert(isLInsLd());
1435             miniAccSet.val = toLInsLd()->miniAccSetVal;
1436         }
1437         return miniAccSet;
1438     }
1439
1440     AccSet LIns::accSet() const {
1441         return decompressMiniAccSet(miniAccSet());
1442     }
1443
1444     LIns* LIns::prevLIns() const {
1445         NanoAssert(isLInsSk());
1446         return toLInsSk()->prevLIns;
1447     }
1448
1449     inline uint8_t LIns::paramArg()  const { NanoAssert(isop(LIR_paramp)); return toLInsP()->arg; }
1450     inline uint8_t LIns::paramKind() const { NanoAssert(isop(LIR_paramp)); return toLInsP()->kind; }
1451
1452     inline int32_t LIns::immI()     const { NanoAssert(isImmI());  return toLInsI()->immI; }
1453
#ifdef NANOJIT_64BIT
    // Returns the low 32 bits of a 64-bit integer immediate.
    inline int32_t LIns::immQlo() const {
        NanoAssert(isImmQ());
        return toLInsQorD()->immQorDlo;
    }
    // Reassembles a 64-bit integer immediate from its two stored halves.
    uint64_t LIns::immQ() const {
        NanoAssert(isImmQ());
        LInsQorD* const rep = toLInsQorD();
        const uint64_t high = uint64_t(rep->immQorDhi) << 32;
        const uint32_t low = uint32_t(rep->immQorDlo);
        return high | low;
    }
#endif
1461     inline int32_t LIns::immDlo() const { NanoAssert(isImmD()); return toLInsQorD()->immQorDlo; }
1462     inline int32_t LIns::immDhi() const { NanoAssert(isImmD()); return toLInsQorD()->immQorDhi; }
1463     double         LIns::immD()    const {
1464         NanoAssert(isImmD());
1465         union {
1466             double f;
1467             uint64_t q;
1468         } u;
1469         u.q = immDasQ();
1470         return u.f;
1471     }
1472     uint64_t       LIns::immDasQ()  const {
1473         NanoAssert(isImmD());
1474         return (uint64_t(toLInsQorD()->immQorDhi) << 32) | uint32_t(toLInsQorD()->immQorDlo);
1475     }
1476
1477     int32_t LIns::size() const {
1478         NanoAssert(isop(LIR_allocp));
1479         return toLInsI()->immI << 2;
1480     }
1481
1482     void LIns::setSize(int32_t nbytes) {
1483         NanoAssert(isop(LIR_allocp));
1484         NanoAssert(nbytes > 0);
1485         toLInsI()->immI = (nbytes+3)>>2; // # of required 32bit words
1486     }
1487
1488     // Index args in reverse order, i.e. arg(0) returns the rightmost arg.
1489     // Nb: this must be kept in sync with insCall().
1490     LIns* LIns::arg(uint32_t i) const
1491     {
1492         NanoAssert(isCall());
1493         NanoAssert(i < callInfo()->count_args());
1494         return toLInsC()->args[i];  // args[] is in right-to-left order as well
1495     }
1496
1497     uint32_t LIns::argc() const {
1498         return callInfo()->count_args();
1499     }
1500
1501     LIns* LIns::callArgN(uint32_t n) const
1502     {
1503         return arg(argc()-n-1);
1504     }
1505
1506     const CallInfo* LIns::callInfo() const
1507     {
1508         NanoAssert(isCall());
1509         return toLInsC()->ci;
1510     }
1511
1512     uint32_t LIns::getTableSize() const
1513     {
1514         NanoAssert(isLInsJtbl());
1515         return toLInsJtbl()->size;
1516     }
1517
1518     class LirWriter
1519     {
1520     public:
1521         LirWriter *out;
1522
1523         LirWriter(LirWriter* out)
1524             : out(out) {}
1525         virtual ~LirWriter() {}
1526
1527         virtual LIns* ins0(LOpcode v) {
1528             return out->ins0(v);
1529         }
1530         virtual LIns* ins1(LOpcode v, LIns* a) {
1531             return out->ins1(v, a);
1532         }
1533         virtual LIns* ins2(LOpcode v, LIns* a, LIns* b) {
1534             return out->ins2(v, a, b);
1535         }
1536         virtual LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c) {
1537             return out->ins3(v, a, b, c);
1538         }
1539         virtual LIns* insGuard(LOpcode v, LIns *c, GuardRecord *gr) {
1540             return out->insGuard(v, c, gr);
1541         }
1542         virtual LIns* insGuardXov(LOpcode v, LIns *a, LIns* b, GuardRecord *gr) {
1543             return out->insGuardXov(v, a, b, gr);
1544         }
1545         virtual LIns* insBranch(LOpcode v, LIns* condition, LIns* to) {
1546             return out->insBranch(v, condition, to);
1547         }
1548         virtual LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to) {
1549             return out->insBranchJov(v, a, b, to);
1550         }
1551         // arg: 0=first, 1=second, ...
1552         // kind: 0=arg 1=saved-reg
1553         virtual LIns* insParam(int32_t arg, int32_t kind) {
1554             return out->insParam(arg, kind);
1555         }
1556         virtual LIns* insImmI(int32_t imm) {
1557             return out->insImmI(imm);
1558         }
1559 #ifdef NANOJIT_64BIT
1560         virtual LIns* insImmQ(uint64_t imm) {
1561             return out->insImmQ(imm);
1562         }
1563 #endif
1564         virtual LIns* insImmD(double d) {
1565             return out->insImmD(d);
1566         }
1567         virtual LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet, LoadQual loadQual) {
1568             return out->insLoad(op, base, d, accSet, loadQual);
1569         }
1570         virtual LIns* insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet) {
1571             return out->insStore(op, value, base, d, accSet);
1572         }
1573         // args[] is in reverse order, ie. args[0] holds the rightmost arg.
1574         virtual LIns* insCall(const CallInfo *call, LIns* args[]) {
1575             return out->insCall(call, args);
1576         }
1577         virtual LIns* insAlloc(int32_t size) {
1578             NanoAssert(size != 0);
1579             return out->insAlloc(size);
1580         }
1581         virtual LIns* insJtbl(LIns* index, uint32_t size) {
1582             return out->insJtbl(index, size);
1583         }
1584         virtual LIns* insComment(const char* str) {
                 // Default behaviour: forwards the string pointer to 'out'.
1585             return out->insComment(str);
1586         }
1587
1588         // convenience functions
1589
1590         // Inserts a conditional.  Judging by the parameter names, the result
1591         // is 'iftrue' when 'cond' holds and 'iffalse' otherwise; 'use_cmov'
             // presumably selects whether a conditional-move instruction is
             // emitted.  The definition is not in this header -- confirm there.
1592         LIns* insChoose(LIns* cond, LIns* iftrue, LIns* iffalse, bool use_cmov);
1593
1594         // Emits 'oprnd1 == 0' as a LIR_eqi against an integer immediate zero.
1595         LIns* insEqI_0(LIns* oprnd1) {
                 LIns* zero = insImmI(0);
1596             return ins2(LIR_eqi, oprnd1, zero);
1597         }
1598
1599         // Emits 'oprnd1 == 0' as a LIR_eqp against a word-sized zero.
1600         LIns* insEqP_0(LIns* oprnd1) {
                 LIns* nullWord = insImmWord(0);
1601             return ins2(LIR_eqp, oprnd1, nullWord);
1602         }
1603
1604         // Emits the binary operation 'v' with 'oprnd1' on the left and the
1605         // 32-bit immediate 'imm' on the right.
1606         LIns* ins2ImmI(LOpcode v, LIns* oprnd1, int32_t imm) {
                 LIns* rhs = insImmI(imm);
1607             return ins2(v, oprnd1, rhs);
1608         }
1609
1610         LIns* insImmP(const void *ptr) {
                 // Emits the pointer's bit pattern as an immediate: 64 bits wide
                 // when NANOJIT_64BIT is defined, 32 bits wide otherwise.
1611 #ifdef NANOJIT_64BIT
                 const uint64_t bits = (uint64_t)ptr;
1612             return insImmQ(bits);
1613 #else
                 const int32_t bits = (int32_t)ptr;
1614             return insImmI(bits);
1615 #endif
1616         }
1617
1618         LIns* insImmWord(intptr_t value) {
                 // Word-sized immediate: insImmQ when NANOJIT_64BIT is defined,
                 // insImmI otherwise.  The conversions are spelled out here.
1619 #ifdef NANOJIT_64BIT
1620             return insImmQ(uint64_t(value));
1621 #else
1622             return insImmI(int32_t(value));
1623 #endif
1624         }
1625
1626         // Widens a 32-bit integer to native word size with sign extension
             // (LIR_i2q).  Without NANOJIT_64BIT the operand is returned as is.
1627         LIns* insI2P(LIns* intIns) {
                 LIns* widened = intIns;
1628 #ifdef NANOJIT_64BIT
1629             widened = ins1(LIR_i2q, intIns);
1632 #endif
                 return widened;
1633         }
1634
1635         // Widens an unsigned 32-bit integer to native word size with zero
             // extension (LIR_ui2uq).  Without NANOJIT_64BIT the operand is
             // returned as is.
1636         LIns* insUI2P(LIns* uintIns) {
                 LIns* widened = uintIns;
1637 #ifdef NANOJIT_64BIT
1638             widened = ins1(LIR_ui2uq, uintIns);
1641 #endif
                 return widened;
1642         }
1643
1644         // Do a load with LoadQual==LOAD_NORMAL.
             // Convenience overload: calls the five-argument insLoad() with the
             // same op/base/d/accSet and LOAD_NORMAL as the qualifier.
1645         LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet) {
1646             return insLoad(op, base, d, accSet, LOAD_NORMAL);
1647         }
1648
1649         // Chooses LIR_sti, LIR_stq or LIR_std according to the type of 'value'.
             // Declaration only; the body is not in this header.
1650         LIns* insStore(LIns* value, LIns* base, int32_t d, AccSet accSet);
1651     };
1652
1653
1654 #ifdef NJ_VERBOSE
1655     extern const char* lirNames[];   // NOTE(review): presumably one printable name per opcode; defined elsewhere -- confirm
1656
1657     // Maps address ranges to meaningful names.
         // Ranges are registered through addAddrRange() and queried through
         // lookupAddr(); both are defined outside this header.
1658     class AddrNameMap
1659     {
1660         Allocator& allocator;
             // Record kept for one named range.
1661         class Entry
1662         {
1663         public:
                 // The int argument is ignored; every field starts out as zero.
1664             Entry(int) : name(0), size(0), align(0) {}
1665             Entry(char *n, size_t s, size_t a) : name(n), size(s), align(a) {}
1666             char* name;
                 // Bit-fields: 'size' is stored in 29 bits and 'align' in 3 bits.
1667             size_t size:29, align:3;
1668         };
1669         TreeMap<const void*, Entry*> names;     // maps code regions to names
1670     public:
1671         AddrNameMap(Allocator& allocator);
1672         void addAddrRange(const void *p, size_t size, size_t align, const char *name);
             // Results are delivered through the 'name' and 'offset' reference
             // parameters.
1673         void lookupAddr(void *p, char*& name, int32_t& offset);
1674     };
1675
1676     // Maps LIR instructions to meaningful names.
1677     class LirNameMap
1678     {
1679     private:
1680         Allocator& alloc;
1681
1682         // A small string-wrapper class, required because we need '==' to
1683         // compare string contents, not string pointers, when strings are used
1684         // as keys in CountMap.
1685         struct Str {
1686             Allocator& alloc;
1687             char* s;
1688
1689             Str(Allocator& alloc_, const char* s_) : alloc(alloc_) {
1690                 s = new (alloc) char[1+strlen(s_)];
1691                 strcpy(s, s_);
1692             }
1693
1694             bool operator==(const Str& str) const {
1695                 return (0 == strcmp(this->s, str.s));
1696             }
1697         };
1698
1699         // Similar to 'struct Str' -- we need to hash the string's contents,
1700         // not its pointer.
1701         template<class K> struct StrHash {
1702             static size_t hash(const Str &k) {
1703                 // (const void*) cast is required by ARM RVCT 2.2
1704                 return murmurhash((const void*)k.s, strlen(k.s));
1705             }
1706         };
1707
1708         template <class Key, class H=DefaultHash<Key> >
1709         class CountMap: public HashMap<Key, int, H> {
1710         public:
1711             CountMap(Allocator& alloc) : HashMap<Key, int, H>(alloc, 128) {}
1712             int add(Key k) {
1713                 int c = 1;
1714                 if (this->containsKey(k)) {
1715                     c = 1+this->get(k);
1716                 }
1717                 this->put(k,c);
1718                 return c;
1719             }
1720         };
1721
1722         CountMap<int> lircounts;
1723         CountMap<const CallInfo *> funccounts;
1724         CountMap<Str, StrHash<Str> > namecounts;
1725
1726         void addNameWithSuffix(LIns* i, const char *s, int suffix, bool ignoreOneSuffix);
1727
1728         class Entry
1729         {
1730         public:
1731             Entry(int) : name(0) {}
1732             Entry(char* n) : name(n) {}
1733             char* name;
1734         };
1735
1736         HashMap<LIns*, Entry*> names;
1737
1738     public:
1739         LirNameMap(Allocator& alloc)
1740             : alloc(alloc),
1741             lircounts(alloc),
1742             funccounts(alloc),
1743             namecounts(alloc),
1744             names(alloc)
1745         {}
1746
1747         void        addName(LIns* ins, const char *s);  // gives 'ins' a special name
1748         const char* createName(LIns* ins);              // gives 'ins' a generic name
1749         const char* lookupName(LIns* ins);
1750     };
1751
1752     // We use big buffers for cases where we need to fit a whole instruction,
1753     // and smaller buffers for all the others.  These should easily be long
1754     // enough, but for safety the formatXyz() functions check and won't exceed
1755     // those limits.
         // InsBuf is the big variant: a fixed array of 'len' (1000) chars.
1756     class InsBuf {
1757     public:
1758         static const size_t len = 1000;
1759         char buf[len];
1760     };
1761     class RefBuf {
         // Fixed array of 'len' (200) chars; the formatXyz() declarations in
         // this header that return a short string take a RefBuf.
1762     public:
1763         static const size_t len = 200;
1764         char buf[len];
1765     };
1766
1767     class LInsPrinter
1768     {
         // Formats instructions, references, addresses and access sets into
         // caller-supplied InsBuf/RefBuf buffers.  The formatters are defined
         // outside this header, some of them by the embedder (see below).
1769     private:
1770         Allocator& alloc;
             // Copied from the constructor's 'embNumUsedAccs' argument.
1771         const int EMB_NUM_USED_ACCS;
1772
1773         char *formatImmI(RefBuf* buf, int32_t c);
1774 #ifdef NANOJIT_64BIT
1775         char *formatImmQ(RefBuf* buf, uint64_t c);
1776 #endif
1777         char *formatImmD(RefBuf* buf, double c);
1778         void formatGuard(InsBuf* buf, LIns* ins);       // defined by the embedder
1779         void formatGuardXov(InsBuf* buf, LIns* ins);    // defined by the embedder
1780
1781     public:
1782         static const char* accNames[];                  // defined by the embedder
1783
             // Creates both name maps on 'alloc'; they are reachable afterwards
             // through the public addrNameMap / lirNameMap pointers.
1784         LInsPrinter(Allocator& alloc, int embNumUsedAccs)
1785             : alloc(alloc), EMB_NUM_USED_ACCS(embNumUsedAccs)
1786         {
1787             addrNameMap = new (alloc) AddrNameMap(alloc);
1788             lirNameMap = new (alloc) LirNameMap(alloc);
1789         }
1790
1791         char *formatAddr(RefBuf* buf, void* p);
1792         char *formatRef(RefBuf* buf, LIns* ref, bool showImmValue = true);
1793         char *formatIns(InsBuf* buf, LIns* ins);
1794         char *formatAccSet(RefBuf* buf, AccSet accSet);
1795
1796         AddrNameMap* addrNameMap;
1797         LirNameMap* lirNameMap;
1798     };
1799
1800
1801     class VerboseWriter : public LirWriter
1802     {
1803         InsList code;
1804         LInsPrinter* printer;
1805         LogControl* logc;
1806         const char* const prefix;
1807         bool const always_flush;
1808     public:
1809         VerboseWriter(Allocator& alloc, LirWriter *out, LInsPrinter* printer, LogControl* logc,
1810                       const char* prefix = "", bool always_flush = false)
1811             : LirWriter(out), code(alloc), printer(printer), logc(logc), prefix(prefix), always_flush(always_flush)
1812         {}
1813
1814         LIns* add(LIns* i) {
1815             if (i) {
1816                 code.add(i);
1817                 if (always_flush)
1818                     flush();
1819             }
1820             return i;
1821         }
1822
1823         LIns* add_flush(LIns* i) {
1824             if ((i = add(i)) != 0)
1825                 flush();
1826             return i;
1827         }
1828
1829         void flush()
1830         {
1831             if (!code.isEmpty()) {
1832                 InsBuf b;
1833                 for (Seq<LIns*>* p = code.get(); p != NULL; p = p->tail)
1834                     logc->printf("%s    %s\n", prefix, printer->formatIns(&b, p->head));
1835                 code.clear();
1836             }
1837         }
1838
1839         LIns* insGuard(LOpcode op, LIns* cond, GuardRecord *gr) {
1840             return add_flush(out->insGuard(op,cond,gr));
1841         }
1842
1843         LIns* insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr) {
1844             return add(out->insGuardXov(op,a,b,gr));
1845         }
1846
1847         LIns* insBranch(LOpcode v, LIns* condition, LIns* to) {
1848             return add_flush(out->insBranch(v, condition, to));
1849         }
1850
1851         LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to) {
1852             return add(out->insBranchJov(v, a, b, to));
1853         }
1854
1855         LIns* insJtbl(LIns* index, uint32_t size) {
1856             return add_flush(out->insJtbl(index, size));
1857         }
1858
1859         LIns* ins0(LOpcode v) {
1860             if (v == LIR_label || v == LIR_start) {
1861                 flush();
1862             }
1863             return add(out->ins0(v));
1864         }
1865
1866         LIns* ins1(LOpcode v, LIns* a) {
1867             return isRetOpcode(v) ? add_flush(out->ins1(v, a)) : add(out->ins1(v, a));
1868         }
1869         LIns* ins2(LOpcode v, LIns* a, LIns* b) {
1870             return add(out->ins2(v, a, b));
1871         }
1872         LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c) {
1873             return add(out->ins3(v, a, b, c));
1874         }
1875         LIns* insCall(const CallInfo *call, LIns* args[]) {
1876             return add_flush(out->insCall(call, args));
1877         }
1878         LIns* insParam(int32_t i, int32_t kind) {
1879             return add(out->insParam(i, kind));
1880         }
1881         LIns* insLoad(LOpcode v, LIns* base, int32_t disp, AccSet accSet, LoadQual loadQual) {
1882             return add(out->insLoad(v, base, disp, accSet, loadQual));
1883         }
1884         LIns* insStore(LOpcode op, LIns* v, LIns* b, int32_t d, AccSet accSet) {
1885             return add_flush(out->insStore(op, v, b, d, accSet));
1886         }
1887         LIns* insAlloc(int32_t size) {
1888             return add(out->insAlloc(size));
1889         }
1890         LIns* insImmI(int32_t imm) {
1891             return add(out->insImmI(imm));
1892         }
1893 #ifdef NANOJIT_64BIT
1894         LIns* insImmQ(uint64_t imm) {
1895             return add(out->insImmQ(imm));
1896         }
1897 #endif
1898         LIns* insImmD(double d) {
1899             return add(out->insImmD(d));
1900         }
1901
1902         LIns* insComment(const char* str) {
1903             return add_flush(out->insComment(str));
1904         }
1905     };
1906
1907 #endif
1908
1909     class ExprFilter: public LirWriter
1910     {
         // Writer stage that redeclares the hooks listed below; their
         // definitions are not in this header.
         // NOTE(review): presumably simplifies expressions before handing them
         // to 'out' -- confirm against the implementation.
1911     public:
1912         ExprFilter(LirWriter *out) : LirWriter(out) {}
1913         LIns* ins1(LOpcode v, LIns* a);
1914         LIns* ins2(LOpcode v, LIns* a, LIns* b);
1915         LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c);
1916         LIns* insGuard(LOpcode, LIns* cond, GuardRecord *);
1917         LIns* insGuardXov(LOpcode, LIns* a, LIns* b, GuardRecord *);
1918         LIns* insBranch(LOpcode, LIns* cond, LIns* target);
1919         LIns* insBranchJov(LOpcode, LIns* a, LIns* b, LIns* target);
1920         LIns* insLoad(LOpcode op, LIns* base, int32_t off, AccSet accSet, LoadQual loadQual);
1921     private:
             // Receives the operands by address (LIns**).
1922         LIns* simplifyOverflowArith(LOpcode op, LIns** opnd1, LIns** opnd2);
1923     };
1924
1925     class CseFilter: public LirWriter
1926     {
1927         enum NLKind {
1928             // We divide instruction kinds into groups.  LIns0 isn't present
1929             // because we don't need to record any 0-ary instructions.  Loads
1930             // aren't here, they're handled separately.
1931             NLImmISmall = 0,
1932             NLImmILarge = 1,
1933             NLImmQ      = 2,   // only occurs on 64-bit platforms
1934             NLImmD      = 3,
1935             NL1         = 4,
1936             NL2         = 5,
1937             NL3         = 6,
1938             NLCall      = 7,
1939
1940             NLFirst = 0,
1941             NLLast = 7,
1942             // Need a value after "last" to outsmart compilers that insist last+1 is impossible.
1943             NLInvalid = 8
1944         };
1945         #define nextNLKind(kind)  NLKind(kind+1)
1946
1947         // There is one table for each NLKind.  This lets us size the lists
1948         // appropriately (some instruction kinds are more common than others).
1949         // It also lets us have NLKind-specific find/add/grow functions, which
1950         // are faster than generic versions.
1951         //
1952         // Nb: m_listNL and m_capNL sizes must be a power of 2.
1953         //     Don't start m_capNL too small, or we'll waste time growing and rehashing.
1954         //     Don't start m_capNL too large, will waste memory.
1955         //
1956         LIns**      m_listNL[NLLast + 1];
1957         uint32_t    m_capNL[ NLLast + 1];
1958         uint32_t    m_usedNL[NLLast + 1];
1959         typedef uint32_t (CseFilter::*find_t)(LIns*);
1960         find_t      m_findNL[NLLast + 1];
1961
1962         // Similarly, for loads, there is one table for each CseAcc.  A CseAcc
1963         // is like a normal access region, but there are two extra possible
1964         // values: CSE_ACC_CONST, which is where we put all CONST-qualified
1965         // loads, and CSE_ACC_MULTIPLE, where we put all multi-region loads.
1966         // All remaining loads are single-region and go in the table entry for
1967         // their region.
1968         //
1969         // This arrangement makes the removal of invalidated loads fast -- we
1970         // can invalidate all loads from a single region by clearing that
1971         // region's table.
1972         //
1973         typedef uint8_t CseAcc;     // same type as MiniAccSet
1974
1975         static const uint8_t CSE_NUM_ACCS = NUM_ACCS + 2;
1976
1977         // These values would be 'static const' except they are defined in
1978         // terms of EMB_NUM_USED_ACCS which is itself not 'static const'
1979         // because it's passed in by the embedding.
1980         const uint8_t EMB_NUM_USED_ACCS;      // number of access regions used by the embedding
1981         const uint8_t CSE_NUM_USED_ACCS;      // EMB_NUM_USED_ACCS + 2
1982         const CseAcc CSE_ACC_CONST;           // EMB_NUM_USED_ACCS + 0
1983         const CseAcc CSE_ACC_MULTIPLE;        // EMB_NUM_USED_ACCS + 1
1984
1985         // We will only use CSE_NUM_USED_ACCS of these entries, ie. the
1986         // number of lists allocated depends on the number of access regions
1987         // in use by the embedding.
1988         LIns**      m_listL[CSE_NUM_ACCS];
1989         uint32_t    m_capL[ CSE_NUM_ACCS];
1990         uint32_t    m_usedL[CSE_NUM_ACCS];
1991
1992         AccSet      storesSinceLastLoad;    // regions stored to since the last load
1993
1994         Allocator& alloc;
1995
1996         // After a conditional guard such as "xf cmp", we know that 'cmp' must
1997         // be true, else we would have side-exited.  So if we see 'cmp' again
1998         // we can treat it like a constant.  This table records such
1999         // comparisons.
2000         HashMap <LIns*, bool> knownCmpValues;
2001
2002         // If true, we will not add new instructions to the CSE tables, but we
2003         // will continue to CSE instructions that match existing table
2004         // entries.  Load instructions will still be removed if aliasing
2005         // stores are encountered.
2006         bool suspended;
2007
2008         CseAcc miniAccSetToCseAcc(MiniAccSet miniAccSet, LoadQual loadQual) {
2009             NanoAssert(miniAccSet.val < NUM_ACCS || miniAccSet.val == MINI_ACCSET_MULTIPLE.val);
2010             return (loadQual == LOAD_CONST) ? CSE_ACC_CONST :
2011                    (miniAccSet.val == MINI_ACCSET_MULTIPLE.val) ? CSE_ACC_MULTIPLE :
2012                    miniAccSet.val;
2013         }
2014
2015         static uint32_t hash8(uint32_t hash, const uint8_t data);
2016         static uint32_t hash32(uint32_t hash, const uint32_t data);
2017         static uint32_t hashptr(uint32_t hash, const void* data);
2018         static uint32_t hashfinish(uint32_t hash);
2019
2020         static uint32_t hashImmI(int32_t);
2021         static uint32_t hashImmQorD(uint64_t);     // not NANOJIT_64BIT-only -- used by findImmD()
2022         static uint32_t hash1(LOpcode op, LIns*);
2023         static uint32_t hash2(LOpcode op, LIns*, LIns*);
2024         static uint32_t hash3(LOpcode op, LIns*, LIns*, LIns*);
2025         static uint32_t hashLoad(LOpcode op, LIns*, int32_t);
2026         static uint32_t hashCall(const CallInfo *call, uint32_t argc, LIns* args[]);
2027
2028         // These versions are used before an LIns has been created.
2029         LIns* findImmISmall(int32_t a, uint32_t &k);
2030         LIns* findImmILarge(int32_t a, uint32_t &k);
2031 #ifdef NANOJIT_64BIT
2032         LIns* findImmQ(uint64_t a, uint32_t &k);
2033 #endif
2034         LIns* findImmD(uint64_t d, uint32_t &k);
2035         LIns* find1(LOpcode v, LIns* a, uint32_t &k);
2036         LIns* find2(LOpcode v, LIns* a, LIns* b, uint32_t &k);
2037         LIns* find3(LOpcode v, LIns* a, LIns* b, LIns* c, uint32_t &k);
2038         LIns* findLoad(LOpcode v, LIns* a, int32_t b, MiniAccSet miniAccSet, LoadQual loadQual,
2039                        uint32_t &k);
2040         LIns* findCall(const CallInfo *call, uint32_t argc, LIns* args[], uint32_t &k);
2041
2042         // These versions are used after an LIns has been created; they are
2043         // used for rehashing after growing.  They just call onto the
2044         // multi-arg versions above.
2045         uint32_t findImmISmall(LIns* ins);
2046         uint32_t findImmILarge(LIns* ins);
2047 #ifdef NANOJIT_64BIT
2048         uint32_t findImmQ(LIns* ins);
2049 #endif
2050         uint32_t findImmD(LIns* ins);
2051         uint32_t find1(LIns* ins);
2052         uint32_t find2(LIns* ins);
2053         uint32_t find3(LIns* ins);
2054         uint32_t findCall(LIns* ins);
2055         uint32_t findLoad(LIns* ins);
2056
2057         // These return false if they failed to grow due to OOM.
2058         bool growNL(NLKind kind);
2059         bool growL(CseAcc cseAcc);
2060
2061         void addNLImmISmall(LIns* ins, uint32_t k);
2062         // 'k' is the index found by findXYZ().
2063         void addNL(NLKind kind, LIns* ins, uint32_t k);
2064         void addL(LIns* ins, uint32_t k);
2065
2066         void clearAll();            // clears all tables
2067         void clearNL(NLKind);       // clears one non-load table
2068         void clearL(CseAcc);        // clears one load table
2069
2070     public:
2071         CseFilter(LirWriter *out, uint8_t embNumUsedAccs, Allocator&);
2072
2073         // CseFilter does some largish fallible allocations at start-up.  If
2074         // they fail, the constructor sets this field to 'true'.  It should be
2075         // checked after creation, and if set the CseFilter cannot be used.
2076         // (But the check can be skipped if allocChunk() always succeeds.)
2077         //
2078         // FIXME: This fallibility is a sop to TraceMonkey's implementation of
2079         // infallible malloc -- by avoiding some largish infallible
2080         // allocations, it reduces the size of the reserve space needed.
2081         // Bug 624590 is open to fix this.
2082         bool initOOM;
2083
2084         LIns* insImmI(int32_t imm);
2085 #ifdef NANOJIT_64BIT
2086         LIns* insImmQ(uint64_t q);
2087 #endif
2088         LIns* insImmD(double d);
2089         LIns* ins0(LOpcode v);
2090         LIns* ins1(LOpcode v, LIns*);
2091         LIns* ins2(LOpcode v, LIns*, LIns*);
2092         LIns* ins3(LOpcode v, LIns*, LIns*, LIns*);
2093         LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet, LoadQual loadQual);
2094         LIns* insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet);
2095         LIns* insCall(const CallInfo *call, LIns* args[]);
2096         LIns* insGuard(LOpcode op, LIns* cond, GuardRecord *gr);
2097         LIns* insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr);
2098
2099         // These functions provide control over CSE in the face of control
2100         // flow.  A suspend()/resume() pair may be put around a synthetic
2101         // control flow diamond, preventing the inserted label from resetting
2102         // the CSE state.  A suspend() call must be dominated by a resume()
2103         // call, else incorrect code could result.
2104         void suspend() { suspended = true; }
2105         void resume() { suspended = false; }
2106     };
2107
2108     class LirBuffer
2109     {
         // Storage for LIR instructions.  Memory is obtained CHUNK_SZB bytes at
         // a time (chunkAlloc); _unused and _limit track the chunk currently
         // being filled.
2110         public:
2111             LirBuffer(Allocator& alloc);
2112             void        clear();
2113             uintptr_t   makeRoom(size_t szB);   // make room for an instruction
2114
                 // NOTE(review): debug_only/verbose_only are macros defined
                 // elsewhere; presumably these two members exist only in debug
                 // and verbose builds respectively -- confirm.
2115             debug_only (void validate() const;)
2116             verbose_only(LInsPrinter* printer;)
2117
2118             int32_t insCount();
2119
2120             // stats
2121             struct
2122             {
2123                 uint32_t lir;    // # instructions
2124             }
2125             _stats;
2126
2127             AbiKind abi;
2128             LIns *state, *param1, *sp, *rp;
2129             LIns* savedRegs[NumSavedRegs+1]; // Allocate an extra element in case NumSavedRegs == 0
2130
2131             /** Each chunk is just a raw area of LIns instances, with no header
2132                 and no more than 8-byte alignment.  The chunk size is somewhat arbitrary. */
2133             static const size_t CHUNK_SZB = 8000;
2134
2135         protected:
                 // LirBufWriter is given access to the protected members below.
2136             friend class LirBufWriter;
2137
2138             /** Get CHUNK_SZB more memory for LIR instructions. */
2139             void        chunkAlloc();
2140             void        moveToNewChunk(uintptr_t addrOfLastLInsOnCurrentChunk);
2141
2142             Allocator&  _allocator;
2143             uintptr_t   _unused;   // next unused instruction slot in the current LIR chunk
2144             uintptr_t   _limit;    // one past the last usable byte of the current LIR chunk
2145     };
2146
2147     class LirBufWriter : public LirWriter
2148     {
         // The writer at the end of a pipeline: it is constructed with a null
         // 'out' (LirWriter(0)) and redeclares every insXyz hook itself.  The
         // hook definitions are not in this header.
2149         LirBuffer*              _buf;        // underlying buffer housing the instructions
2150         const Config&           _config;
2151
2152         public:
2153             LirBufWriter(LirBuffer* buf, const Config& config)
2154                 : LirWriter(0), _buf(buf), _config(config) {
2155             }
2156
2157             // LirWriter interface
2158             LIns*   insLoad(LOpcode op, LIns* base, int32_t disp, AccSet accSet, LoadQual loadQual);
2159             LIns*   insStore(LOpcode op, LIns* o1, LIns* o2, int32_t disp, AccSet accSet);
2160             LIns*   ins0(LOpcode op);
2161             LIns*   ins1(LOpcode op, LIns* o1);
2162             LIns*   ins2(LOpcode op, LIns* o1, LIns* o2);
2163             LIns*   ins3(LOpcode op, LIns* o1, LIns* o2, LIns* o3);
2164             LIns*   insParam(int32_t i, int32_t kind);
2165             LIns*   insImmI(int32_t imm);
2166 #ifdef NANOJIT_64BIT
2167             LIns*   insImmQ(uint64_t imm);
2168 #endif
2169             LIns*   insImmD(double d);
2170             LIns*   insCall(const CallInfo *call, LIns* args[]);
2171             LIns*   insGuard(LOpcode op, LIns* cond, GuardRecord *gr);
2172             LIns*   insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr);
2173             LIns*   insBranch(LOpcode v, LIns* condition, LIns* to);
2174             LIns*   insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to);
2175             LIns*   insAlloc(int32_t size);
2176             LIns*   insJtbl(LIns* index, uint32_t size);
2177             LIns*   insComment(const char* str);
2178     };
2179
2180     class LirFilter
2181     {
         // Base class for the reading side: each filter wraps an upstream
         // filter 'in', and both virtual functions default to forwarding to it.
         // A subclass constructed with a null 'in' (as LirReader is) must
         // therefore override both.
2182     public:
2183         LirFilter *in;
2184         LirFilter(LirFilter *in) : in(in) {}
2185         virtual ~LirFilter(){}
2186
2187         // It's crucial that once this reaches the LIR_start at the beginning
2188         // of the buffer, that it just keeps returning that LIR_start LIns on
2189         // any subsequent calls.
2190         virtual LIns* read() {
2191             return in->read();
2192         }
             // Default: asks the upstream filter for its final instruction.
2193         virtual LIns* finalIns() {
2194             return in->finalIns();
2195         }
2196     };
2197
2198     // concrete
         // Has no upstream filter (it passes 0 to LirFilter) and therefore
         // overrides both read() and finalIns().
2199     class LirReader : public LirFilter
2200     {
2201         LIns* _ins;         // next instruction to be read;  invariant: is never a skip
2202         LIns* _finalIns;    // final instruction in the stream;  ie. the first one to be read
2203
2204     public:
             // 'ins' becomes both the read position and the value reported by
             // finalIns().  It must be non-null and must not be a LIR_skip.
2205         LirReader(LIns* ins) : LirFilter(0), _ins(ins), _finalIns(ins)
2206         {
2207             // The last instruction for a fragment shouldn't be a skip.
2208             // (Actually, if the last *inserted* instruction exactly fills up
2209             // a chunk, a new chunk will be created, and thus the last *written*
2210             // instruction will be a skip -- the one needed for the
2211             // cross-chunk link.  But the last *inserted* instruction is what
2212             // is recorded and used to initialise each LirReader, and that is
2213             // what is seen here, and therefore this assertion holds.)
2214             NanoAssert(ins && !ins->isop(LIR_skip));
2215         }
2216         virtual ~LirReader() {}
2217
2218         // Returns next instruction and advances to the prior instruction.
2219         // Invariant: never returns a skip.
2220         LIns* read();
2221
             // Returns the instruction this reader was constructed with.
2222         LIns* finalIns() {
2223             return _finalIns;
2224         }
2225     };
2226
2227     verbose_only(void live(LirFilter* in, Allocator& alloc, Fragment* frag, LogControl*);)   // NOTE(review): declared inside verbose_only, so presumably verbose builds only; purpose not visible here -- confirm
2228
2229     // WARNING: StackFilter assumes that all stack entries are eight bytes.
2230     // Some of its optimisations aren't valid if that isn't true.  See
2231     // StackFilter::read() for more details.
2232     class StackFilter: public LirFilter
2233     {
2234         LIns* sp;
2235         BitSet stk;
2236         int top;
2237         int getTop(LIns* br);
2238
2239     public:
             // 'in' is the upstream filter.  NOTE(review): 'sp' is presumably
             // the stack-pointer instruction whose entries are tracked --
             // confirm in StackFilter::read().
2240         StackFilter(LirFilter *in, Allocator& alloc, LIns* sp);
2241         LIns* read();
2242     };
2243
2244     // This type is used to perform a simple interval analysis of 32-bit
2245     // add/sub/mul.  It lets us avoid overflow checks in some cases.
2246     struct Interval
2247     {
2248         // The bounds are 64-bit integers so that any overflow from a 32-bit
2249         // operation can be safely detected.
2250         //
2251         // If 'hasOverflowed' is false, 'lo' and 'hi' must be in the range
2252         // I32_MIN..I32_MAX.  If 'hasOverflowed' is true, 'lo' and 'hi' should
2253         // not be trusted (and in debug builds we set them both to a special
2254         // value UNTRUSTWORTHY that is outside the I32_MIN..I32_MAX range to
2255         // facilitate sanity checking).
2256         //
2257         int64_t lo;
2258         int64_t hi;
2259         bool hasOverflowed;
2260
2261         static const int64_t I32_MIN = int64_t(int32_t(0x80000000));
2262         static const int64_t I32_MAX = int64_t(int32_t(0x7fffffff));
2263
2264 #ifdef DEBUG
2265         static const int64_t UNTRUSTWORTHY = int64_t(0xdeafdeadbeeffeedLL);
2266
2267         bool isSane() {
2268             return (hasOverflowed && lo == UNTRUSTWORTHY && hi == UNTRUSTWORTHY) ||
2269                    (!hasOverflowed && lo <= hi && I32_MIN <= lo && hi <= I32_MAX);
2270         }
2271 #endif
2272
2273         Interval(int64_t lo_, int64_t hi_) {
2274             if (lo_ < I32_MIN || I32_MAX < hi_) {
2275                 hasOverflowed = true;
2276 #ifdef DEBUG
2277                 lo = UNTRUSTWORTHY;
2278                 hi = UNTRUSTWORTHY;
2279 #endif
2280             } else {
2281                 hasOverflowed = false;
2282                 lo = lo_;
2283                 hi = hi_;
2284             }
2285             NanoAssert(isSane());
2286         }
2287
2288         static Interval OverflowInterval() {
2289             Interval interval(0, 0);
2290 #ifdef DEBUG
2291             interval.lo = UNTRUSTWORTHY;
2292             interval.hi = UNTRUSTWORTHY;
2293 #endif
2294             interval.hasOverflowed = true;
2295             return interval;
2296         }
2297
2298         static Interval of(LIns* ins, int32_t lim);
2299
2300         static Interval add(Interval x, Interval y);
2301         static Interval sub(Interval x, Interval y);
2302         static Interval mul(Interval x, Interval y);
2303
2304         bool canBeZero() {
2305             NanoAssert(isSane());
2306             return hasOverflowed || (lo <= 0 && 0 <= hi);
2307         }
2308
2309         bool canBeNegative() {
2310             NanoAssert(isSane());
2311             return hasOverflowed || (lo < 0);
2312         }
2313     };
2314
2315 #if NJ_SOFTFLOAT_SUPPORTED
2316     struct SoftFloatOps
2317     {
             // One CallInfo pointer per slot, LIR_sentinel slots in total.
             // NOTE(review): presumably indexed by opcode and populated by the
             // constructor, which is defined elsewhere -- confirm.
2318         const CallInfo* opmap[LIR_sentinel];
2319         SoftFloatOps();
2320     };
2321
         // Single shared instance; the definition is not in this header.
2322     extern const SoftFloatOps softFloatOps;
2323
2324     // Replaces fpu ops with function calls, for platforms lacking float
2325     // hardware (eg. some ARM machines).
         // Only compiled when NJ_SOFTFLOAT_SUPPORTED is non-zero.  All member
         // functions are defined outside this header.
2326     class SoftFloatFilter: public LirWriter
2327     {
2328     public:
2329         static const CallInfo* opmap[LIR_sentinel];
2330
2331         SoftFloatFilter(LirWriter *out);
2332         LIns *split(LIns *a);
2333         LIns *split(const CallInfo *call, LIns* args[]);
2334         LIns *callD1(const CallInfo *call, LIns *a);
2335         LIns *callD2(const CallInfo *call, LIns *a, LIns *b);
2336         LIns *callI1(const CallInfo *call, LIns *a);
2337         LIns *cmpD(const CallInfo *call, LIns *a, LIns *b);
             // Hooks redeclared from LirWriter.
2338         LIns *ins1(LOpcode op, LIns *a);
2339         LIns *ins2(LOpcode op, LIns *a, LIns *b);
2340         LIns *insCall(const CallInfo *ci, LIns* args[]);
2341     };
2342 #endif
2343
2344 #ifdef DEBUG
2345     // This class does thorough checking of LIR.  It checks *implicit* LIR
2346     // instructions, ie. LIR instructions specified via arguments -- to
2347     // methods like insLoad() -- that have not yet been converted into
2348     // *explicit* LIns objects in a LirBuffer.  The reason for this is that if
2349     // we wait until the LIR instructions are explicit, they will have gone
2350     // through the entire writer pipeline and been optimised.  By checking
2351     // implicit LIR instructions we can check the LIR code at the start of the
2352     // writer pipeline, exactly as it is generated by the compiler front-end.
2353     //
2354     // A general note about the errors produced by this class:  for
2355     // TraceMonkey, they won't include special names for instructions that
2356     // have them unless TMFLAGS is specified.
2357     class ValidateWriter : public LirWriter
2358     {
2359     private:
             // NOTE(review): presumably the constructor's 'printer' and 'where'
             // arguments, kept for use when reporting errors -- confirm against
             // the constructor's definition.
2360         LInsPrinter* printer;
2361         const char* whereInPipeline;
2362
             // Internal checking and error-reporting helpers; all are declared
             // only, with their behaviour defined out of line.  The 'op' and
             // 'argN' parameters presumably identify the instruction and the
             // argument position being checked -- confirm.
2363         const char* type2string(LTy type);
2364         void typeCheckArgs(LOpcode op, int nArgs, LTy formals[], LIns* args[]);
2365         void errorStructureShouldBe(LOpcode op, const char* argDesc, int argN, LIns* arg,
2366                                     const char* shouldBeDesc);
2367         void errorAccSet(const char* what, AccSet accSet, const char* shouldDesc);
2368         void errorLoadQual(const char* what, LoadQual loadQual);
2369         void checkLInsHasOpcode(LOpcode op, int argN, LIns* ins, LOpcode op2);
2370         void checkLInsIsACondOrConst(LOpcode op, int argN, LIns* ins);
2371         void checkLInsIsNull(LOpcode op, int argN, LIns* ins);
2372         void checkAccSet(LOpcode op, LIns* base, int32_t disp, AccSet accSet);   // defined by the embedder
2373
2374         // These can be set by the embedder and used in checkAccSet().
2375         void** checkAccSetExtras;
2376
2377     public:
2378         ValidateWriter(LirWriter* out, LInsPrinter* printer, const char* where);
             // Stores the embedder-supplied pointer in checkAccSetExtras; the
             // pointer is opaque to this header.
2379         void setCheckAccSetExtras(void** extras) { checkAccSetExtras = extras; }
2380
             // Writer entry points, one per kind of instruction.
             // NOTE(review): per the class comment these are where the implicit
             // instructions get checked; presumably each then forwards to the
             // wrapped writer -- confirm against the definitions.
2381         LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet, LoadQual loadQual);
2382         LIns* insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet);
2383         LIns* ins0(LOpcode v);
2384         LIns* ins1(LOpcode v, LIns* a);
2385         LIns* ins2(LOpcode v, LIns* a, LIns* b);
2386         LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c);
2387         LIns* insParam(int32_t arg, int32_t kind);
2388         LIns* insImmI(int32_t imm);
             // insImmQ is only declared on builds that define NANOJIT_64BIT.
2389 #ifdef NANOJIT_64BIT
2390         LIns* insImmQ(uint64_t imm);
2391 #endif
2392         LIns* insImmD(double d);
2393         LIns* insCall(const CallInfo *call, LIns* args[]);
2394         LIns* insGuard(LOpcode v, LIns *c, GuardRecord *gr);
2395         LIns* insGuardXov(LOpcode v, LIns* a, LIns* b, GuardRecord* gr);
2396         LIns* insBranch(LOpcode v, LIns* condition, LIns* to);
2397         LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to);
2398         LIns* insAlloc(int32_t size);
2399         LIns* insJtbl(LIns* index, uint32_t size);
2400     };
2401
2402     // This just checks things that aren't possible to check in
2403     // ValidateWriter, eg. whether all branch targets are set and are labels.
         // It is a LirFilter, i.e. it sits on the reading side of the pipeline
         // rather than the writing side.
2404     class ValidateReader: public LirFilter {
2405     public:
             // 'in' is the LirFilter this reader is constructed around.
2406         ValidateReader(LirFilter* in);
             // NOTE(review): presumably returns the next instruction from 'in'
             // after checking it -- confirm against the definition.
2407         LIns* read();
2408     };
2409 #endif
2410
2411 #ifdef NJ_VERBOSE
2412     /* A listing filter for LIR, going through backwards.  It merely
2413        passes its input to its output, but notes it down too.  When
2414        finish() is called, prints out what went through.  Is intended to be
2415        used to print arbitrary intermediate transformation stages of
2416        LIR. */
2417     class ReverseLister : public LirFilter
2418     {
             // Private state.  The members are declared in the same order as
             // they appear in the constructor's initializer list below; keep the
             // two in step.
2419         Allocator&   _alloc;
2420         LInsPrinter* _printer;
2421         const char*  _title;
             // Constructed from the allocator passed to the constructor.
             // NOTE(review): presumably accumulates the text of each
             // instruction seen by read() until finish() prints it -- confirm.
2422         StringList   _strs;
2423         LogControl*  _logc;
             // Starts out NULL.
2424         LIns*        _prevIns;
2425     public:
             // Stores every argument in the corresponding member, passes 'in'
             // to the LirFilter base, constructs _strs from 'alloc', and sets
             // _prevIns to NULL.  The body does no other work.
2426         ReverseLister(LirFilter* in, Allocator& alloc,
2427                       LInsPrinter* printer, LogControl* logc, const char* title)
2428             : LirFilter(in)
2429             , _alloc(alloc)
2430             , _printer(printer)
2431             , _title(title)
2432             , _strs(alloc)
2433             , _logc(logc)
2434             , _prevIns(NULL)
2435         { }
2436
             // Both are defined out of line; the class comment above describes
             // their intended roles (read() records, finish() prints).
2437         void finish();
2438         LIns* read();
2439     };
2440 #endif
2441
2442 }
2443 #endif // __nanojit_LIR__