1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
16 * The Original Code is [Open Source Virtual Machine].
18 * The Initial Developer of the Original Code is
19 * Adobe System Incorporated.
20 * Portions created by the Initial Developer are Copyright (C) 2004-2007
21 * the Initial Developer. All Rights Reserved.
26 * Alternatively, the contents of this file may be used under the terms of
27 * either the GNU General Public License Version 2 or later (the "GPL"), or
28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
44 using namespace avmplus;
45 #ifdef FEATURE_NANOJIT
47 const uint8_t repKinds[] = {
48 #define OP___(op, number, repKind, retType, isCse) \
50 #include "LIRopcode.tbl"
55 const LTy retTypes[] = {
56 #define OP___(op, number, repKind, retType, isCse) \
58 #include "LIRopcode.tbl"
63 const int8_t isCses[] = {
64 #define OP___(op, number, repKind, retType, isCse) \
66 #include "LIRopcode.tbl"
71 // LIR verbose specific
74 const char* lirNames[] = {
75 #define OP___(op, number, repKind, retType, isCse) \
77 #include "LIRopcode.tbl"
82 #endif /* NANOJIT_VERBOSE */
84 uint32_t CallInfo::count_args() const
87 uint32_t argt = _typesig;
88 argt >>= TYPESIG_FIELDSZB; // remove retType
91 argt >>= TYPESIG_FIELDSZB;
96 uint32_t CallInfo::count_int32_args() const
99 uint32_t argt = _typesig;
100 argt >>= TYPESIG_FIELDSZB; // remove retType
102 ArgType a = ArgType(argt & TYPESIG_FIELDMASK);
103 if (a == ARGTYPE_I || a == ARGTYPE_UI)
105 argt >>= TYPESIG_FIELDSZB;
110 uint32_t CallInfo::getArgTypes(ArgType* argTypes) const
113 uint32_t argt = _typesig;
114 argt >>= TYPESIG_FIELDSZB; // remove retType
116 ArgType a = ArgType(argt & TYPESIG_FIELDMASK);
119 argt >>= TYPESIG_FIELDSZB;
126 void ReverseLister::finish()
129 _logc->printf("=== BEGIN %s ===\n", _title);
131 for (Seq<char*>* p = _strs.get(); p != NULL; p = p->tail)
132 _logc->printf(" %02d: %s\n", j++, p->head);
133 _logc->printf("=== END %s ===\n", _title);
137 LIns* ReverseLister::read()
139 // This check is necessary to avoid printing the LIR_start multiple
140 // times due to lookahead in Assembler::gen().
141 if (_prevIns && _prevIns->isop(LIR_start))
143 LIns* ins = in->read();
145 const char* str = _printer->formatIns(&b, ins);
146 char* cpy = new (_alloc) char[strlen(str)+1];
147 VMPI_strcpy(cpy, str);
155 LirBuffer::LirBuffer(Allocator& alloc) :
159 abi(ABI_FASTCALL), state(NULL), param1(NULL), sp(NULL), rp(NULL),
165 void LirBuffer::clear()
167 // clear the stats, etc
171 for (int i = 0; i < NumSavedRegs; ++i)
176 void LirBuffer::chunkAlloc()
178 _unused = (uintptr_t) _allocator.alloc(CHUNK_SZB);
179 NanoAssert(_unused != 0); // Allocator.alloc() never returns null. See Allocator.h
180 _limit = _unused + CHUNK_SZB;
183 int32_t LirBuffer::insCount()
188 // Allocate a new page, and write the first instruction to it -- a skip
189 // linking to last instruction of the previous page.
190 void LirBuffer::moveToNewChunk(uintptr_t addrOfLastLInsOnCurrentChunk)
193 // Link LIR stream back to prior instruction.
194 // Unlike all the ins*() functions, we don't call makeRoom() here
195 // because we know we have enough space, having just started a new
197 LInsSk* insSk = (LInsSk*)_unused;
198 LIns* ins = insSk->getLIns();
199 ins->initLInsSk((LIns*)addrOfLastLInsOnCurrentChunk);
200 _unused += sizeof(LInsSk);
201 verbose_only(_stats.lir++);
204 // Make room for a single instruction.
205 uintptr_t LirBuffer::makeRoom(size_t szB)
207 // Make sure the size is ok
208 NanoAssert(0 == szB % sizeof(void*));
209 NanoAssert(sizeof(LIns) <= szB && szB <= sizeof(LInsSt)); // LInsSt is the biggest one
210 NanoAssert(_unused < _limit);
212 debug_only( bool moved = false; )
214 // If the instruction won't fit on the current chunk, get a new chunk
215 if (_unused + szB > _limit) {
216 uintptr_t addrOfLastLInsOnChunk = _unused - sizeof(LIns);
217 moveToNewChunk(addrOfLastLInsOnChunk);
218 debug_only( moved = true; )
221 // We now know that we are on a chunk that has the requested amount of
222 // room: record the starting address of the requested space and bump
224 uintptr_t startOfRoom = _unused;
226 verbose_only(_stats.lir++); // count the instruction
228 // If there's no more space on this chunk, move to a new one.
229 // (This will only occur if the asked-for size filled up exactly to
230 // the end of the chunk.) This ensures that next time we enter this
231 // function, _unused won't be pointing one byte past the end of
232 // the chunk, which would break everything.
233 if (_unused >= _limit) {
234 // Check we used exactly the remaining space
235 NanoAssert(_unused == _limit);
236 NanoAssert(!moved); // shouldn't need to moveToNewChunk twice
237 uintptr_t addrOfLastLInsOnChunk = _unused - sizeof(LIns);
238 moveToNewChunk(addrOfLastLInsOnChunk);
241 // Make sure it's word-aligned.
242 NanoAssert(0 == startOfRoom % sizeof(void*));
246 LIns* LirBufWriter::insStore(LOpcode op, LIns* val, LIns* base, int32_t d, AccSet accSet)
249 LInsSt* insSt = (LInsSt*)_buf->makeRoom(sizeof(LInsSt));
250 LIns* ins = insSt->getLIns();
251 ins->initLInsSt(op, val, base, d, accSet);
254 // If the displacement is more than 16 bits, put it in a separate instruction.
255 return insStore(op, val, ins2(LIR_addp, base, insImmWord(d)), 0, accSet);
259 LIns* LirBufWriter::ins0(LOpcode op)
261 LInsOp0* insOp0 = (LInsOp0*)_buf->makeRoom(sizeof(LInsOp0));
262 LIns* ins = insOp0->getLIns();
263 ins->initLInsOp0(op);
267 LIns* LirBufWriter::ins1(LOpcode op, LIns* o1)
269 LInsOp1* insOp1 = (LInsOp1*)_buf->makeRoom(sizeof(LInsOp1));
270 LIns* ins = insOp1->getLIns();
271 ins->initLInsOp1(op, o1);
275 LIns* LirBufWriter::ins2(LOpcode op, LIns* o1, LIns* o2)
277 LInsOp2* insOp2 = (LInsOp2*)_buf->makeRoom(sizeof(LInsOp2));
278 LIns* ins = insOp2->getLIns();
279 ins->initLInsOp2(op, o1, o2);
283 LIns* LirBufWriter::ins3(LOpcode op, LIns* o1, LIns* o2, LIns* o3)
285 LInsOp3* insOp3 = (LInsOp3*)_buf->makeRoom(sizeof(LInsOp3));
286 LIns* ins = insOp3->getLIns();
287 ins->initLInsOp3(op, o1, o2, o3);
291 LIns* LirBufWriter::insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet, LoadQual loadQual)
294 LInsLd* insLd = (LInsLd*)_buf->makeRoom(sizeof(LInsLd));
295 LIns* ins = insLd->getLIns();
296 ins->initLInsLd(op, base, d, accSet, loadQual);
299 // If the displacement is more than 16 bits, put it in a separate instruction.
300 // Note that CseFilter::insLoad() also does this, so this will
301 // only occur if CseFilter has been removed from the pipeline.
302 return insLoad(op, ins2(LIR_addp, base, insImmWord(d)), 0, accSet, loadQual);
306 LIns* LirBufWriter::insGuard(LOpcode op, LIns* c, GuardRecord *gr)
308 debug_only( if (LIR_x == op || LIR_xbarrier == op) NanoAssert(!c); )
309 return ins2(op, c, (LIns*)gr);
312 LIns* LirBufWriter::insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr)
314 return ins3(op, a, b, (LIns*)gr);
317 LIns* LirBufWriter::insBranch(LOpcode op, LIns* condition, LIns* toLabel)
319 NanoAssert((op == LIR_j && !condition) ||
320 ((op == LIR_jf || op == LIR_jt) && condition));
321 return ins2(op, condition, toLabel);
324 LIns* LirBufWriter::insBranchJov(LOpcode op, LIns* a, LIns* b, LIns* toLabel)
326 return ins3(op, a, b, toLabel);
329 LIns* LirBufWriter::insJtbl(LIns* index, uint32_t size)
331 LInsJtbl* insJtbl = (LInsJtbl*) _buf->makeRoom(sizeof(LInsJtbl));
332 LIns** table = new (_buf->_allocator) LIns*[size];
333 LIns* ins = insJtbl->getLIns();
334 VMPI_memset(table, 0, size * sizeof(LIns*));
335 ins->initLInsJtbl(index, size, table);
339 LIns* LirBufWriter::insAlloc(int32_t size)
341 size = (size+3)>>2; // # of required 32bit words
342 LInsI* insI = (LInsI*)_buf->makeRoom(sizeof(LInsI));
343 LIns* ins = insI->getLIns();
344 ins->initLInsI(LIR_allocp, size);
348 LIns* LirBufWriter::insParam(int32_t arg, int32_t kind)
350 LInsP* insP = (LInsP*)_buf->makeRoom(sizeof(LInsP));
351 LIns* ins = insP->getLIns();
352 ins->initLInsP(arg, kind);
354 NanoAssert(arg < NumSavedRegs);
355 _buf->savedRegs[arg] = ins;
360 LIns* LirBufWriter::insImmI(int32_t imm)
362 LInsI* insI = (LInsI*)_buf->makeRoom(sizeof(LInsI));
363 LIns* ins = insI->getLIns();
364 ins->initLInsI(LIR_immi, imm);
369 LIns* LirBufWriter::insImmQ(uint64_t imm)
371 LInsQorD* insQorD = (LInsQorD*)_buf->makeRoom(sizeof(LInsQorD));
372 LIns* ins = insQorD->getLIns();
373 ins->initLInsQorD(LIR_immq, imm);
378 LIns* LirBufWriter::insComment(const char* str)
380 // Allocate space for and copy the string. We use the same allocator
381 // as the normal LIR buffers so it has the same lifetime.
382 char* str2 = (char*)_buf->_allocator.alloc(VMPI_strlen(str) + 1);
383 VMPI_strcpy(str2, str);
384 return ins1(LIR_comment, (LIns*)str);
387 LIns* LirBufWriter::insImmD(double d)
389 LInsQorD* insQorD = (LInsQorD*)_buf->makeRoom(sizeof(LInsQorD));
390 LIns* ins = insQorD->getLIns();
396 ins->initLInsQorD(LIR_immd, u.q);
400 // Reads the next non-skip instruction.
401 LIns* LirReader::read()
403 static const uint8_t insSizes[] = {
404 // LIR_start is treated specially -- see below.
405 #define OP___(op, number, repKind, retType, isCse) \
406 ((number) == LIR_start ? 0 : sizeof(LIns##repKind)),
407 #include "LIRopcode.tbl"
412 // Check the invariant: _ins never points to a skip.
413 NanoAssert(_ins && !_ins->isop(LIR_skip));
415 // Step back one instruction. Use a table lookup rather than a switch
416 // to avoid branch mispredictions. LIR_start is given a special size
417 // of zero so that we don't step back past the start of the block.
418 // (Callers of this function should stop once they see a LIR_start.)
420 _ins = (LIns*)(uintptr_t(_ins) - insSizes[_ins->opcode()]);
422 // Ensure _ins doesn't end up pointing to a skip.
423 while (_ins->isop(LIR_skip)) {
424 NanoAssert(_ins->prevLIns() != _ins);
425 _ins = _ins->prevLIns();
431 LOpcode arithOpcodeD2I(LOpcode op)
434 case LIR_negd: return LIR_negi;
435 case LIR_addd: return LIR_addi;
436 case LIR_subd: return LIR_subi;
437 case LIR_muld: return LIR_muli;
438 default: NanoAssert(0); return LIR_skip;
443 LOpcode cmpOpcodeI2Q(LOpcode op)
446 case LIR_eqi: return LIR_eqq;
447 case LIR_lti: return LIR_ltq;
448 case LIR_gti: return LIR_gtq;
449 case LIR_lei: return LIR_leq;
450 case LIR_gei: return LIR_geq;
451 case LIR_ltui: return LIR_ltuq;
452 case LIR_gtui: return LIR_gtuq;
453 case LIR_leui: return LIR_leuq;
454 case LIR_geui: return LIR_geuq;
455 default: NanoAssert(0); return LIR_skip;
460 LOpcode cmpOpcodeD2I(LOpcode op)
463 case LIR_eqd: return LIR_eqi;
464 case LIR_ltd: return LIR_lti;
465 case LIR_gtd: return LIR_gti;
466 case LIR_led: return LIR_lei;
467 case LIR_ged: return LIR_gei;
468 default: NanoAssert(0); return LIR_skip;
472 LOpcode cmpOpcodeD2UI(LOpcode op)
475 case LIR_eqd: return LIR_eqi;
476 case LIR_ltd: return LIR_ltui;
477 case LIR_gtd: return LIR_gtui;
478 case LIR_led: return LIR_leui;
479 case LIR_ged: return LIR_geui;
480 default: NanoAssert(0); return LIR_skip;
484 // This is never called, but that's ok because it contains only static
486 void LIns::staticSanityCheck()
488 // LIns must be word-sized.
489 NanoStaticAssert(sizeof(LIns) == 1*sizeof(void*));
491 // LInsXYZ have expected sizes too.
492 NanoStaticAssert(sizeof(LInsOp0) == 1*sizeof(void*));
493 NanoStaticAssert(sizeof(LInsOp1) == 2*sizeof(void*));
494 NanoStaticAssert(sizeof(LInsOp2) == 3*sizeof(void*));
495 NanoStaticAssert(sizeof(LInsOp3) == 4*sizeof(void*));
496 NanoStaticAssert(sizeof(LInsLd) == 3*sizeof(void*));
497 NanoStaticAssert(sizeof(LInsSt) == 4*sizeof(void*));
498 NanoStaticAssert(sizeof(LInsSk) == 2*sizeof(void*));
499 NanoStaticAssert(sizeof(LInsC) == 3*sizeof(void*));
500 NanoStaticAssert(sizeof(LInsP) == 2*sizeof(void*));
501 NanoStaticAssert(sizeof(LInsI) == 2*sizeof(void*));
502 #if defined NANOJIT_64BIT
503 NanoStaticAssert(sizeof(LInsQorD) == 2*sizeof(void*));
505 NanoStaticAssert(sizeof(LInsQorD) == 3*sizeof(void*));
507 NanoStaticAssert(sizeof(LInsJtbl) == 4*sizeof(void*));
509 // oprnd_1 must be in the same position in LIns{Op1,Op2,Op3,Ld,St,Jtbl}
510 // because oprnd1() is used for all of them.
511 #define OP1OFFSET (offsetof(LInsOp1, ins) - offsetof(LInsOp1, oprnd_1))
512 NanoStaticAssert( OP1OFFSET == (offsetof(LInsOp2, ins) - offsetof(LInsOp2, oprnd_1)) );
513 NanoStaticAssert( OP1OFFSET == (offsetof(LInsOp3, ins) - offsetof(LInsOp3, oprnd_1)) );
514 NanoStaticAssert( OP1OFFSET == (offsetof(LInsLd, ins) - offsetof(LInsLd, oprnd_1)) );
515 NanoStaticAssert( OP1OFFSET == (offsetof(LInsSt, ins) - offsetof(LInsSt, oprnd_1)) );
516 NanoStaticAssert( OP1OFFSET == (offsetof(LInsJtbl, ins) - offsetof(LInsJtbl, oprnd_1)) );
518 // oprnd_2 must be in the same position in LIns{Op2,Op3,St}
519 // because oprnd2() is used for all of them.
520 #define OP2OFFSET (offsetof(LInsOp2, ins) - offsetof(LInsOp2, oprnd_2))
521 NanoStaticAssert( OP2OFFSET == (offsetof(LInsOp3, ins) - offsetof(LInsOp3, oprnd_2)) );
522 NanoStaticAssert( OP2OFFSET == (offsetof(LInsSt, ins) - offsetof(LInsSt, oprnd_2)) );
525 bool insIsS16(LIns* i)
532 return insIsS16(i->oprnd2()) && insIsS16(i->oprnd3());
536 // many other possibilities too.
540 LIns* ExprFilter::ins1(LOpcode v, LIns* oprnd)
546 return insImmI(oprnd->immQlo());
550 return insImmQ(int64_t(int32_t(oprnd->immI())));
554 return insImmQ(uint64_t(uint32_t(oprnd->immI())));
557 if (oprnd->isop(LIR_qasd))
558 return oprnd->oprnd1();
561 if (oprnd->isop(LIR_dasq))
562 return oprnd->oprnd1();
565 #if NJ_SOFTFLOAT_SUPPORTED
568 return insImmI(oprnd->immDlo());
569 if (oprnd->isop(LIR_ii2d))
570 return oprnd->oprnd1();
574 return insImmI(oprnd->immDhi());
575 if (oprnd->isop(LIR_ii2d))
576 return oprnd->oprnd2();
581 return insImmI(~oprnd->immI());
583 if (v == oprnd->opcode())
584 return oprnd->oprnd1();
588 return insImmI(-oprnd->immI());
589 if (oprnd->isop(LIR_subi)) // -(a-b) = b-a
590 return out->ins2(LIR_subi, oprnd->oprnd2(), oprnd->oprnd1());
594 return insImmD(-oprnd->immD());
595 if (oprnd->isop(LIR_subd))
596 return out->ins2(LIR_subd, oprnd->oprnd2(), oprnd->oprnd1());
600 return insImmD(oprnd->immI());
601 // Nb: i2d(d2i(x)) != x
605 return insImmI(int32_t(oprnd->immD()));
606 if (oprnd->isop(LIR_i2d))
607 return oprnd->oprnd1();
611 return insImmD(uint32_t(oprnd->immI()));
617 return out->ins1(v, oprnd);
620 // This is an ugly workaround for an apparent compiler
621 // bug; in VC2008, compiling with optimization on
622 // will produce spurious errors if this code is inlined
623 // into ExprFilter::ins2(). See https://bugzilla.mozilla.org/show_bug.cgi?id=538504
624 inline double do_join(int32_t c1, int32_t c2)
630 u.u64 = uint32_t(c1) | uint64_t(c2)<<32;
634 LIns* ExprFilter::ins2(LOpcode v, LIns* oprnd1, LIns* oprnd2)
636 NanoAssert(oprnd1 && oprnd2);
638 //-------------------------------------------------------------------
639 // Folding where the two operands are equal
640 //-------------------------------------------------------------------
641 if (oprnd1 == oprnd2) {
642 // The operands are equal.
660 return insImmI(1); // (x <= x) == 1; (x >= x) == 1
667 //-------------------------------------------------------------------
668 // Folding where both operands are immediates, grouped by type
669 //-------------------------------------------------------------------
670 if (oprnd1->isImmI() && oprnd2->isImmI()) {
671 // The operands are both int immediates.
672 int32_t c1 = oprnd1->immI();
673 int32_t c2 = oprnd2->immI();
678 #if NJ_SOFTFLOAT_SUPPORTED
679 case LIR_ii2d: return insImmD(do_join(c1, c2));
681 case LIR_eqi: return insImmI(c1 == c2);
682 case LIR_lti: return insImmI(c1 < c2);
683 case LIR_gti: return insImmI(c1 > c2);
684 case LIR_lei: return insImmI(c1 <= c2);
685 case LIR_gei: return insImmI(c1 >= c2);
686 case LIR_ltui: return insImmI(uint32_t(c1) < uint32_t(c2));
687 case LIR_gtui: return insImmI(uint32_t(c1) > uint32_t(c2));
688 case LIR_leui: return insImmI(uint32_t(c1) <= uint32_t(c2));
689 case LIR_geui: return insImmI(uint32_t(c1) >= uint32_t(c2));
691 case LIR_lshi: return insImmI(c1 << (c2 & 0x1f));
692 case LIR_rshi: return insImmI(c1 >> (c2 & 0x1f));
693 case LIR_rshui: return insImmI(uint32_t(c1) >> (c2 & 0x1f));
695 case LIR_ori: return insImmI(c1 | c2);
696 case LIR_andi: return insImmI(c1 & c2);
697 case LIR_xori: return insImmI(c1 ^ c2);
699 case LIR_addi: d = double(c1) + double(c2); goto fold;
700 case LIR_subi: d = double(c1) - double(c2); goto fold;
701 case LIR_muli: d = double(c1) * double(c2); goto fold;
703 // Make sure the constant expression doesn't overflow. This
704 // probably isn't necessary, because the C++ overflow
705 // behaviour is very likely to be the same as the machine code
706 // overflow behaviour, but we do it just to be safe.
712 #if defined NANOJIT_IA32 || defined NANOJIT_X64
715 // We can't easily fold div and mod, since folding div makes it
716 // impossible to calculate the mod that refers to it. The
717 // frontend shouldn't emit div and mod with constant operands.
725 } else if (oprnd1->isImmQ() && oprnd2->isImmQ()) {
726 // The operands are both quad immediates.
727 int64_t c1 = oprnd1->immQ();
728 int64_t c2 = oprnd2->immQ();
729 static const int64_t MIN_INT64 = int64_t(0x8000000000000000LL);
730 static const int64_t MAX_INT64 = int64_t(0x7FFFFFFFFFFFFFFFLL);
733 case LIR_eqq: return insImmI(c1 == c2);
734 case LIR_ltq: return insImmI(c1 < c2);
735 case LIR_gtq: return insImmI(c1 > c2);
736 case LIR_leq: return insImmI(c1 <= c2);
737 case LIR_geq: return insImmI(c1 >= c2);
738 case LIR_ltuq: return insImmI(uint64_t(c1) < uint64_t(c2));
739 case LIR_gtuq: return insImmI(uint64_t(c1) > uint64_t(c2));
740 case LIR_leuq: return insImmI(uint64_t(c1) <= uint64_t(c2));
741 case LIR_geuq: return insImmI(uint64_t(c1) >= uint64_t(c2));
743 case LIR_orq: return insImmQ(c1 | c2);
744 case LIR_andq: return insImmQ(c1 & c2);
745 case LIR_xorq: return insImmQ(c1 ^ c2);
747 // Nb: LIR_rshq, LIR_lshq and LIR_rshuq aren't here because their
748 // RHS is an int. They are below.
751 // Overflow is only possible if both values are positive or
752 // both negative. Just like the 32-bit case, this check
753 // probably isn't necessary, because the C++ overflow
754 // behaviour is very likely to be the same as the machine code
755 // overflow behaviour, but we do it just to be safe.
756 if (c1 > 0 && c2 > 0) {
757 // Overflows if: c1 + c2 > MAX_INT64
758 // Re-express to avoid overflow in the check: c1 > MAX_INT64 - c2
759 if (c1 > MAX_INT64 - c2)
761 } else if (c1 < 0 && c2 < 0) {
762 // Overflows if: c1 + c2 < MIN_INT64
763 // Re-express to avoid overflow in the check: c1 < MIN_INT64 - c2
764 if (c1 < MIN_INT64 - c2)
767 return insImmQ(c1 + c2);
770 // Overflow is only possible if one value is positive and one
772 if (c1 > 0 && c2 < 0) {
773 // Overflows if: c1 - c2 > MAX_INT64
774 // Re-express to avoid overflow in the check: c1 > MAX_INT64 + c2
775 if (c1 > MAX_INT64 + c2)
777 } else if (c1 < 0 && c2 > 0) {
778 // Overflows if: c1 - c2 < MIN_INT64
779 // Re-express to avoid overflow in the check: c1 < MIN_INT64 + c2
780 if (c1 < MIN_INT64 + c2)
783 return insImmQ(c1 - c2);
789 } else if (oprnd1->isImmQ() && oprnd2->isImmI()) {
790 // The first operand is a quad immediate, the second is an int
792 int64_t c1 = oprnd1->immQ();
793 int32_t c2 = oprnd2->immI();
796 case LIR_lshq: return insImmQ(c1 << (c2 & 0x3f));
797 case LIR_rshq: return insImmQ(c1 >> (c2 & 0x3f));
798 case LIR_rshuq: return insImmQ(uint64_t(c1) >> (c2 & 0x3f));
802 #endif // NANOJIT_64BIT
804 } else if (oprnd1->isImmD() && oprnd2->isImmD()) {
805 // The operands are both double immediates.
806 double c1 = oprnd1->immD();
807 double c2 = oprnd2->immD();
809 case LIR_eqd: return insImmI(c1 == c2);
810 case LIR_ltd: return insImmI(c1 < c2);
811 case LIR_gtd: return insImmI(c1 > c2);
812 case LIR_led: return insImmI(c1 <= c2);
813 case LIR_ged: return insImmI(c1 >= c2);
815 case LIR_addd: return insImmD(c1 + c2);
816 case LIR_subd: return insImmD(c1 - c2);
817 case LIR_muld: return insImmD(c1 * c2);
818 case LIR_divd: return insImmD(c1 / c2);
824 //-------------------------------------------------------------------
825 // If only one operand is an immediate, make sure it's on the RHS, if possible
826 //-------------------------------------------------------------------
827 if (oprnd1->isImmAny() && !oprnd2->isImmAny()) {
843 // move immediate to RHS
850 if (isCmpOpcode(v)) {
851 // move immediate to RHS, swap the operator
855 v = invertCmpOpcode(v);
861 //-------------------------------------------------------------------
862 // Folding where the RHS is an immediate
863 //-------------------------------------------------------------------
864 if (oprnd2->isImmI()) {
865 // The second operand is an int immediate.
866 int c = oprnd2->immI();
869 if (oprnd1->isop(LIR_addi) && oprnd1->oprnd2()->isImmI()) {
870 // add(add(x,c1),c2) => add(x,c1+c2)
871 c += oprnd1->oprnd2()->immI();
873 oprnd1 = oprnd1->oprnd1();
878 if (oprnd1->isop(LIR_addi) && oprnd1->oprnd2()->isImmI()) {
879 // sub(add(x,c1),c2) => add(x,c1-c2)
880 c = oprnd1->oprnd2()->immI() - c;
882 oprnd1 = oprnd1->oprnd1();
888 if (c == 16 && oprnd1->isop(LIR_lshi) &&
889 oprnd1->oprnd2()->isImmI(16) &&
890 insIsS16(oprnd1->oprnd1()))
892 // rsh(lhs(x,16),16) == x, if x is S16
893 return oprnd1->oprnd1();
910 CASE64(LIR_lshq:) // These are here because their RHS is an int
917 case LIR_ltui: // unsigned < 0 -> always false
920 case LIR_geui: // unsigned >= 0 -> always true
924 if (oprnd1->isop(LIR_ori) &&
925 oprnd1->oprnd2()->isImmI() &&
926 oprnd1->oprnd2()->immI() != 0)
928 // (x or c) != 0 if c != 0
936 } else if (c == -1) {
938 case LIR_ori: return oprnd2; // x | -1 = -1
939 case LIR_andi: return oprnd1; // x & -1 = x
940 case LIR_gtui: return insImmI(0); // u32 > 0xffffffff -> always false
941 case LIR_leui: return insImmI(1); // u32 <= 0xffffffff -> always true
946 if (oprnd1->isCmp()) {
948 case LIR_ori: return oprnd2; // 0or1 | 1 = 1 (and oprnd2 == 1)
949 case LIR_andi: return oprnd1; // 0or1 & 1 = 0or1
950 case LIR_gtui: return insImmI(0); // 0or1 > 1 -> always false
953 } else if (v == LIR_muli) {
954 return oprnd1; // x * 1 = x
959 } else if (oprnd2->isImmQ()) {
960 // The second operand is a quad immediate.
961 int64_t c = oprnd2->immQ();
973 case LIR_ltuq: // unsigned < 0 -> always false
976 case LIR_geuq: // unsigned >= 0 -> always true
983 } else if (c == -1) {
985 case LIR_orq: return oprnd2; // x | -1 = -1
986 case LIR_andq: return oprnd1; // x & -1 = x
987 case LIR_gtuq: return insImmI(0); // u64 > 0xffffffffffffffff -> always false
988 case LIR_leuq: return insImmI(1); // u64 <= 0xffffffffffffffff -> always true
993 if (oprnd1->isCmp()) {
995 case LIR_orq: return oprnd2; // 0or1 | 1 = 1 (and oprnd2 == 1)
996 case LIR_andq: return oprnd1; // 0or1 & 1 = 0or1
997 case LIR_gtuq: return insImmI(0); // 0or1 > 1 -> always false
1002 #endif // NANOJIT_64BIT
1005 #if NJ_SOFTFLOAT_SUPPORTED
1006 //-------------------------------------------------------------------
1007 // SoftFloat-specific folding
1008 //-------------------------------------------------------------------
1010 if (v == LIR_ii2d && oprnd1->isop(LIR_dlo2i) && oprnd2->isop(LIR_dhi2i) &&
1011 (ins = oprnd1->oprnd1()) == oprnd2->oprnd1())
1013 // qjoin(qlo(x),qhi(x)) == x
1018 //-------------------------------------------------------------------
1019 // No folding possible
1020 //-------------------------------------------------------------------
1021 return out->ins2(v, oprnd1, oprnd2);
1024 LIns* ExprFilter::ins3(LOpcode v, LIns* oprnd1, LIns* oprnd2, LIns* oprnd3)
1026 NanoAssert(oprnd1 && oprnd2 && oprnd3);
1027 NanoAssert(isCmovOpcode(v));
1028 if (oprnd2 == oprnd3) {
1032 if (oprnd1->isImmI()) {
1033 // immediate ? x : y => return x or y depending on immediate
1034 return oprnd1->immI() ? oprnd2 : oprnd3;
1036 if (oprnd1->isop(LIR_eqi) &&
1037 ((oprnd1->oprnd2() == oprnd2 && oprnd1->oprnd1() == oprnd3) ||
1038 (oprnd1->oprnd1() == oprnd2 && oprnd1->oprnd2() == oprnd3))) {
1039 // (y == x) ? x : y => y
1040 // (x == y) ? x : y => y
1044 return out->ins3(v, oprnd1, oprnd2, oprnd3);
1047 LIns* ExprFilter::insGuard(LOpcode v, LIns* c, GuardRecord *gr)
1049 if (v == LIR_xt || v == LIR_xf) {
1051 if ((v == LIR_xt && !c->immI()) || (v == LIR_xf && c->immI())) {
1052 return 0; // no guard needed
1055 // We're emitting a guard that will always fail. Any code
1056 // emitted after this guard is dead code. But it won't be
1057 // optimized away, and it could indicate a performance
1058 // problem or other bug, so assert in debug builds.
1059 NanoAssertMsg(0, "Constantly false guard detected");
1061 return out->insGuard(LIR_x, NULL, gr);
1064 while (c->isop(LIR_eqi) && c->oprnd1()->isCmp() && c->oprnd2()->isImmI(0)) {
1065 // xt(eq(cmp,0)) => xf(cmp) or xf(eq(cmp,0)) => xt(cmp)
1066 v = invertCondGuardOpcode(v);
1071 return out->insGuard(v, c, gr);
1074 // Simplify operator if possible. Always return NULL if overflow is possible.
1076 LIns* ExprFilter::simplifyOverflowArith(LOpcode op, LIns** opnd1, LIns** opnd2)
1078 LIns* oprnd1 = *opnd1;
1079 LIns* oprnd2 = *opnd2;
1081 if (oprnd1->isImmI() && oprnd2->isImmI()) {
1082 int32_t c1 = oprnd1->immI();
1083 int32_t c2 = oprnd2->immI();
1086 // The code below attempts to perform the operation while
1087 // detecting overflow. For multiplication, we may unnecessarily
1088 // infer a possible overflow due to the insufficient integer
1089 // range of the double type.
1093 case LIR_addxovi: d = double(c1) + double(c2); break;
1095 case LIR_subxovi: d = double(c1) - double(c2); break;
1097 case LIR_mulxovi: d = double(c1) * double(c2); break;
1098 default: NanoAssert(0); break;
1100 int32_t r = int32_t(d);
1104 } else if (oprnd1->isImmI() && !oprnd2->isImmI()) {
1110 // swap operands, moving immediate to RHS
1114 // swap actual arguments in caller as well
1127 if (oprnd2->isImmI()) {
1128 int c = oprnd2->immI();
1142 } else if (c == 1 && (op == LIR_muljovi || op == LIR_mulxovi)) {
1150 LIns* ExprFilter::insGuardXov(LOpcode op, LIns* oprnd1, LIns* oprnd2, GuardRecord *gr)
1152 LIns* simplified = simplifyOverflowArith(op, &oprnd1, &oprnd2);
1156 return out->insGuardXov(op, oprnd1, oprnd2, gr);
1159 LIns* ExprFilter::insBranch(LOpcode v, LIns *c, LIns *t)
1161 if (v == LIR_jt || v == LIR_jf) {
1163 if ((v == LIR_jt && !c->immI()) || (v == LIR_jf && c->immI())) {
1164 return 0; // no jump needed
1167 // We're emitting a branch that will always be taken. This may
1168 // result in dead code that will not be optimized away, and
1169 // could indicate a performance problem or other bug, so assert
1171 NanoAssertMsg(0, "Constantly taken branch detected");
1173 return out->insBranch(LIR_j, NULL, t);
1176 while (c->isop(LIR_eqi) && c->oprnd1()->isCmp() && c->oprnd2()->isImmI(0)) {
1177 // jt(eq(cmp,0)) => jf(cmp) or jf(eq(cmp,0)) => jt(cmp)
1178 v = invertCondJmpOpcode(v);
1183 return out->insBranch(v, c, t);
1186 LIns* ExprFilter::insBranchJov(LOpcode op, LIns* oprnd1, LIns* oprnd2, LIns* target)
1188 LIns* simplified = simplifyOverflowArith(op, &oprnd1, &oprnd2);
1192 return out->insBranchJov(op, oprnd1, oprnd2, target);
1195 LIns* ExprFilter::insLoad(LOpcode op, LIns* base, int32_t off, AccSet accSet, LoadQual loadQual) {
1196 if (base->isImmP() && !isS8(off)) {
1197 // if the effective address is constant, then transform:
1198 // ld const[bigconst] => ld (const+bigconst)[0]
1199 // note: we don't do this optimization for <8bit field offsets,
1200 // under the assumption that we're more likely to CSE-match the
1201 // constant base address if we dont const-fold small offsets.
1202 uintptr_t p = (uintptr_t)base->immP() + off;
1203 return out->insLoad(op, insImmP((void*)p), 0, accSet, loadQual);
1205 return out->insLoad(op, base, off, accSet, loadQual);
1208 LIns* LirWriter::insStore(LIns* value, LIns* base, int32_t d, AccSet accSet)
1210 // Determine which kind of store should be used for 'value' based on
1212 LOpcode op = LOpcode(0);
1213 switch (value->retType()) {
1214 case LTy_I: op = LIR_sti; break;
1215 #ifdef NANOJIT_64BIT
1216 case LTy_Q: op = LIR_stq; break;
1218 case LTy_D: op = LIR_std; break;
1219 case LTy_V: NanoAssert(0); break;
1220 default: NanoAssert(0); break;
1222 return insStore(op, value, base, d, accSet);
1225 LIns* LirWriter::insChoose(LIns* cond, LIns* iftrue, LIns* iffalse, bool use_cmov)
1227 // 'cond' must be a conditional, unless it has been optimized to 0 or
1228 // 1. In that case make it an ==0 test and flip the branches. It'll
1229 // get constant-folded by ExprFilter subsequently.
1230 if (!cond->isCmp()) {
1231 NanoAssert(cond->isImmI());
1232 cond = insEqI_0(cond);
1239 LOpcode op = LIR_cmovi;
1240 if (iftrue->isI() && iffalse->isI()) {
1242 #ifdef NANOJIT_64BIT
1243 } else if (iftrue->isQ() && iffalse->isQ()) {
1246 } else if (iftrue->isD() && iffalse->isD()) {
1249 NanoAssert(0); // type error
1251 return ins3(op, cond, iftrue, iffalse);
1254 LIns* ncond = ins1(LIR_negi, cond); // cond ? -1 : 0
1255 return ins2(LIR_ori,
1256 ins2(LIR_andi, iftrue, ncond),
1257 ins2(LIR_andi, iffalse, ins1(LIR_noti, ncond)));
1260 LIns* LirBufWriter::insCall(const CallInfo *ci, LIns* args[])
1262 LOpcode op = getCallOpcode(ci);
1263 #if NJ_SOFTFLOAT_SUPPORTED
1264 // SoftFloat: convert LIR_calld to LIR_calli.
1265 if (_config.soft_float && op == LIR_calld)
1269 int32_t argc = ci->count_args();
1270 NanoAssert(argc <= (int)MAXARGS);
1272 // Allocate space for and copy the arguments. We use the same
1273 // allocator as the normal LIR buffers so it has the same lifetime.
1274 // Nb: this must be kept in sync with arg().
1275 LIns** args2 = (LIns**)_buf->_allocator.alloc(argc * sizeof(LIns*));
1276 memcpy(args2, args, argc * sizeof(LIns*));
1278 // Allocate and write the call instruction.
1279 LInsC* insC = (LInsC*)_buf->makeRoom(sizeof(LInsC));
1280 LIns* ins = insC->getLIns();
1281 ins->initLInsC(op, args2, ci);
1285 using namespace avmplus;
1287 StackFilter::StackFilter(LirFilter *in, Allocator& alloc, LIns* sp)
1288 : LirFilter(in), sp(sp), stk(alloc), top(0)
1291 // If we see a sequence like this:
1297 // where '...' contains no guards, we can remove the first store. Also,
1298 // because stack entries are eight bytes each (we check this), if we have
1305 // we can again remove the first store -- even though the second store
1306 // doesn't clobber the high four bytes -- because we know the entire value
1307 // stored by the first store is dead.
1309 LIns* StackFilter::read()
1312 LIns* ins = in->read();
1314 if (ins->isStore()) {
1315 LIns* base = ins->oprnd2();
1317 // 'disp' must be eight-aligned because each stack entry is 8 bytes.
1318 NanoAssert((ins->disp() & 0x7) == 0);
1320 int d = ins->disp() >> 3;
1334 * NB: If there is a backward branch other than the loop-restart branch, this is
1335 * going to be wrong. Unfortunately there doesn't seem to be an easy way to detect
1336 * such branches. Just do not create any.
1338 * The isLive() call is valid because liveness will have been
1339 * computed by Assembler::gen() for every instruction following
1342 else if (ins->isGuard() && ins->isLive()) {
1358 RetiredEntry(): live(NULL), i(NULL) {}
1365 HashMap<LIns*, LIns*> live;
1366 SeqBuilder<RetiredEntry*> retired;
1369 LiveTable(Allocator& alloc)
1377 void add(LIns* ins, LIns* use) {
1378 if (!ins->isImmAny() && !live.containsKey(ins)) {
1379 NanoAssert(size_t(ins->opcode()) < sizeof(lirNames) / sizeof(lirNames[0]));
1384 void retire(LIns* i) {
1385 RetiredEntry *e = new (alloc) RetiredEntry();
1387 SeqBuilder<LIns*> livelist(alloc);
1388 HashMap<LIns*, LIns*>::Iter iter(live);
1390 while (iter.next()) {
1391 LIns* ins = iter.key();
1394 livelist.insert(ins);
1397 e->live = livelist.get();
1398 if (live_count > maxlive)
1399 maxlive = live_count;
1406 bool contains(LIns* i) {
1407 return live.containsKey(i);
1412 * traverse the LIR buffer and discover which instructions are live
1413 * by starting from instructions with side effects (stores, calls, branches)
1414 * and marking instructions used by them. Works bottom-up, in one pass.
1415 * if showLiveRefs == true, also print the set of live expressions next to
1418 void live(LirFilter* in, Allocator& alloc, Fragment *frag, LogControl *logc)
1420 // traverse backwards to find live exprs and a few other stats.
1422 LiveTable live(alloc);
1425 if (frag->lirbuf->state)
1426 live.add(frag->lirbuf->state, 0);
1427 for (LIns* ins = in->read(); !ins->isop(LIR_start); ins = in->read())
1431 // First handle instructions that are always live (ie. those that
1432 // don't require being marked as live), eg. those with
1433 // side-effects. We ignore LIR_paramp.
1434 if (ins->isLive() && !ins->isop(LIR_paramp))
1441 // now propagate liveness
1442 if (live.contains(ins))
1446 switch (ins->opcode()) {
1448 NanoAssertMsg(0, "Shouldn't see LIR_skip");
1463 // No operands, do nothing.
1501 live.add(ins->oprnd1(), 0);
1555 CASE64(LIR_addjovq:)
1556 CASE64(LIR_subjovq:)
1566 live.add(ins->oprnd1(), 0);
1567 live.add(ins->oprnd2(), 0);
1573 live.add(ins->oprnd1(), 0);
1574 live.add(ins->oprnd2(), 0);
1575 live.add(ins->oprnd3(), 0);
1582 for (int i = 0, argc = ins->argc(); i < argc; i++)
1583 live.add(ins->arg(i), 0);
1587 NanoAssertMsgf(0, "unhandled opcode: %d", ins->opcode());
1593 logc->printf(" Live instruction count %d, total %u, max pressure %d\n",
1594 live.retiredCount, total, live.maxlive);
1596 logc->printf(" Side exits %u\n", exits);
1597 logc->printf(" Showing LIR instructions with live-after variables\n");
1600 // print live exprs, going forwards
1601 LInsPrinter *printer = frag->lirbuf->printer;
1602 bool newblock = true;
1603 for (Seq<RetiredEntry*>* p = live.retired.get(); p != NULL; p = p->tail) {
1604 RetiredEntry* e = p->head;
1607 char livebuf[4000], *s=livebuf;
1609 if (!newblock && e->i->isop(LIR_label)) {
1613 for (Seq<LIns*>* p = e->live; p != NULL; p = p->tail) {
1614 VMPI_strcpy(s, printer->formatRef(&rb, p->head));
1615 s += VMPI_strlen(s);
1617 NanoAssert(s < livebuf+sizeof(livebuf));
1619 /* If the LIR insn is pretty short, print it and its
1620 live-after set on the same line. If not, put
1621 live-after set on a new line, suitably indented. */
1622 const char* insn_text = printer->formatIns(&ib, e->i);
1623 if (VMPI_strlen(insn_text) >= 30-2) {
1624 logc->printf(" %-30s\n %-30s %s\n", insn_text, "", livebuf);
1626 logc->printf(" %-30s %s\n", insn_text, livebuf);
1629 if (e->i->isGuard() || e->i->isBranch() || e->i->isRet()) {
1636 void LirNameMap::addNameWithSuffix(LIns* ins, const char *name, int suffix,
1637 bool ignoreOneSuffix) {
1638 NanoAssert(!names.containsKey(ins));
1641 if (suffix == 1 && ignoreOneSuffix) {
1642 VMPI_snprintf(name2, N, "%s", name); // don't add '1' suffix
1643 } else if (VMPI_isdigit(name[VMPI_strlen(name)-1])) {
1644 VMPI_snprintf(name2, N, "%s_%d", name, suffix); // use '_' to avoid confusion
1646 VMPI_snprintf(name2, N, "%s%d", name, suffix); // normal case
1649 char *copy = new (alloc) char[VMPI_strlen(name2)+1];
1650 VMPI_strcpy(copy, name2);
1651 Entry *e = new (alloc) Entry(copy);
1655 void LirNameMap::addName(LIns* ins, const char* name) {
1656 // The lookup may succeed, ie. we may already have a name for this
1657 // instruction. This can happen because of CSE. Eg. if we have this:
1659 // ins = addName("foo", insImmI(0))
1661 // that assigns the name "foo1" to 'ins'. If we later do this:
1663 // ins2 = addName("foo", insImmI(0))
1665 // then CSE will cause 'ins' and 'ins2' to be equal. So 'ins2'
1666 // already has a name ("foo1") and there's no need to generate a new
1669 if (!names.containsKey(ins)) {
1670 Str* str = new (alloc) Str(alloc, name);
1671 int suffix = namecounts.add(*str);
1672 addNameWithSuffix(ins, name, suffix, /*ignoreOneSuffix*/true);
1676 const char* LirNameMap::createName(LIns* ins) {
1677 if (ins->isCall()) {
1678 #if NJ_SOFTFLOAT_SUPPORTED
1679 if (ins->isop(LIR_hcalli)) {
1680 ins = ins->oprnd1(); // we've presumably seen the other half already
1684 if (!names.containsKey(ins))
1685 addNameWithSuffix(ins, ins->callInfo()->_name, funccounts.add(ins->callInfo()),
1686 /*ignoreOneSuffix*/false);
1689 if (!names.containsKey(ins))
1690 addNameWithSuffix(ins, lirNames[ins->opcode()], lircounts.add(ins->opcode()),
1691 /*ignoreOneSuffix*/false);
1694 return names.get(ins)->name;
1697 const char* LirNameMap::lookupName(LIns* ins)
1699 Entry* e = names.get(ins);
1700 return e ? e->name : NULL;
1703 char* LInsPrinter::formatAccSet(RefBuf* buf, AccSet accSet) {
1704 if (accSet == ACCSET_NONE) {
1705 VMPI_sprintf(buf->buf, ".none");
1706 } else if (accSet == ACCSET_ALL) {
1707 VMPI_sprintf(buf->buf, ".all");
1711 // The AccSet may contain bits set for regions not used by the
1712 // embedding, if any have been specified via
1713 // (ACCSET_ALL & ~ACCSET_XYZ). So only print those that are
1715 for (int i = 0; i < EMB_NUM_USED_ACCS; i++) {
1716 if (accSet & (1 << i)) {
1717 VMPI_strcat(b, ".");
1718 VMPI_strcat(b, accNames[i]);
1719 accSet &= ~(1 << i);
1722 NanoAssert(VMPI_strlen(b) < buf->len);
1727 char* LInsPrinter::formatImmI(RefBuf* buf, int32_t c) {
1728 if (-10000 < c && c < 10000) {
1729 VMPI_snprintf(buf->buf, buf->len, "%d", c);
1731 #if !defined NANOJIT_64BIT
1732 formatAddr(buf, (void*)c);
1734 VMPI_snprintf(buf->buf, buf->len, "0x%x", (unsigned int)c);
1740 #if defined NANOJIT_64BIT
1741 char* LInsPrinter::formatImmQ(RefBuf* buf, uint64_t c) {
1742 if (-10000 < (int64_t)c && c < 10000) {
1743 VMPI_snprintf(buf->buf, buf->len, "%dLL", (int)c);
1745 formatAddr(buf, (void*)c);
1751 char* LInsPrinter::formatImmD(RefBuf* buf, double c) {
1752 VMPI_snprintf(buf->buf, buf->len, "%g", c);
1756 char* LInsPrinter::formatAddr(RefBuf* buf, void* p)
1760 addrNameMap->lookupAddr(p, name, offset);
1764 VMPI_snprintf(buf->buf, buf->len, "%p %s+%d", p, name, offset);
1766 VMPI_snprintf(buf->buf, buf->len, "%p %s", p, name);
1769 VMPI_snprintf(buf->buf, buf->len, "%p", p);
1775 char* LInsPrinter::formatRef(RefBuf* buf, LIns *ref, bool showImmValue)
1777 // Give 'ref' a name if it doesn't have one.
1778 const char* name = lirNameMap->lookupName(ref);
1780 name = lirNameMap->createName(ref);
1783 // Put it in the buffer. If it's an immediate, show the value if
1784 // showImmValue==true. (This facility allows us to print immediate
1785 // values when they're used but not when they're def'd, ie. we don't
1786 // want "immi1/*1*/ = immi 1".)
1788 if (ref->isImmI() && showImmValue) {
1789 VMPI_snprintf(buf->buf, buf->len, "%s/*%s*/", name, formatImmI(&buf2, ref->immI()));
1791 #ifdef NANOJIT_64BIT
1792 else if (ref->isImmQ() && showImmValue) {
1793 VMPI_snprintf(buf->buf, buf->len, "%s/*%s*/", name, formatImmQ(&buf2, ref->immQ()));
1796 else if (ref->isImmD() && showImmValue) {
1797 VMPI_snprintf(buf->buf, buf->len, "%s/*%s*/", name, formatImmD(&buf2, ref->immD()));
1800 VMPI_snprintf(buf->buf, buf->len, "%s", name);
1806 char* LInsPrinter::formatIns(InsBuf* buf, LIns* i)
1809 size_t n = buf->len;
1810 RefBuf b1, b2, b3, b4;
1811 LOpcode op = i->opcode();
1815 VMPI_snprintf(s, n, "%s = %s %s", formatRef(&b1, i, /*showImmValue*/false),
1816 lirNames[op], formatImmI(&b2, i->immI()));
1819 #ifdef NANOJIT_64BIT
1821 VMPI_snprintf(s, n, "%s = %s %s", formatRef(&b1, i, /*showImmValue*/false),
1822 lirNames[op], formatImmQ(&b2, i->immQ()));
1827 VMPI_snprintf(s, n, "%s = %s %s", formatRef(&b1, i, /*showImmValue*/false),
1828 lirNames[op], formatImmD(&b2, i->immD()));
1832 VMPI_snprintf(s, n, "%s = %s %d", formatRef(&b1, i), lirNames[op], i->size());
1837 VMPI_snprintf(s, n, "%s", lirNames[op]);
1844 const CallInfo* call = i->callInfo();
1845 int32_t argc = i->argc();
1846 int32_t m = int32_t(n); // Windows doesn't have 'ssize_t'
1847 if (call->isIndirect())
1848 m -= VMPI_snprintf(s, m, "%s = %s%s [%s] ( ", formatRef(&b1, i), lirNames[op],
1849 formatAccSet(&b2, call->_storeAccSet),
1850 formatRef(&b3, i->arg(--argc)));
1852 m -= VMPI_snprintf(s, m, "%s = %s%s #%s ( ", formatRef(&b1, i), lirNames[op],
1853 formatAccSet(&b2, call->_storeAccSet), call->_name);
1855 for (int32_t j = argc - 1; j >= 0; j--) {
1856 s += VMPI_strlen(s);
1857 m -= VMPI_snprintf(s, m, "%s ",formatRef(&b2, i->arg(j)));
1860 s += VMPI_strlen(s);
1861 m -= VMPI_snprintf(s, m, ")");
1866 int32_t m = int32_t(n); // Windows doesn't have 'ssize_t'
1867 m -= VMPI_snprintf(s, m, "%s %s [ ", lirNames[op], formatRef(&b1, i->oprnd1()));
1869 for (uint32_t j = 0, sz = i->getTableSize(); j < sz; j++) {
1870 LIns* target = i->getTarget(j);
1871 s += VMPI_strlen(s);
1872 m -= VMPI_snprintf(s, m, "%s ", target ? formatRef(&b2, target) : "unpatched");
1875 s += VMPI_strlen(s);
1876 m -= VMPI_snprintf(s, m, "]");
1881 uint32_t arg = i->paramArg();
1882 if (!i->paramKind()) {
1883 if (arg < sizeof(Assembler::argRegs)/sizeof(Assembler::argRegs[0])) {
1884 VMPI_snprintf(s, n, "%s = %s %d %s", formatRef(&b1, i), lirNames[op],
1885 arg, gpn(Assembler::argRegs[arg]));
1887 VMPI_snprintf(s, n, "%s = %s %d", formatRef(&b1, i), lirNames[op], arg);
1890 VMPI_snprintf(s, n, "%s = %s %d %s", formatRef(&b1, i), lirNames[op],
1891 arg, gpn(Assembler::savedRegs[arg]));
1897 VMPI_snprintf(s, n, "%s:", formatRef(&b1, i));
1902 VMPI_snprintf(s, n, "%s %s -> %s", lirNames[op], formatRef(&b1, i->oprnd1()),
1903 i->oprnd2() ? formatRef(&b2, i->oprnd2()) : "unpatched");
1907 VMPI_snprintf(s, n, "%s -> %s", lirNames[op],
1908 i->oprnd2() ? formatRef(&b1, i->oprnd2()) : "unpatched");
1917 VMPI_snprintf(s, n, "%s %s", lirNames[op], formatRef(&b1, i->oprnd1()));
1935 VMPI_snprintf(s, n, "%s = %s %s", formatRef(&b1, i), lirNames[op],
1936 formatRef(&b2, i->oprnd1()));
1944 formatGuard(buf, i);
1950 formatGuardXov(buf, i);
1956 CASE64(LIR_addjovq:)
1957 CASE64(LIR_subjovq:)
1958 VMPI_snprintf(s, n, "%s = %s %s, %s ; ovf -> %s", formatRef(&b1, i), lirNames[op],
1959 formatRef(&b2, i->oprnd1()),
1960 formatRef(&b3, i->oprnd2()),
1961 i->oprnd3() ? formatRef(&b4, i->oprnd3()) : "unpatched");
1964 case LIR_addi: CASE64(LIR_addq:)
1965 case LIR_subi: CASE64(LIR_subq:)
1972 case LIR_andi: CASE64(LIR_andq:)
1973 case LIR_ori: CASE64(LIR_orq:)
1974 case LIR_xori: CASE64(LIR_xorq:)
1975 case LIR_lshi: CASE64(LIR_lshq:)
1976 case LIR_rshi: CASE64(LIR_rshq:)
1977 case LIR_rshui: CASE64(LIR_rshuq:)
1978 case LIR_eqi: CASE64(LIR_eqq:)
1979 case LIR_lti: CASE64(LIR_ltq:)
1980 case LIR_lei: CASE64(LIR_leq:)
1981 case LIR_gti: CASE64(LIR_gtq:)
1982 case LIR_gei: CASE64(LIR_geq:)
1983 case LIR_ltui: CASE64(LIR_ltuq:)
1984 case LIR_leui: CASE64(LIR_leuq:)
1985 case LIR_gtui: CASE64(LIR_gtuq:)
1986 case LIR_geui: CASE64(LIR_geuq:)
1992 #if NJ_SOFTFLOAT_SUPPORTED
1995 VMPI_snprintf(s, n, "%s = %s %s, %s", formatRef(&b1, i), lirNames[op],
1996 formatRef(&b2, i->oprnd1()),
1997 formatRef(&b3, i->oprnd2()));
2003 VMPI_snprintf(s, n, "%s = %s %s ? %s : %s", formatRef(&b1, i), lirNames[op],
2004 formatRef(&b2, i->oprnd1()),
2005 formatRef(&b3, i->oprnd2()),
2006 formatRef(&b4, i->oprnd3()));
2017 const char* qualStr;
2018 switch (i->loadQual()) {
2019 case LOAD_CONST: qualStr = "/c"; break;
2020 case LOAD_NORMAL: qualStr = ""; break;
2021 case LOAD_VOLATILE: qualStr = "/v"; break;
2022 default: NanoAssert(0); qualStr = "/?"; break;
2024 VMPI_snprintf(s, n, "%s = %s%s%s %s[%d]", formatRef(&b1, i), lirNames[op],
2025 formatAccSet(&b2, i->accSet()), qualStr, formatRef(&b3, i->oprnd1()),
2036 VMPI_snprintf(s, n, "%s%s %s[%d] = %s", lirNames[op],
2037 formatAccSet(&b1, i->accSet()),
2038 formatRef(&b2, i->oprnd2()),
2040 formatRef(&b3, i->oprnd1()));
2044 VMPI_snprintf(s, n, "------------------------------ # %s", (char*)i->oprnd1());
2048 NanoAssertMsgf(0, "Can't handle opcode %s\n", lirNames[op]);
2055 CseFilter::CseFilter(LirWriter *out, uint8_t embNumUsedAccs, Allocator& alloc)
2057 EMB_NUM_USED_ACCS(embNumUsedAccs),
2058 CSE_NUM_USED_ACCS(EMB_NUM_USED_ACCS + 2),
2059 CSE_ACC_CONST( EMB_NUM_USED_ACCS + 0),
2060 CSE_ACC_MULTIPLE( EMB_NUM_USED_ACCS + 1),
2061 storesSinceLastLoad(ACCSET_NONE),
2063 knownCmpValues(alloc),
2067 m_findNL[NLImmISmall] = &CseFilter::findImmISmall;
2068 m_findNL[NLImmILarge] = &CseFilter::findImmILarge;
2069 m_findNL[NLImmQ] = PTR_SIZE(NULL, &CseFilter::findImmQ);
2070 m_findNL[NLImmD] = &CseFilter::findImmD;
2071 m_findNL[NL1] = &CseFilter::find1;
2072 m_findNL[NL2] = &CseFilter::find2;
2073 m_findNL[NL3] = &CseFilter::find3;
2074 m_findNL[NLCall] = &CseFilter::findCall;
2076 m_capNL[NLImmISmall] = 17; // covers 0..16, which is over half the cases for TraceMonkey
2077 m_capNL[NLImmILarge] = 64;
2078 m_capNL[NLImmQ] = PTR_SIZE(0, 16);
2079 m_capNL[NLImmD] = 16;
2083 m_capNL[NLCall] = 64;
2085 // The largish allocations are fallible, the small ones are
2086 // infallible. See the comment on initOOM's declaration for why.
2088 for (NLKind nlkind = NLFirst; nlkind <= NLLast; nlkind = nextNLKind(nlkind)) {
2089 m_listNL[nlkind] = (LIns**)alloc.fallibleAlloc(sizeof(LIns*) * m_capNL[nlkind]);
2090 if (!m_listNL[nlkind]) {
2094 m_usedNL[nlkind] = 1; // Force memset in clearAll().
2097 // Note that this allocates the CONST and MULTIPLE tables as well.
2098 for (CseAcc a = 0; a < CSE_NUM_USED_ACCS; a++) {
2100 m_listL[a] = (LIns**)alloc.fallibleAlloc(sizeof(LIns*) * m_capL[a]);
2105 m_usedL[a] = 1; // Force memset(0) in first clearAll().
2111 // Inlined/separated version of SuperFastHash.
2112 // This content is copyrighted by Paul Hsieh.
2113 // For reference see: http://www.azillionmonkeys.com/qed/hash.html
2115 inline uint32_t CseFilter::hash8(uint32_t hash, const uint8_t data)
2123 inline uint32_t CseFilter::hash32(uint32_t hash, const uint32_t data)
2125 const uint32_t dlo = data & 0xffff;
2126 const uint32_t dhi = data >> 16;
2128 const uint32_t tmp = (dhi << 11) ^ hash;
2129 hash = (hash << 16) ^ tmp;
2134 inline uint32_t CseFilter::hashptr(uint32_t hash, const void* data)
2136 #ifdef NANOJIT_64BIT
2137 hash = hash32(hash, uint32_t(uintptr_t(data) >> 32));
2138 hash = hash32(hash, uint32_t(uintptr_t(data)));
2141 return hash32(hash, uint32_t(data));
2145 inline uint32_t CseFilter::hashfinish(uint32_t hash)
2147 /* Force "avalanching" of final 127 bits */
2157 void CseFilter::clearNL(NLKind nlkind) {
2158 if (m_usedNL[nlkind] > 0) {
2159 VMPI_memset(m_listNL[nlkind], 0, sizeof(LIns*)*m_capNL[nlkind]);
2160 m_usedNL[nlkind] = 0;
2164 void CseFilter::clearL(CseAcc a) {
2165 if (m_usedL[a] > 0) {
2166 VMPI_memset(m_listL[a], 0, sizeof(LIns*)*m_capL[a]);
2171 void CseFilter::clearAll() {
2172 for (NLKind nlkind = NLFirst; nlkind <= NLLast; nlkind = nextNLKind(nlkind))
2175 // Note that this clears the CONST and MULTIPLE load tables as well.
2176 for (CseAcc a = 0; a < CSE_NUM_USED_ACCS; a++)
2179 knownCmpValues.clear();
2182 inline uint32_t CseFilter::hashImmI(int32_t a) {
2183 return hashfinish(hash32(0, a));
2186 inline uint32_t CseFilter::hashImmQorD(uint64_t a) {
2187 uint32_t hash = hash32(0, uint32_t(a >> 32));
2188 return hashfinish(hash32(hash, uint32_t(a)));
2191 inline uint32_t CseFilter::hash1(LOpcode op, LIns* a) {
2192 uint32_t hash = hash8(0, uint8_t(op));
2193 return hashfinish(hashptr(hash, a));
2196 inline uint32_t CseFilter::hash2(LOpcode op, LIns* a, LIns* b) {
2197 uint32_t hash = hash8(0, uint8_t(op));
2198 hash = hashptr(hash, a);
2199 return hashfinish(hashptr(hash, b));
2202 inline uint32_t CseFilter::hash3(LOpcode op, LIns* a, LIns* b, LIns* c) {
2203 uint32_t hash = hash8(0, uint8_t(op));
2204 hash = hashptr(hash, a);
2205 hash = hashptr(hash, b);
2206 return hashfinish(hashptr(hash, c));
2209 // Nb: no need to hash the load's MiniAccSet because each every load goes
2210 // into a table where all the loads have the same MiniAccSet.
2211 inline uint32_t CseFilter::hashLoad(LOpcode op, LIns* a, int32_t d) {
2212 uint32_t hash = hash8(0, uint8_t(op));
2213 hash = hashptr(hash, a);
2214 return hashfinish(hash32(hash, d));
2217 inline uint32_t CseFilter::hashCall(const CallInfo *ci, uint32_t argc, LIns* args[]) {
2218 uint32_t hash = hashptr(0, ci);
2219 for (int32_t j=argc-1; j >= 0; j--)
2220 hash = hashptr(hash,args[j]);
2221 return hashfinish(hash);
2224 bool CseFilter::growNL(NLKind nlkind)
2226 NanoAssert(nlkind != NLImmISmall);
2227 const uint32_t oldcap = m_capNL[nlkind];
2228 m_capNL[nlkind] <<= 1;
2229 // We make this allocation fallible because it's potentially large and
2230 // easy to recover from. If it fails, we won't add any more
2231 // instructions to the table and some CSE opportunities may be missed.
2232 LIns** tmp = (LIns**)alloc.fallibleAlloc(sizeof(LIns*) * m_capNL[nlkind]);
2234 LIns** oldlist = m_listNL[nlkind];
2235 m_listNL[nlkind] = tmp;
2236 VMPI_memset(m_listNL[nlkind], 0, m_capNL[nlkind] * sizeof(LIns*));
2237 find_t find = m_findNL[nlkind];
2238 for (uint32_t i = 0; i < oldcap; i++) {
2239 LIns* ins = oldlist[i];
2241 uint32_t j = (this->*find)(ins);
2242 NanoAssert(!m_listNL[nlkind][j]);
2243 m_listNL[nlkind][j] = ins;
2247 m_capNL[nlkind] = oldcap;
2252 bool CseFilter::growL(CseAcc cseAcc)
2254 const uint32_t oldcap = m_capL[cseAcc];
2255 m_capL[cseAcc] <<= 1;
2256 LIns** tmp = (LIns**)alloc.fallibleAlloc(sizeof(LIns*) * m_capL[cseAcc]);
2258 LIns** oldlist = m_listL[cseAcc];
2259 m_listL[cseAcc] = tmp;
2260 VMPI_memset(m_listL[cseAcc], 0, m_capL[cseAcc] * sizeof(LIns*));
2261 find_t find = &CseFilter::findLoad;
2262 for (uint32_t i = 0; i < oldcap; i++) {
2263 LIns* ins = oldlist[i];
2265 uint32_t j = (this->*find)(ins);
2266 NanoAssert(!m_listL[cseAcc][j]);
2267 m_listL[cseAcc][j] = ins;
2271 m_capL[cseAcc] = oldcap;
2276 void CseFilter::addNLImmISmall(LIns* ins, uint32_t k)
2278 NanoAssert(!initOOM);
2279 if (suspended) return;
2280 NLKind nlkind = NLImmISmall;
2281 NanoAssert(k < m_capNL[nlkind]);
2282 NanoAssert(!m_listNL[nlkind][k]);
2284 m_listNL[nlkind][k] = ins;
2287 void CseFilter::addNL(NLKind nlkind, LIns* ins, uint32_t k)
2289 NanoAssert(!initOOM);
2290 if (suspended) return;
2291 NanoAssert(!m_listNL[nlkind][k]);
2293 m_listNL[nlkind][k] = ins;
2294 if ((m_usedNL[nlkind] * 4) >= (m_capNL[nlkind] * 3)) { // load factor of 0.75
2295 bool ok = growNL(nlkind);
2297 // OOM: undo the insertion.
2299 m_listNL[nlkind][k] = NULL;
2304 void CseFilter::addL(LIns* ins, uint32_t k)
2306 NanoAssert(!initOOM);
2307 if (suspended) return;
2308 CseAcc cseAcc = miniAccSetToCseAcc(ins->miniAccSet(), ins->loadQual());
2309 NanoAssert(!m_listL[cseAcc][k]);
2311 m_listL[cseAcc][k] = ins;
2312 if ((m_usedL[cseAcc] * 4) >= (m_capL[cseAcc] * 3)) { // load factor of 0.75
2313 bool ok = growL(cseAcc);
2315 // OOM: undo the insertion.
2317 m_listL[cseAcc][k] = NULL;
2322 inline LIns* CseFilter::findImmISmall(int32_t a, uint32_t &k)
2324 // This one is a direct array lookup rather than a hashtable lookup.
2325 NLKind nlkind = NLImmISmall;
2327 LIns* ins = m_listNL[nlkind][k];
2328 NanoAssert(!ins || ins->isImmI(a));
2332 uint32_t CseFilter::findImmISmall(LIns* ins)
2335 findImmISmall(ins->immI(), k);
2339 inline LIns* CseFilter::findImmILarge(int32_t a, uint32_t &k)
2341 NLKind nlkind = NLImmILarge;
2342 const uint32_t bitmask = m_capNL[nlkind] - 1;
2343 k = hashImmI(a) & bitmask;
2346 LIns* ins = m_listNL[nlkind][k];
2349 NanoAssert(ins->isImmI());
2350 if (ins->immI() == a)
2352 // Quadratic probe: h(k,i) = h(k) + 0.5i + 0.5i^2, which gives the
2353 // sequence h(k), h(k)+1, h(k)+3, h(k)+6, h+10, ... This is a
2354 // good sequence for 2^n-sized tables as the values h(k,i) for i
2355 // in [0,m − 1] are all distinct so termination is guaranteed.
2356 // See http://portal.acm.org/citation.cfm?id=360737 and
2357 // http://en.wikipedia.org/wiki/Quadratic_probing (fetched
2358 // 06-Nov-2009) for more details.
2359 k = (k + n) & bitmask;
2364 uint32_t CseFilter::findImmILarge(LIns* ins)
2367 findImmILarge(ins->immI(), k);
2371 #ifdef NANOJIT_64BIT
2372 inline LIns* CseFilter::findImmQ(uint64_t a, uint32_t &k)
2374 NLKind nlkind = NLImmQ;
2375 const uint32_t bitmask = m_capNL[nlkind] - 1;
2376 k = hashImmQorD(a) & bitmask;
2379 LIns* ins = m_listNL[nlkind][k];
2382 NanoAssert(ins->isImmQ());
2383 if (ins->immQ() == a)
2385 k = (k + n) & bitmask;
2390 uint32_t CseFilter::findImmQ(LIns* ins)
2393 findImmQ(ins->immQ(), k);
2398 inline LIns* CseFilter::findImmD(uint64_t a, uint32_t &k)
2400 NLKind nlkind = NLImmD;
2401 const uint32_t bitmask = m_capNL[nlkind] - 1;
2402 k = hashImmQorD(a) & bitmask;
2405 LIns* ins = m_listNL[nlkind][k];
2408 NanoAssert(ins->isImmD());
2409 if (ins->immDasQ() == a)
2411 k = (k + n) & bitmask;
2416 uint32_t CseFilter::findImmD(LIns* ins)
2419 findImmD(ins->immDasQ(), k);
2423 inline LIns* CseFilter::find1(LOpcode op, LIns* a, uint32_t &k)
2425 NLKind nlkind = NL1;
2426 const uint32_t bitmask = m_capNL[nlkind] - 1;
2427 k = hash1(op, a) & bitmask;
2430 LIns* ins = m_listNL[nlkind][k];
2433 if (ins->isop(op) && ins->oprnd1() == a)
2435 k = (k + n) & bitmask;
2440 uint32_t CseFilter::find1(LIns* ins)
2443 find1(ins->opcode(), ins->oprnd1(), k);
2447 inline LIns* CseFilter::find2(LOpcode op, LIns* a, LIns* b, uint32_t &k)
2449 NLKind nlkind = NL2;
2450 const uint32_t bitmask = m_capNL[nlkind] - 1;
2451 k = hash2(op, a, b) & bitmask;
2454 LIns* ins = m_listNL[nlkind][k];
2457 if (ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b)
2459 k = (k + n) & bitmask;
2464 uint32_t CseFilter::find2(LIns* ins)
2467 find2(ins->opcode(), ins->oprnd1(), ins->oprnd2(), k);
2471 inline LIns* CseFilter::find3(LOpcode op, LIns* a, LIns* b, LIns* c, uint32_t &k)
2473 NLKind nlkind = NL3;
2474 const uint32_t bitmask = m_capNL[nlkind] - 1;
2475 k = hash3(op, a, b, c) & bitmask;
2478 LIns* ins = m_listNL[nlkind][k];
2481 if (ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b && ins->oprnd3() == c)
2483 k = (k + n) & bitmask;
2488 uint32_t CseFilter::find3(LIns* ins)
2491 find3(ins->opcode(), ins->oprnd1(), ins->oprnd2(), ins->oprnd3(), k);
2495 inline LIns* CseFilter::findLoad(LOpcode op, LIns* a, int32_t d, MiniAccSet miniAccSet,
2496 LoadQual loadQual, uint32_t &k)
2498 CseAcc cseAcc = miniAccSetToCseAcc(miniAccSet, loadQual);
2499 const uint32_t bitmask = m_capL[cseAcc] - 1;
2500 k = hashLoad(op, a, d) & bitmask;
2503 LIns* ins = m_listL[cseAcc][k];
2506 // All the loads in this table should have the same miniAccSet and
2508 NanoAssert(miniAccSetToCseAcc(ins->miniAccSet(), ins->loadQual()) == cseAcc &&
2509 ins->loadQual() == loadQual);
2510 if (ins->isop(op) && ins->oprnd1() == a && ins->disp() == d)
2512 k = (k + n) & bitmask;
2517 uint32_t CseFilter::findLoad(LIns* ins)
2520 findLoad(ins->opcode(), ins->oprnd1(), ins->disp(), ins->miniAccSet(), ins->loadQual(), k);
2524 bool argsmatch(LIns* ins, uint32_t argc, LIns* args[])
2526 for (uint32_t j=0; j < argc; j++)
2527 if (ins->arg(j) != args[j])
2532 inline LIns* CseFilter::findCall(const CallInfo *ci, uint32_t argc, LIns* args[], uint32_t &k)
2534 NLKind nlkind = NLCall;
2535 const uint32_t bitmask = m_capNL[nlkind] - 1;
2536 k = hashCall(ci, argc, args) & bitmask;
2539 LIns* ins = m_listNL[nlkind][k];
2542 if (ins->isCall() && ins->callInfo() == ci && argsmatch(ins, argc, args))
2544 k = (k + n) & bitmask;
2549 uint32_t CseFilter::findCall(LIns* ins)
2551 LIns* args[MAXARGS];
2552 uint32_t argc = ins->argc();
2553 NanoAssert(argc < MAXARGS);
2554 for (uint32_t j=0; j < argc; j++)
2555 args[j] = ins->arg(j);
2557 findCall(ins->callInfo(), argc, args, k);
2561 LIns* CseFilter::insImmI(int32_t imm)
2565 if (0 <= imm && imm < int32_t(m_capNL[NLImmISmall])) {
2566 ins = findImmISmall(imm, k);
2568 ins = out->insImmI(imm);
2569 addNLImmISmall(ins, k);
2572 ins = findImmILarge(imm, k);
2574 ins = out->insImmI(imm);
2575 addNL(NLImmILarge, ins, k);
2578 // We assume that downstream stages do not modify the instruction, so
2579 // that we can insert 'ins' into slot 'k'. Check this.
2580 NanoAssert(ins->isop(LIR_immi) && ins->immI() == imm);
2584 #ifdef NANOJIT_64BIT
2585 LIns* CseFilter::insImmQ(uint64_t q)
2588 LIns* ins = findImmQ(q, k);
2590 ins = out->insImmQ(q);
2591 addNL(NLImmQ, ins, k);
2593 NanoAssert(ins->isop(LIR_immq) && ins->immQ() == q);
2598 LIns* CseFilter::insImmD(double d)
2601 // We must pun 'd' as a uint64_t otherwise 0 and -0 will be treated as
2602 // equal, which breaks things (see bug 527288).
2608 LIns* ins = findImmD(u.u64, k);
2610 ins = out->insImmD(d);
2611 addNL(NLImmD, ins, k);
2613 NanoAssert(ins->isop(LIR_immd) && ins->immDasQ() == u.u64);
2617 LIns* CseFilter::ins0(LOpcode op)
2619 if (op == LIR_label && !suspended)
2621 return out->ins0(op);
2624 LIns* CseFilter::ins1(LOpcode op, LIns* a)
2627 if (isCseOpcode(op)) {
2629 ins = find1(op, a, k);
2631 ins = out->ins1(op, a);
2635 ins = out->ins1(op, a);
2637 NanoAssert(ins->isop(op) && ins->oprnd1() == a);
2641 LIns* CseFilter::ins2(LOpcode op, LIns* a, LIns* b)
2644 NanoAssert(isCseOpcode(op));
2646 ins = find2(op, a, b, k);
2648 ins = out->ins2(op, a, b);
2650 } else if (ins->isCmp()) {
2651 if (knownCmpValues.containsKey(ins)) {
2652 // We've seen this comparison before, and it was previously
2653 // used in a guard, so we know what its value must be at this
2654 // point. Replace it with a constant.
2655 NanoAssert(ins->isCmp());
2656 bool cmpValue = knownCmpValues.get(ins);
2657 return insImmI(cmpValue ? 1 : 0);
2660 NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b);
2664 LIns* CseFilter::ins3(LOpcode op, LIns* a, LIns* b, LIns* c)
2666 NanoAssert(isCseOpcode(op));
2668 LIns* ins = find3(op, a, b, c, k);
2670 ins = out->ins3(op, a, b, c);
2673 NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b && ins->oprnd3() == c);
2677 LIns* CseFilter::insLoad(LOpcode op, LIns* base, int32_t disp, AccSet accSet, LoadQual loadQual)
2681 if (storesSinceLastLoad != ACCSET_NONE) {
2682 // Clear all normal (excludes CONST and MULTIPLE) loads
2683 // aliased by stores and calls since the last time we were in
2684 // this function. Aliased loads must be cleared even when CSE
2686 AccSet a = storesSinceLastLoad & ((1 << EMB_NUM_USED_ACCS) - 1);
2688 int acc = msbSet32(a);
2689 clearL((CseAcc)acc);
2693 // No need to clear CONST loads (those in the CSE_ACC_CONST table).
2695 // Multi-region loads must be treated conservatively -- we
2696 // always clear all of them.
2697 clearL(CSE_ACC_MULTIPLE);
2699 storesSinceLastLoad = ACCSET_NONE;
2702 if (loadQual == LOAD_VOLATILE) {
2703 // Volatile loads are never CSE'd, don't bother looking for
2704 // them or inserting them in the table.
2705 ins = out->insLoad(op, base, disp, accSet, loadQual);
2708 ins = findLoad(op, base, disp, compressAccSet(accSet), loadQual, k);
2710 ins = out->insLoad(op, base, disp, accSet, loadQual);
2714 // Nb: must compare miniAccSets, not AccSets, because the AccSet
2715 // stored in the load may have lost info if it's multi-region.
2716 NanoAssert(ins->isop(op) && ins->oprnd1() == base && ins->disp() == disp &&
2717 ins->miniAccSet().val == compressAccSet(accSet).val &&
2718 ins->loadQual() == loadQual);
2720 // If the displacement is more than 16 bits, put it in a separate
2721 // instruction. Nb: LirBufWriter also does this, we do it here
2722 // too because CseFilter relies on LirBufWriter not changing code.
2723 ins = insLoad(op, ins2(LIR_addp, base, insImmWord(disp)), 0, accSet, loadQual);
2728 LIns* CseFilter::insStore(LOpcode op, LIns* value, LIns* base, int32_t disp, AccSet accSet)
2732 storesSinceLastLoad |= accSet;
2733 ins = out->insStore(op, value, base, disp, accSet);
2734 NanoAssert(ins->isop(op) && ins->oprnd1() == value && ins->oprnd2() == base &&
2735 ins->disp() == disp && ins->accSet() == accSet);
2737 // If the displacement is more than 16 bits, put it in a separate
2738 // instruction. Nb: LirBufWriter also does this, we do it here
2739 // too because CseFilter relies on LirBufWriter not changing code.
2740 ins = insStore(op, value, ins2(LIR_addp, base, insImmWord(disp)), 0, accSet);
2745 LIns* CseFilter::insGuard(LOpcode op, LIns* c, GuardRecord *gr)
2747 // LIR_xt and LIR_xf guards are CSEable. Note that we compare the
2748 // opcode and condition when determining if two guards are equivalent
2749 // -- in find1() and hash1() -- but we do *not* compare the
2750 // GuardRecord. This works because:
2751 // - If guard 1 is taken (exits) then guard 2 is never reached, so
2752 // guard 2 can be removed.
2753 // - If guard 1 is not taken then neither is guard 2, so guard 2 can
2756 // The underlying assumptions that are required for this to be safe:
2757 // - There's never a path from the side exit of guard 1 back to guard
2758 // 2; for tree-shaped fragments this should be true.
2759 // - GuardRecords do not contain information other than what is needed
2760 // to execute a successful exit. That is currently true.
2761 // - The CSE algorithm will always keep guard 1 and remove guard 2
2762 // (not vice versa). The current algorithm does this.
2765 if (isCseOpcode(op)) {
2766 // conditional guard
2768 ins = find1(op, c, k);
2770 ins = out->insGuard(op, c, gr);
2773 // After this guard, we know that 'c's result was true (if
2774 // op==LIR_xf) or false (if op==LIR_xt), else we would have
2775 // exited. Record this fact in case 'c' occurs again.
2777 bool c_value = (op == LIR_xt ? false : true);
2778 knownCmpValues.put(c, c_value);
2781 ins = out->insGuard(op, c, gr);
2783 NanoAssert(ins->isop(op) && ins->oprnd1() == c);
2787 LIns* CseFilter::insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr)
2789 // LIR_*xov are CSEable. See CseFilter::insGuard() for details.
2790 NanoAssert(isCseOpcode(op));
2791 // conditional guard
2793 LIns* ins = find2(op, a, b, k);
2795 ins = out->insGuardXov(op, a, b, gr);
2798 NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b);
2802 // There is no CseFilter::insBranchJov(), as LIR_*jov* are not CSEable.
2804 LIns* CseFilter::insCall(const CallInfo *ci, LIns* args[])
2807 uint32_t argc = ci->count_args();
2809 NanoAssert(ci->_storeAccSet == ACCSET_NONE);
2811 ins = findCall(ci, argc, args, k);
2813 ins = out->insCall(ci, args);
2814 addNL(NLCall, ins, k);
2817 // We only need to worry about aliasing if !ci->_isPure.
2818 storesSinceLastLoad |= ci->_storeAccSet;
2819 ins = out->insCall(ci, args);
2821 NanoAssert(ins->isCall() && ins->callInfo() == ci && argsmatch(ins, argc, args));
2825 // Interval analysis can be done much more accurately than we do here.
2826 // For speed and simplicity in a number of cases (eg. LIR_andi, LIR_rshi)
2827 // we just look for easy-to-handle (but common!) cases such as when the
2828 // RHS is a constant; in practice this gives good results. It also cuts
2829 // down the amount of backwards traversals we have to do, which is good.
2831 // 'lim' also limits the number of backwards traversals; it's decremented
2832 // on each recursive call and we give up when it reaches zero. This
2833 // prevents possible time blow-ups in long expression chains. We don't
2834 // check 'lim' at the top of this function, as you might expect, because
2835 // the behaviour when the limit is reached depends on the opcode.
2837 Interval Interval::of(LIns* ins, int lim)
2839 switch (ins->opcode()) {
2841 int32_t i = ins->immI();
2842 return Interval(i, i);
2845 case LIR_ldc2i: return Interval( -128, 127);
2846 case LIR_lduc2ui: return Interval( 0, 255);
2847 case LIR_lds2i: return Interval(-32768, 32767);
2848 case LIR_ldus2ui: return Interval( 0, 65535);
2854 return add(of(ins->oprnd1(), lim-1), of(ins->oprnd2(), lim-1));
2861 return sub(of(ins->oprnd1(), lim-1), of(ins->oprnd2(), lim-1));
2866 return sub(Interval(0, 0), of(ins->oprnd1(), lim-1));
2873 return mul(of(ins->oprnd1(), lim), of(ins->oprnd2(), lim));
2877 // Only handle one common case accurately, for speed and simplicity.
2878 if (ins->oprnd2()->isImmI() && ins->oprnd2()->immI() > 0) {
2879 // Example: andi [lo,hi], 0xffff --> [0, 0xffff]
2880 return Interval(0, ins->oprnd2()->immI());
2882 goto worst_non_overflow;
2886 // Only handle one common case accurately, for speed and simplicity.
2887 if (ins->oprnd2()->isImmI() && lim > 0) {
2888 Interval x = of(ins->oprnd1(), lim-1);
2889 int32_t y = ins->oprnd2()->immI() & 0x1f; // we only use the bottom 5 bits
2890 NanoAssert(x.isSane());
2891 if (!x.hasOverflowed && (x.lo >= 0 || y > 0)) {
2892 // If LHS is non-negative or RHS is positive, the result is
2893 // non-negative because the top bit must be zero.
2894 // Example: rshui [0,hi], 16 --> [0, hi>>16]
2895 return Interval(0, x.hi >> y);
2898 goto worst_non_overflow;
2902 // Only handle one common case accurately, for speed and simplicity.
2903 if (ins->oprnd2()->isImmI()) {
2904 // Example: rshi [lo,hi], 16 --> [32768, 32767]
2905 int32_t y = ins->oprnd2()->immI() & 0x1f; // we only use the bottom 5 bits
2906 return Interval(-(1 << (31 - y)),
2907 (1 << (31 - y)) - 1);
2909 goto worst_non_overflow;
2912 #if defined NANOJIT_IA32 || defined NANOJIT_X64
2914 NanoAssert(ins->oprnd1()->isop(LIR_divi));
2915 LIns* op2 = ins->oprnd1()->oprnd2();
2916 // Only handle one common case accurately, for speed and simplicity.
2917 if (op2->isImmI() && op2->immI() != 0) {
2918 int32_t y = op2->immI();
2919 int32_t absy = (y >= 0) ? y : -y;
2920 // The result must smaller in magnitude than 'y'.
2921 // Example: modi [lo,hi], 5 --> [-4, 4]
2922 return Interval(-absy + 1, absy - 1);
2924 goto worst_non_overflow;
2930 Interval x = of(ins->oprnd2(), lim-1);
2931 Interval y = of(ins->oprnd3(), lim-1);
2932 NanoAssert(x.isSane() && y.isSane());
2933 if (!x.hasOverflowed && !y.hasOverflowed)
2934 return Interval(NJ_MIN(x.lo, y.lo), NJ_MAX(x.hi, y.hi));
2939 case LIR_eqi: CASE64(LIR_eqq:)
2940 case LIR_lti: CASE64(LIR_ltq:)
2941 case LIR_lei: CASE64(LIR_leq:)
2942 case LIR_gti: CASE64(LIR_gtq:)
2943 case LIR_gei: CASE64(LIR_geq:)
2944 case LIR_ltui: CASE64(LIR_ltuq:)
2945 case LIR_leui: CASE64(LIR_leuq:)
2946 case LIR_gtui: CASE64(LIR_gtuq:)
2947 case LIR_geui: CASE64(LIR_geuq:)
2953 return Interval(0, 1);
2969 goto worst_non_overflow;
2972 NanoAssertMsgf(0, "%s", lirNames[ins->opcode()]);
2976 return OverflowInterval();
2979 // Only cases that cannot overflow should reach here, ie. not add/sub/mul.
2980 return Interval(I32_MIN, I32_MAX);
2983 Interval Interval::add(Interval x, Interval y) {
2984 NanoAssert(x.isSane() && y.isSane());
2986 if (x.hasOverflowed || y.hasOverflowed)
2987 return OverflowInterval();
2989 // Nb: the bounds in x and y are known to fit in 32 bits (isSane()
2990 // checks that) so x.lo+y.lo and x.hi+y.hi are guaranteed to fit
2991 // in 64 bits. This also holds for the other cases below such as
2993 return Interval(x.lo + y.lo, x.hi + y.hi);
2996 Interval Interval::sub(Interval x, Interval y) {
2997 NanoAssert(x.isSane() && y.isSane());
2999 if (x.hasOverflowed || y.hasOverflowed)
3000 return OverflowInterval();
3002 return Interval(x.lo - y.hi, x.hi - y.lo);
3005 Interval Interval::mul(Interval x, Interval y) {
3006 NanoAssert(x.isSane() && y.isSane());
3008 if (x.hasOverflowed || y.hasOverflowed)
3009 return OverflowInterval();
3011 int64_t a = x.lo * y.lo;
3012 int64_t b = x.lo * y.hi;
3013 int64_t c = x.hi * y.lo;
3014 int64_t d = x.hi * y.hi;
3015 return Interval(NJ_MIN(NJ_MIN(a, b), NJ_MIN(c, d)),
3016 NJ_MAX(NJ_MAX(a, b), NJ_MAX(c, d)));
3019 #if NJ_SOFTFLOAT_SUPPORTED
3020 static int32_t FASTCALL d2i(double d) { return (int32_t) d; }
3021 static double FASTCALL i2d(int32_t i) { return i; }
3022 static double FASTCALL ui2d(uint32_t u) { return u; }
3023 static double FASTCALL negd(double a) { return -a; }
3024 static double FASTCALL addd(double a, double b) { return a + b; }
3025 static double FASTCALL subd(double a, double b) { return a - b; }
3026 static double FASTCALL muld(double a, double b) { return a * b; }
3027 static double FASTCALL divd(double a, double b) { return a / b; }
3028 static int32_t FASTCALL eqd(double a, double b) { return a == b; }
3029 static int32_t FASTCALL ltd(double a, double b) { return a < b; }
3030 static int32_t FASTCALL gtd(double a, double b) { return a > b; }
3031 static int32_t FASTCALL led(double a, double b) { return a <= b; }
3032 static int32_t FASTCALL ged(double a, double b) { return a >= b; }
3034 #define SIG_I_D CallInfo::typeSig1(ARGTYPE_I, ARGTYPE_D)
3035 #define SIG_D_I CallInfo::typeSig1(ARGTYPE_D, ARGTYPE_I)
3036 #define SIG_D_UI CallInfo::typeSig1(ARGTYPE_D, ARGTYPE_UI)
3037 #define SIG_D_D CallInfo::typeSig1(ARGTYPE_D, ARGTYPE_D)
3038 #define SIG_D_DD CallInfo::typeSig2(ARGTYPE_D, ARGTYPE_D, ARGTYPE_D)
3039 #define SIG_B_DD CallInfo::typeSig2(ARGTYPE_B, ARGTYPE_D, ARGTYPE_D)
3041 #define SF_CALLINFO(name, typesig) \
3042 static const CallInfo name##_ci = \
3043 { (intptr_t)&name, typesig, ABI_FASTCALL, /*isPure*/1, ACCSET_NONE verbose_only(, #name) }
3045 SF_CALLINFO(d2i, SIG_I_D);
3046 SF_CALLINFO(i2d, SIG_D_I);
3047 SF_CALLINFO(ui2d, SIG_D_UI);
3048 SF_CALLINFO(negd, SIG_D_D);
3049 SF_CALLINFO(addd, SIG_D_DD);
3050 SF_CALLINFO(subd, SIG_D_DD);
3051 SF_CALLINFO(muld, SIG_D_DD);
3052 SF_CALLINFO(divd, SIG_D_DD);
3053 SF_CALLINFO(eqd, SIG_B_DD);
3054 SF_CALLINFO(ltd, SIG_B_DD);
3055 SF_CALLINFO(gtd, SIG_B_DD);
3056 SF_CALLINFO(led, SIG_B_DD);
3057 SF_CALLINFO(ged, SIG_B_DD);
3059 SoftFloatOps::SoftFloatOps()
3061 memset(opmap, 0, sizeof(opmap));
3062 opmap[LIR_d2i] = &d2i_ci;
3063 opmap[LIR_i2d] = &i2d_ci;
3064 opmap[LIR_ui2d] = &ui2d_ci;
3065 opmap[LIR_negd] = &negd_ci;
3066 opmap[LIR_addd] = &addd_ci;
3067 opmap[LIR_subd] = &subd_ci;
3068 opmap[LIR_muld] = &muld_ci;
3069 opmap[LIR_divd] = &divd_ci;
3070 opmap[LIR_eqd] = &eqd_ci;
3071 opmap[LIR_ltd] = <d_ci;
3072 opmap[LIR_gtd] = >d_ci;
3073 opmap[LIR_led] = &led_ci;
3074 opmap[LIR_ged] = &ged_ci;
3077 const SoftFloatOps softFloatOps;
3079 SoftFloatFilter::SoftFloatFilter(LirWriter *out) : LirWriter(out)
3082 LIns* SoftFloatFilter::split(LIns *a) {
3083 if (a->isD() && !a->isop(LIR_ii2d)) {
3084 // all F64 args must be qjoin's for soft-float
3085 a = ins2(LIR_ii2d, ins1(LIR_dlo2i, a), ins1(LIR_dhi2i, a));
3090 LIns* SoftFloatFilter::split(const CallInfo *call, LIns* args[]) {
3091 LIns *lo = out->insCall(call, args);
3092 LIns *hi = out->ins1(LIR_hcalli, lo);
3093 return out->ins2(LIR_ii2d, lo, hi);
3096 LIns* SoftFloatFilter::callD1(const CallInfo *call, LIns *a) {
3097 LIns *args[] = { split(a) };
3098 return split(call, args);
3101 LIns* SoftFloatFilter::callI1(const CallInfo *call, LIns *a) {
3102 LIns *args[] = { split(a) };
3103 return out->insCall(call, args);
3106 LIns* SoftFloatFilter::callD2(const CallInfo *call, LIns *a, LIns *b) {
3107 LIns *args[] = { split(b), split(a) };
3108 return split(call, args);
3111 LIns* SoftFloatFilter::cmpD(const CallInfo *call, LIns *a, LIns *b) {
3112 LIns *args[] = { split(b), split(a) };
3113 return out->ins2(LIR_eqi, out->insCall(call, args), out->insImmI(1));
3116 LIns* SoftFloatFilter::ins1(LOpcode op, LIns *a) {
3117 const CallInfo *ci = softFloatOps.opmap[op];
3119 if (ci->returnType() == ARGTYPE_D)
3120 return callD1(ci, a);
3122 return callI1(ci, a);
3125 return out->ins1(op, split(a));
3126 return out->ins1(op, a);
3129 LIns* SoftFloatFilter::ins2(LOpcode op, LIns *a, LIns *b) {
3130 const CallInfo *ci = softFloatOps.opmap[op];
3132 if (isCmpDOpcode(op))
3133 return cmpD(ci, a, b);
3134 return callD2(ci, a, b);
3136 return out->ins2(op, a, b);
3139 LIns* SoftFloatFilter::insCall(const CallInfo *ci, LIns* args[]) {
3140 uint32_t nArgs = ci->count_args();
3141 for (uint32_t i = 0; i < nArgs; i++)
3142 args[i] = split(args[i]);
3144 if (ci->returnType() == ARGTYPE_D) {
3145 // This function returns a double as two 32bit values, so replace
3146 // call with qjoin(qhi(call), call).
3147 return split(ci, args);
3149 return out->insCall(ci, args);
3151 #endif // NJ_SOFTFLOAT_SUPPORTED
3154 #endif /* FEATURE_NANOJIT */
3156 #if defined(NJ_VERBOSE)
3157 AddrNameMap::AddrNameMap(Allocator& a)
3158 : allocator(a), names(a)
3161 void AddrNameMap::addAddrRange(const void *p, size_t size, size_t align, const char *name)
3163 if (!this || names.containsKey(p))
3165 char* copy = new (allocator) char[VMPI_strlen(name)+1];
3166 VMPI_strcpy(copy, name);
3167 Entry *e = new (allocator) Entry(copy, size << align, align);
3171 void AddrNameMap::lookupAddr(void *p, char*& name, int32_t& offset)
3173 const void *start = names.findNear(p);
3175 Entry *e = names.get(start);
3176 const void *end = (const char*)start + e->size;
3181 else if (p > start && p < end) {
3183 offset = int32_t(intptr_t(p)-intptr_t(start)) >> e->align;
3195 // ---------------------------------------------------------------
3196 // START debug-logging definitions
3197 // ---------------------------------------------------------------
3199 void LogControl::printf( const char* format, ... )
3202 va_start(vargs, format);
3203 vfprintf(stdout, format, vargs);
3205 // Flush every line immediately so that if crashes occur in generated
3206 // code we won't lose any output.
3210 #endif // NJ_VERBOSE
3213 #ifdef FEATURE_NANOJIT
3215 const char* ValidateWriter::type2string(LTy type)
3218 case LTy_V: return "void";
3219 case LTy_I: return "int";
3220 #ifdef NANOJIT_64BIT
3221 case LTy_Q: return "quad";
3223 case LTy_D: return "double";
3224 default: NanoAssert(0); return "???";
3228 void ValidateWriter::typeCheckArgs(LOpcode op, int nArgs, LTy formals[], LIns* args[])
3230 NanoAssert(nArgs >= 0);
3232 // Type-check the arguments.
3233 for (int i = 0; i < nArgs; i++) {
3234 LTy formal = formals[i];
3235 LTy actual = args[i]->retType();
3236 if (formal != actual) {
3237 // Assert on a type error. The disadvantage of doing this (as
3238 // opposed to printing a message and continuing) is that at
3239 // most one type error will be detected per run. But type
3240 // errors should be rare, and assertion failures are certain
3241 // to be caught by test suites whereas error messages may not
3244 "LIR type error (%s): arg %d of '%s' is '%s' "
3245 "which has type %s (expected %s)",
3246 whereInPipeline, i+1, lirNames[op],
3247 lirNames[args[i]->opcode()],
3248 type2string(actual), type2string(formal));
3253 void ValidateWriter::errorStructureShouldBe(LOpcode op, const char* argDesc, int argN,
3254 LIns* arg, const char* shouldBeDesc)
3257 "LIR structure error (%s): %s %d of '%s' is '%s' (expected %s)",
3258 whereInPipeline, argDesc, argN,
3259 lirNames[op], lirNames[arg->opcode()], shouldBeDesc);
3262 void ValidateWriter::errorAccSet(const char* what, AccSet accSet, const char* shouldDesc)
3266 "LIR AccSet error (%s): '%s' AccSet is '%s'; %s",
3267 whereInPipeline, what, printer->formatAccSet(&b, accSet), shouldDesc);
3270 void ValidateWriter::errorLoadQual(const char* what, LoadQual loadQual)
3273 "LIR LoadQual error (%s): '%s' loadQual is '%d'",
3274 whereInPipeline, what, loadQual);
3277 void ValidateWriter::checkLInsIsACondOrConst(LOpcode op, int argN, LIns* ins)
3279 // We could introduce a LTy_B32 type in the type system but that's a
3280 // bit weird because its representation is identical to LTy_I. It's
3281 // easier to just do this check structurally. Also, optimization can
3282 // cause the condition to become a LIR_immi.
3283 if (!ins->isCmp() && !ins->isImmI())
3284 errorStructureShouldBe(op, "argument", argN, ins, "a condition or 32-bit constant");
3287 void ValidateWriter::checkLInsIsNull(LOpcode op, int argN, LIns* ins)
3290 errorStructureShouldBe(op, "argument", argN, ins, NULL);
3293 void ValidateWriter::checkLInsHasOpcode(LOpcode op, int argN, LIns* ins, LOpcode op2)
3295 if (!ins->isop(op2))
3296 errorStructureShouldBe(op, "argument", argN, ins, lirNames[op2]);
3299 ValidateWriter::ValidateWriter(LirWriter *out, LInsPrinter* printer, const char* where)
3300 : LirWriter(out), printer(printer), whereInPipeline(where),
3301 checkAccSetExtras(0)
3304 LIns* ValidateWriter::insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet,
3307 checkAccSet(op, base, d, accSet);
3315 errorLoadQual(lirNames[op], loadQual);
3321 LTy formals[1] = { LTy_P };
3322 LIns* args[1] = { base };
3338 typeCheckArgs(op, nArgs, formals, args);
3340 return out->insLoad(op, base, d, accSet, loadQual);
3343 LIns* ValidateWriter::insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet)
3345 checkAccSet(op, base, d, accSet);
3348 LTy formals[2] = { LTy_V, LTy_P }; // LTy_V is overwritten shortly
3349 LIns* args[2] = { value, base };
3358 #ifdef NANOJIT_64BIT
3373 typeCheckArgs(op, nArgs, formals, args);
3375 return out->insStore(op, value, base, d, accSet);
3378 LIns* ValidateWriter::ins0(LOpcode op)
3389 // No args to type-check.
3391 return out->ins0(op);
3394 LIns* ValidateWriter::ins1(LOpcode op, LIns* a)
3398 LIns* args[1] = { a };
3410 #ifdef NANOJIT_64BIT
3424 #if defined NANOJIT_IA32 || defined NANOJIT_X64
3425 case LIR_modi: // see LIRopcode.tbl for why 'mod' is unary
3426 checkLInsHasOpcode(op, 1, a, LIR_divi);
3431 #if NJ_SOFTFLOAT_SUPPORTED
3438 // The operand of a LIR_hcalli is LIR_calli, even though the
3439 // function being called has a return type of LTy_D.
3440 checkLInsHasOpcode(op, 1, a, LIR_calli);
3455 // These will never get hit since VTUNE implies !DEBUG. Ignore for the moment.
3460 NanoAssertMsgf(0, "%s\n", lirNames[op]);
3463 typeCheckArgs(op, nArgs, formals, args);
3465 return out->ins1(op, a);
3468 LIns* ValidateWriter::ins2(LOpcode op, LIns* a, LIns* b)
3472 LIns* args[2] = { a, b };
3498 #if NJ_SOFTFLOAT_SUPPORTED
3505 #ifdef NANOJIT_64BIT
3549 typeCheckArgs(op, nArgs, formals, args);
3551 return out->ins2(op, a, b);
3554 LIns* ValidateWriter::ins3(LOpcode op, LIns* a, LIns* b, LIns* c)
3557 LTy formals[3] = { LTy_I, LTy_V, LTy_V }; // LTy_V gets overwritten
3558 LIns* args[3] = { a, b, c };
3562 checkLInsIsACondOrConst(op, 1, a);
3567 #ifdef NANOJIT_64BIT
3569 checkLInsIsACondOrConst(op, 1, a);
3576 checkLInsIsACondOrConst(op, 1, a);
3585 typeCheckArgs(op, nArgs, formals, args);
3587 return out->ins3(op, a, b, c);
3590 LIns* ValidateWriter::insParam(int32_t arg, int32_t kind)
3592 return out->insParam(arg, kind);
3595 LIns* ValidateWriter::insImmI(int32_t imm)
3597 return out->insImmI(imm);
3600 #ifdef NANOJIT_64BIT
3601 LIns* ValidateWriter::insImmQ(uint64_t imm)
3603 return out->insImmQ(imm);
3607 LIns* ValidateWriter::insImmD(double d)
3609 return out->insImmD(d);
3612 static const char* argtypeNames[] = {
3613 "void", // ARGTYPE_V = 0
3614 "int32_t", // ARGTYPE_I = 1
3615 "uint32_t", // ARGTYPE_UI = 2
3616 "uint64_t", // ARGTYPE_Q = 3
3617 "double" // ARGTYPE_D = 4
3620 LIns* ValidateWriter::insCall(const CallInfo *ci, LIns* args0[])
3622 ArgType argTypes[MAXARGS];
3623 uint32_t nArgs = ci->getArgTypes(argTypes);
3624 LTy formals[MAXARGS];
3625 LIns* args[MAXARGS]; // in left-to-right order, unlike args0[]
3627 LOpcode op = getCallOpcode(ci);
3628 ArgType retType = ci->returnType();
3630 if ((op == LIR_callv) != (retType == ARGTYPE_V) ||
3631 (op == LIR_calli) != (retType == ARGTYPE_UI ||
3632 retType == ARGTYPE_I) ||
3633 #ifdef NANOJIT_64BIT
3634 (op == LIR_callq) != (retType == ARGTYPE_Q) ||
3636 (op == LIR_calld) != (retType == ARGTYPE_D)) {
3638 "LIR structure error (%s): return type mismatch: opcode %s with %s return type",
3639 whereInPipeline, lirNames[op], argtypeNames[retType]);
3642 if (op == LIR_callv && ci->_isPure) {
3643 // Since nobody can use the result of a void call, any pure call
3644 // would just be dead. This is probably a mistake.
3646 "LIR structure error (%s): LIR_callv must only be used with nonpure functions.",
3650 if (ci->_isPure && ci->_storeAccSet != ACCSET_NONE)
3651 errorAccSet(ci->_name, ci->_storeAccSet, "it should be ACCSET_NONE for pure functions");
3653 // This loop iterates over the args from right-to-left (because arg()
3654 // and getArgTypes() use right-to-left order), but puts the results
3655 // into formals[] and args[] in left-to-right order so that arg
3656 // numbers in error messages make sense to the user.
3657 for (uint32_t i = 0; i < nArgs; i++) {
3658 uint32_t i2 = nArgs - i - 1; // converts right-to-left to left-to-right
3659 switch (argTypes[i]) {
3661 case ARGTYPE_UI: formals[i2] = LTy_I; break;
3662 #ifdef NANOJIT_64BIT
3663 case ARGTYPE_Q: formals[i2] = LTy_Q; break;
3665 case ARGTYPE_D: formals[i2] = LTy_D; break;
3666 default: NanoAssertMsgf(0, "%d %s\n", argTypes[i],ci->_name); formals[i2] = LTy_V; break;
3668 args[i2] = args0[i];
3671 typeCheckArgs(op, nArgs, formals, args);
3673 return out->insCall(ci, args0);
3676 LIns* ValidateWriter::insGuard(LOpcode op, LIns *cond, GuardRecord *gr)
3678 int nArgs = -1; // init to shut compilers up
3685 checkLInsIsNull(op, 1, cond);
3691 checkLInsIsACondOrConst(op, 1, cond);
3699 formals[0] = LTy_I; // unlike xt/xf/jt/jf, this is an index, not a condition
3707 typeCheckArgs(op, nArgs, formals, args);
3709 return out->insGuard(op, cond, gr);
3712 LIns* ValidateWriter::insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord* gr)
3715 LTy formals[2] = { LTy_I, LTy_I };
3716 LIns* args[2] = { a, b };
3728 typeCheckArgs(op, nArgs, formals, args);
3730 return out->insGuardXov(op, a, b, gr);
3733 LIns* ValidateWriter::insBranch(LOpcode op, LIns* cond, LIns* to)
3735 int nArgs = -1; // init to shut compilers up
3741 checkLInsIsNull(op, 1, cond);
3747 checkLInsIsACondOrConst(op, 1, cond);
3757 // We check that target is a label in ValidateReader because it may
3758 // not have been set here.
3760 typeCheckArgs(op, nArgs, formals, args);
3762 return out->insBranch(op, cond, to);
3765 LIns* ValidateWriter::insBranchJov(LOpcode op, LIns* a, LIns* b, LIns* to)
3769 LIns* args[2] = { a, b };
3779 #ifdef NANOJIT_64BIT
3790 // We check that target is a label in ValidateReader because it may
3791 // not have been set here.
3793 typeCheckArgs(op, nArgs, formals, args);
3795 return out->insBranchJov(op, a, b, to);
3798 LIns* ValidateWriter::insAlloc(int32_t size)
3800 return out->insAlloc(size);
3803 LIns* ValidateWriter::insJtbl(LIns* index, uint32_t size)
3806 LTy formals[1] = { LTy_I };
3807 LIns* args[1] = { index };
3809 typeCheckArgs(LIR_jtbl, nArgs, formals, args);
3811 // We check that all jump table entries are labels in ValidateReader
3812 // because they won't have been set here.
3814 return out->insJtbl(index, size);
3817 ValidateReader::ValidateReader(LirFilter* in) : LirFilter(in)
3820 LIns* ValidateReader::read()
3822 LIns *ins = in->read();
3823 switch (ins->opcode()) {
3827 NanoAssert(ins->getTarget() && ins->oprnd2()->isop(LIR_label));
3833 CASE64(LIR_addjovq:)
3834 CASE64(LIR_subjovq:)
3835 NanoAssert(ins->getTarget() && ins->oprnd3()->isop(LIR_label));
3839 uint32_t tableSize = ins->getTableSize();
3840 NanoAssert(tableSize > 0);
3841 for (uint32_t i = 0; i < tableSize; i++) {
3842 LIns* target = ins->getTarget(i);
3844 NanoAssert(target->isop(LIR_label));