1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=4 sw=4 et tw=99:
3 * ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
16 * The Original Code is LIR Assembler code, released 2009.
18 * The Initial Developer of the Original Code is
19 * Mozilla Corporation.
20 * Portions created by the Initial Developer are Copyright (C) 2009
21 * the Initial Developer. All Rights Reserved.
24 * Graydon Hoare <graydon@mozilla.com>
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
49 #include <sys/types.h>
59 #include "nanojit/nanojit.h"
61 using namespace nanojit;
64 /* Allocator SPI implementation. */
// Chunk-allocation hooks for nanojit's Allocator, backed by the C heap.
// NOTE(review): this file is a sampled excerpt — braces/returns between
// the visible lines are elided, so only the visible statements are annotated.
67 nanojit::Allocator::allocChunk(size_t nbytes, bool /*fallible*/)
// Plain malloc; presumably the elided lines assert/handle a NULL result — TODO confirm.
69 void *p = malloc(nbytes);
76 nanojit::Allocator::freeChunk(void *p) {
// postReset: nothing visible to release here.
81 nanojit::Allocator::postReset() {
// Side exit extended with the lirasm source line of the guard; the line is
// filled in by createSideExit() (see below) and printed by formatGuard().
85 struct LasmSideExit : public SideExit {
90 /* LIR SPI implementation */
// StackFilter hook: lirasm has no stack-top instruction to report.
93 nanojit::StackFilter::getTop(LIns*)
98 // We lump everything into a single access region for lirasm.
// Single alias-analysis region (bit 0) used by every load/store in this tool.
99 static const AccSet ACCSET_OTHER = (1 << 0);
100 static const uint8_t LIRASM_NUM_USED_ACCS = 1;
102 #if defined NJ_VERBOSE
// Verbose-mode printers: render a guard (and its LasmSideExit line/GuardID)
// into buf. Only compiled when NJ_VERBOSE is defined.
104 nanojit::LInsPrinter::formatGuard(InsBuf *buf, LIns *ins)
107 LasmSideExit *x = (LasmSideExit *)ins->record()->exit;
108 VMPI_snprintf(buf->buf, buf->len,
109 "%s: %s %s -> line=%ld (GuardID=%03d)",
111 lirNames[ins->opcode()],
// oprnd1 may be NULL for unconditional guards (e.g. LIR_x) — print "" then.
112 ins->oprnd1() ? formatRef(&b2, ins->oprnd1()) : "",
114 ins->record()->profGuardID);
// Same as formatGuard but for overflow guards, which have two operands
// and produce a value ("%s = ...").
118 nanojit::LInsPrinter::formatGuardXov(InsBuf *buf, LIns *ins)
121 LasmSideExit *x = (LasmSideExit *)ins->record()->exit;
122 VMPI_snprintf(buf->buf, buf->len,
123 "%s = %s %s, %s -> line=%ld (GuardID=%03d)",
125 lirNames[ins->opcode()],
126 formatRef(&b2, ins->oprnd1()),
127 formatRef(&b3, ins->oprnd2()),
129 ins->record()->profGuardID);
// Names for access-set bits: only bit 0 (ACCSET_OTHER) is meaningful here.
133 nanojit::LInsPrinter::accNames[] = {
134 "o", // (1 << 0) == ACCSET_OTHER
135 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", // 1..10 (unused)
136 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", // 11..20 (unused)
137 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", // 21..30 (unused)
// Debug validation: every memory access in lirasm must use ACCSET_OTHER.
143 void ValidateWriter::checkAccSet(LOpcode op, LIns* base, int32_t disp, AccSet accSet)
148 NanoAssert(accSet == ACCSET_OTHER);
// Entry-point signatures for compiled fragments, one per supported return type.
152 typedef int32_t (FASTCALL *RetInt)();
153 typedef int64_t (FASTCALL *RetQuad)();
154 typedef double (FASTCALL *RetDouble)();
155 typedef GuardRecord* (FASTCALL *RetGuard)();
159 struct nanojit::CallInfo callInfo;
// In debug builds CallInfo carries the function name; in release it does not.
172 #define DEBUG_ONLY_NAME(name) ,#name
174 #define DEBUG_ONLY_NAME(name)
// CI: build a CallInfo literal for a C function (always ABI_CDECL, impure,
// may store anywhere). FN: pair it with its name for the lookup table.
177 #define CI(name, args) \
178 {(uintptr_t) (&name), args, nanojit::ABI_CDECL, /*isPure*/0, ACCSET_STORE_ANY \
179 DEBUG_ONLY_NAME(name)}
181 #define FN(name, args) \
182 {#name, CI(name, args)}
// Token kinds produced by the tokenizer below.
185 NAME, NUMBER, PUNCT, NEWLINE
// True iff s begins with prefix.
195 startsWith(const string &s, const string &prefix)
197 return s.size() >= prefix.size() && s.compare(0, prefix.length(), prefix) == 0;
200 // LIR files must be ASCII, for simplicity.
// Line-oriented tokenizer over an istream; tracks the current line number
// for error messages.
201 class LirTokenStream {
203 LirTokenStream(istream &in) : mIn(in), mLineno(0) {}
// Fetch the next token; returns false at end of input (elided branches
// handle refilling mLine — not visible here).
205 bool get(LirToken &token) {
207 if (!getline(mIn, mLine))
// Strip leading whitespace, then find the end of a NAME/NUMBER token.
212 mLine.erase(0, mLine.find_first_not_of(" \t\v\r"));
214 size_t e = mLine.find_first_not_of("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$.+-");
// "->" introduces a guard target annotation; handled specially.
215 if (startsWith(mLine, "->")) {
220 string s = mLine.substr(0, e);
// Classify: hex literal, decimal (possibly ".5"-style) number, else NAME.
222 if (e > 1 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
224 else if (isdigit(s[0]) || (e > 1 && s[0] == '.' && isdigit(s[1])))
229 } else if (strchr(":,=[]()", c)) {
// ';' starts a comment, so it terminates the logical line like '\n'.
233 } else if (c == ';' || c == '\n') {
234 token.type = NEWLINE;
238 cerr << "line " << mLineno << ": error: Unrecognized character in file." << endl;
242 token.lineno = mLineno;
// Consume one token and require it to match the given type (and text).
246 bool eat(LirTokenType type, const char *exact = NULL) {
248 return (get(token) && token.type == type && (exact == NULL || token.data == exact));
// Read a NAME token into name; presumably returns false otherwise — TODO confirm elided tail.
251 bool getName(string &name) {
253 if (get(t) && t.type == NAME) {
// A compiled fragment: entry point (union of Ret* pointers, elided here),
// its return type, and its labels for cross-fragment patching.
266 class LirasmFragment {
276 ReturnType mReturnType;
278 map<string, LIns*> mLabels;
281 typedef map<string, LirasmFragment> Fragments;
// Top-level driver object: owns the allocator/code-alloc/LIR buffer and
// the table of assembled fragments.
285 Lirasm(bool verbose);
288 void assemble(istream &in, bool optimize);
289 void assembleRandom(int nIns, bool optimize);
// Look up a built-in function by name; on success points ci at its CallInfo.
290 bool lookupFunction(const string &name, CallInfo *&ci);
294 avmplus::AvmCore mCore;
296 CodeAlloc mCodeAlloc;
298 Fragments mFragments;
// Maps LIR mnemonic text -> opcode for the parser.
300 map<string, LOpcode> mOpMap;
// Fatal parse error outside any fragment (no line number available here).
302 void bad(const string &msg) {
303 cerr << "error: " << msg << endl;
308 void handlePatch(LirTokenStream &in);
// Assembles a single fragment, either from parsed tokens or randomly.
// Owns the writer pipeline built in the constructor and torn down in the dtor.
311 class FragmentAssembler {
313 FragmentAssembler(Lirasm &parent, const string &fragmentName, bool optimize);
314 ~FragmentAssembler();
316 void assembleFragment(LirTokenStream &in,
318 const LirToken *firstToken);
320 void assembleRandomFragment(int nIns);
// Monotonic id used to tag guards for fragment profiling.
323 static uint32_t sProfId;
// Non-copyable (declared, not defined — pre-C++11 idiom for "= delete").
325 FragmentAssembler(const FragmentAssembler &);
326 FragmentAssembler & operator=(const FragmentAssembler &);
327 LasmSideExit *createSideExit();
328 GuardRecord *createGuardRecord(LasmSideExit *exit);
331 const string mFragName;
334 vector<CallInfo*> mCallInfos;
335 map<string, LIns*> mLabels;
// Writer pipeline stages, kept individually so the dtor can delete them.
337 LirBufWriter *mBufWriter;
338 LirWriter *mCseFilter;
339 LirWriter *mExprFilter;
340 LirWriter *mSoftFloatFilter;
341 LirWriter *mVerboseWriter;
342 LirWriter *mValidateWriter1;
343 LirWriter *mValidateWriter2;
// Branches whose target label hasn't been seen yet; patched when it appears.
344 multimap<string, LIns *> mFwdJumps;
// Bitmask of RT_* values seen; used to warn about mixed return types.
350 char mReturnTypeBits;
351 vector<string> mTokens;
353 void tokenizeLine(LirTokenStream &in, LirToken &token);
355 LIns *ref(const string &);
356 LIns *assemble_jump(bool isCond);
357 LIns *assemble_load();
358 LIns *assemble_call(const string &);
359 LIns *assemble_ret(ReturnType rt);
360 LIns *assemble_guard(bool isCond);
361 LIns *assemble_guard_xov();
362 LIns *assemble_jump_jov();
363 void bad(const string &msg);
364 void nyi(const string &opname);
365 void extract_any_label(string &lab, char lab_delim);
366 void resolve_forward_jumps(string &lab, LIns *ins);
370 // 'sin' is overloaded on some platforms, so taking its address
371 // doesn't quite work. Provide a do-nothing function here
372 // that's not overloaded.
373 double sinFn(double d) {
// Exercises an all-double 8-argument call (register + stack passing).
378 double calld1(double x, double i, double y, double l, double x1, double i1, double y1, double l1) {
379 return x + i * y - l + x1 / i1 - y1 * l1;
382 // The calling tests with mixed argument types are sensible for all platforms, but they highlight
383 // the differences between the supported ABIs on ARM.
385 double callid1(int i, double x, double y, int j, int k, double z) {
386 return (x + y + z) / (double)(i + j + k);
389 double callid2(int i, int j, int k, double x) {
390 return x / (double)(i + j + k);
393 double callid3(int i, int j, double x, int k, double y, double z) {
394 return (x + y + z) / (double)(i + j + k);
397 // Simple print function for testing void calls.
// Table of built-in callables reachable from LIR "call" instructions.
// NOTE(review): FN(sin, ...) appears despite the sinFn comment above —
// an elided '#define sin sinFn' (or similar) is presumed; verify.
402 Function functions[] = {
403 FN(puts, CallInfo::typeSig1(ARGTYPE_I, ARGTYPE_P)),
404 FN(sin, CallInfo::typeSig1(ARGTYPE_D, ARGTYPE_D)),
405 FN(malloc, CallInfo::typeSig1(ARGTYPE_P, ARGTYPE_P)),
406 FN(free, CallInfo::typeSig1(ARGTYPE_V, ARGTYPE_P)),
407 FN(calld1, CallInfo::typeSig8(ARGTYPE_D, ARGTYPE_D, ARGTYPE_D, ARGTYPE_D,
408 ARGTYPE_D, ARGTYPE_D, ARGTYPE_D, ARGTYPE_D, ARGTYPE_D)),
409 FN(callid1, CallInfo::typeSig6(ARGTYPE_D, ARGTYPE_I, ARGTYPE_D, ARGTYPE_D,
410 ARGTYPE_I, ARGTYPE_I, ARGTYPE_D)),
411 FN(callid2, CallInfo::typeSig4(ARGTYPE_D, ARGTYPE_I, ARGTYPE_I, ARGTYPE_I, ARGTYPE_D)),
412 FN(callid3, CallInfo::typeSig6(ARGTYPE_D, ARGTYPE_I, ARGTYPE_I, ARGTYPE_D,
413 ARGTYPE_I, ARGTYPE_D, ARGTYPE_D)),
414 FN(printi, CallInfo::typeSig1(ARGTYPE_V, ARGTYPE_I)),
// Minimal boost-style lexical_cast via stringstream; exits (presumably via
// the elided lines after the error print) on failure — TODO confirm.
417 template<typename out, typename in> out
422 if ((tmp << arg && tmp >> ret && tmp.eof()))
424 cerr << "bad lexical cast from " << arg << endl;
// Parse a 32-bit immediate; accepts 0x/0X hex, else decimal via lexical_cast.
429 immI(const string &s)
433 if ((s.find("0x") == 0 || s.find("0X") == 0) &&
434 (tmp >> hex >> ret && tmp.eof())) {
437 return lexical_cast<int32_t>(s);
// Parse a 64-bit immediate, same scheme as immI.
441 immQ(const string &s)
445 if ((s.find("0x") == 0 || s.find("0X") == 0) &&
446 (tmp >> hex >> ret && tmp.eof())) {
449 return lexical_cast<uint64_t>(s);
// Parse a double immediate (decimal only).
453 immD(const string &s)
455 return lexical_cast<double>(s);
// Remove and return the first element; O(n) erase — fine for short token lists.
458 template<typename t> t
459 pop_front(vector<t> &vec)
462 cerr << "pop_front of empty vector" << endl;
466 vec.erase(vec.begin());
// Append one byte as two uppercase hex chars, advancing buf and folding the
// byte into the running S-record checksum (presumably in the elided lines).
471 dep_u8(char *&buf, uint8_t byte, uint32_t &cksum)
473 sprintf(buf, "%2.2X", byte);
// Append a 32-bit word big-endian (most significant byte first).
479 dep_u32(char *&buf, uint32_t word, uint32_t &cksum)
481 dep_u8(buf, (uint8_t)((word >> 24) & 0xff), cksum);
482 dep_u8(buf, (uint8_t)((word >> 16) & 0xff), cksum);
483 dep_u8(buf, (uint8_t)((word >> 8) & 0xff), cksum);
484 dep_u8(buf, (uint8_t)((word) & 0xff), cksum);
// Dump a fragment's machine code as Motorola S3 records (currently dead code;
// note the unnamed parameters and the FIXME below).
488 dump_srecords(ostream &, Fragment *)
490 // FIXME: Disabled until we work out a sane way to walk through
491 // code chunks under the new CodeAlloc regime.
493 // Write S-records. Can only do 4-byte addresses at the moment.
495 // FIXME: this presently dumps out the entire set of code pages
496 // written-to, which means it often dumps *some* bytes on the last
497 // page that are not necessarily initialized at all; they're
498 // beyond the last instruction written. Fix this to terminate
499 // s-record writing early.
// 32-bit only: S3 records carry exactly 4 address bytes here.
501 assert(sizeof(uintptr_t) == 4);
502 for (Page *page = frag->pages(); page; page = page->next) {
504 uintptr_t p0 = (uintptr_t) &(page->code);
505 for (uintptr_t p = p0; p < p0 + sizeof(page->code); p += step) {
508 // S-record type S3: 8-char / 4-byte address.
510 // +2 char code 'S3'.
511 // +2 char / 1 byte count of remaining bytes (37 = addr, payload, cksum).
512 // +8 char / 4 byte addr.
514 // +64 char / 32 byte payload.
516 // +2 char / 1 byte checksum.
519 size_t count = sizeof(p) + step + 1;
523 char *b = buf + 2; // 2 chars for the "S3" code.
525 dep_u8(b, (uint8_t) count, cksum); // Count of data bytes
526 dep_u32(b, p, cksum); // Address of the data byte being emitted
527 uint8_t *c = (uint8_t*) p;
528 for (size_t i = 0; i < step; ++i) { // Actual object code being emitted
529 dep_u8(b, c[i], cksum);
// S-record checksum: one's complement of the low byte of the running sum.
531 dep_u8(b, (uint8_t)((~cksum) & 0xff), cksum);
532 out << string(buf) << endl;
541 FragmentAssembler::sProfId = 0;
// Builds the LIR writer pipeline, innermost (LirBufWriter) first; each stage
// wraps the previous one, so writes flow: validate1 -> expr -> (softfloat) ->
// cse -> verbose -> validate2 -> buffer.
543 FragmentAssembler::FragmentAssembler(Lirasm &parent, const string &fragmentName, bool optimize)
544 : mParent(parent), mFragName(fragmentName), optimize(optimize),
545 mBufWriter(NULL), mCseFilter(NULL), mExprFilter(NULL), mSoftFloatFilter(NULL), mVerboseWriter(NULL),
546 mValidateWriter1(NULL), mValidateWriter2(NULL)
548 mFragment = new Fragment(NULL verbose_only(, (mParent.mLogc.lcbits &
549 nanojit::LC_FragProfile) ?
551 mFragment->lirbuf = mParent.mLirbuf;
552 mParent.mFragments[mFragName].fragptr = mFragment;
554 mLir = mBufWriter = new LirBufWriter(mParent.mLirbuf, nanojit::AvmCore::config);
556 if (optimize) { // don't re-validate if no optimization has taken place
557 mLir = mValidateWriter2 =
558 new ValidateWriter(mLir, mFragment->lirbuf->printer, "end of writer pipeline");
562 if (mParent.mVerbose) {
563 mLir = mVerboseWriter = new VerboseWriter(mParent.mAlloc, mLir,
564 mParent.mLirbuf->printer,
569 mLir = mCseFilter = new CseFilter(mLir, LIRASM_NUM_USED_ACCS, mParent.mAlloc);
571 #if NJ_SOFTFLOAT_SUPPORTED
572 if (avmplus::AvmCore::config.soft_float) {
// NOTE(review): result is not stored in mSoftFloatFilter, so the dtor's
// 'delete mSoftFloatFilter' frees nothing — looks like a leak; should be
// 'mLir = mSoftFloatFilter = new SoftFloatFilter(mLir);'. Verify upstream.
573 mLir = new SoftFloatFilter(mLir);
577 mLir = mExprFilter = new ExprFilter(mLir);
580 mLir = mValidateWriter1 =
581 new ValidateWriter(mLir, mFragment->lirbuf->printer, "start of writer pipeline");
// Every fragment begins with LIR_start and declares the saved registers
// as parameters so the backend preserves them.
585 mLir->ins0(LIR_start);
586 for (int i = 0; i < nanojit::NumSavedRegs; ++i)
587 mLir->insParam(i, 1);
// Tears down the writer pipeline stages allocated in the constructor
// (deleting NULL pointers is a no-op for stages that weren't created).
592 FragmentAssembler::~FragmentAssembler()
594 delete mValidateWriter1;
595 delete mValidateWriter2;
596 delete mVerboseWriter;
598 delete mSoftFloatFilter;
// Fatal parse error with the current fragment line number.
605 FragmentAssembler::bad(const string &msg)
607 cerr << "line " << mLineno << ": " << msg << endl;
612 FragmentAssembler::nyi(const string &opname)
614 cerr << "line " << mLineno << ": '" << opname << "' not yet implemented, sorry" << endl;
// Require exactly n operand tokens for the current instruction.
619 FragmentAssembler::need(size_t n)
621 if (mTokens.size() != n) {
622 bad("need " + lexical_cast<string>(n)
623 + " tokens, have " + lexical_cast<string>(mTokens.size()));
// Resolve a label to its instruction; errors out if undefined.
// NOTE(review): performs the map lookup twice; a single find() whose
// iterator is reused would suffice (cannot safely change in this excerpt).
628 FragmentAssembler::ref(const string &lab)
630 if (mLabels.find(lab) == mLabels.end())
631 bad("unknown label '" + lab + "'");
632 return mLabels.find(lab)->second;
// Assemble j/jt/jf. For conditional jumps the first token is the condition.
// Backward jumps are resolved immediately; forward jumps are emitted with a
// NULL target and recorded in mFwdJumps for later patching.
636 FragmentAssembler::assemble_jump(bool isCond)
642 string cond = pop_front(mTokens);
643 condition = ref(cond);
648 string name = pop_front(mTokens);
649 if (mLabels.find(name) != mLabels.end()) {
650 LIns *target = ref(name);
651 return mLir->insBranch(mOpcode, condition, target);
653 LIns *ins = mLir->insBranch(mOpcode, condition, NULL);
// Two insert forms — presumably compiler-version-conditional (#if elided).
655 mFwdJumps.insert(make_pair<const string, LIns *>(name, ins));
657 mFwdJumps.insert(make_pair(name, ins));
664 FragmentAssembler::assemble_load()
666 // Support implicit immediate-as-second-operand modes
667 // since, unlike sti/stqi, no immediate-displacement
668 // load opcodes were defined in LIR.
// NOTE(review): the first two conditions are identical ("0x" twice);
// the second was almost certainly meant to be "0X" (cf. immI/immQ above).
// As written, "0X"-prefixed offsets fall through to the digit test, which
// still accepts them only because '0' is a digit — verify and fix upstream.
670 if (mTokens[1].find("0x") == 0 ||
671 mTokens[1].find("0x") == 0 ||
672 mTokens[1].find_first_of("0123456789") == 0) {
673 return mLir->insLoad(mOpcode,
675 immI(mTokens[1]), ACCSET_OTHER);
677 bad("immediate offset required for load");
678 return NULL; // not reached
// Assemble callv/calli/callq/calld. Builds a CallInfo (arena-allocated, so
// never freed individually) either from the built-in table or inferred from
// the call site for user-defined targets.
682 FragmentAssembler::assemble_call(const string &op)
684 CallInfo *ci = new (mParent.mAlloc) CallInfo;
685 mCallInfos.push_back(ci);
687 memset(&args[0], 0, sizeof(args));
689 // Assembler syntax for a call:
691 // call 0x1234 fastcall a b c
693 // requires at least 2 args,
694 // fn address immediate and ABI token.
696 if (mTokens.size() < 2)
697 bad("need at least address and ABI code for " + op);
699 string func = pop_front(mTokens);
700 string abi = pop_front(mTokens);
// Map the ABI token to an AbiKind; unknown names are fatal.
702 AbiKind _abi = ABI_CDECL;
703 if (abi == "fastcall")
705 else if (abi == "stdcall")
707 else if (abi == "thiscall")
709 else if (abi == "cdecl")
712 bad("call abi name '" + abi + "'");
714 if (mTokens.size() > MAXARGS)
715 bad("too many args to " + op);
717 bool isBuiltin = mParent.lookupFunction(func, ci);
719 // Built-in: use its CallInfo. Also check (some) CallInfo details
720 // against those from the call site.
721 if (_abi != ci->_abi)
722 bad("invalid calling convention for " + func);
// Arguments are pushed in reverse token order (last token = args[0]).
725 for (i = 0; i < mTokens.size(); ++i) {
726 args[i] = ref(mTokens[mTokens.size() - (i+1)]);
728 if (i != ci->count_args())
729 bad("wrong number of arguments for " + func);
732 // User-defined function: infer CallInfo details (ABI, arg types, ret
733 // type) from the call site.
735 size_t argc = mTokens.size();
736 ArgType argTypes[MAXARGS];
737 for (size_t i = 0; i < argc; ++i) {
738 NanoAssert(i < MAXARGS); // should give a useful error msg if this fails
739 args[i] = ref(mTokens[mTokens.size() - (i+1)]);
// Infer each argument's type from the LIR value that feeds it.
740 if (args[i]->isD()) argTypes[i] = ARGTYPE_D;
742 else if (args[i]->isQ()) argTypes[i] = ARGTYPE_Q;
744 else argTypes[i] = ARGTYPE_I;
747 // Select return type from opcode.
748 ArgType retType = ARGTYPE_P;
749 if (mOpcode == LIR_callv) retType = ARGTYPE_V;
750 else if (mOpcode == LIR_calli) retType = ARGTYPE_I;
752 else if (mOpcode == LIR_callq) retType = ARGTYPE_Q;
754 else if (mOpcode == LIR_calld) retType = ARGTYPE_D;
756 ci->_typesig = CallInfo::typeSigN(retType, argc, argTypes);
759 return mLir->insCall(ci, args);
// Assemble reti/retq/retd: record the return type seen and emit the ret.
763 FragmentAssembler::assemble_ret(ReturnType rt)
766 mReturnTypeBits |= rt;
767 return mLir->ins1(mOpcode, ref(mTokens[0]));
// Build a zeroed side exit tagged with the current source line.
// NOTE(review): memset after placement-new wipes whatever the constructor
// set — safe only while LasmSideExit stays trivially constructible; verify.
771 FragmentAssembler::createSideExit()
773 LasmSideExit* exit = new (mParent.mAlloc) LasmSideExit();
774 memset(exit, 0, sizeof(LasmSideExit));
775 exit->from = mFragment;
777 exit->line = mLineno;
// Wrap a side exit in a zeroed GuardRecord (arena-allocated).
782 FragmentAssembler::createGuardRecord(LasmSideExit *exit)
784 GuardRecord *rec = new (mParent.mAlloc) GuardRecord;
785 memset(rec, 0, sizeof(GuardRecord));
// Assemble x/xt/xf; conditional guards consume a condition operand first.
792 FragmentAssembler::assemble_guard(bool isCond)
794 GuardRecord* guard = createGuardRecord(createSideExit());
799 ins_cond = ref(pop_front(mTokens));
805 mReturnTypeBits |= RT_GUARD;
807 if (!mTokens.empty())
808 bad("too many arguments");
810 return mLir->insGuard(mOpcode, ins_cond, guard);
// Assemble addxovi/subxovi/mulxovi: an arithmetic op that exits on overflow.
814 FragmentAssembler::assemble_guard_xov()
816 GuardRecord* guard = createGuardRecord(createSideExit());
820 mReturnTypeBits |= RT_GUARD;
822 return mLir->insGuardXov(mOpcode, ref(mTokens[0]), ref(mTokens[1]), guard);
// Assemble addjovi/subjovi/...: arithmetic that branches on overflow.
// Mirrors assemble_jump's backward/forward target handling.
826 FragmentAssembler::assemble_jump_jov()
830 LIns *a = ref(mTokens[0]);
831 LIns *b = ref(mTokens[1]);
832 string name = mTokens[2];
834 if (mLabels.find(name) != mLabels.end()) {
835 LIns *target = ref(name);
836 return mLir->insBranchJov(mOpcode, a, b, target);
// Forward reference: emit with NULL target, patch later via mFwdJumps.
838 LIns *ins = mLir->insBranchJov(mOpcode, a, b, NULL);
840 mFwdJumps.insert(make_pair<const string, LIns *>(name, ins));
842 mFwdJumps.insert(make_pair(name, ins));
// Finish the fragment: sanity-check return types, append a terminating
// unconditional exit guard, compile, and publish the entry point into the
// parent's fragment table under the appropriate Ret* signature.
849 FragmentAssembler::endFragment()
851 if (mReturnTypeBits == 0) {
852 cerr << "warning: no return type in fragment '"
853 << mFragName << "'" << endl;
// mReturnTypeBits is a bitmask, so any value that isn't exactly one RT_*
// constant means the fragment mixed return types.
855 } else if (mReturnTypeBits != RT_INT &&
857 mReturnTypeBits != RT_QUAD &&
859 mReturnTypeBits != RT_DOUBLE &&
860 mReturnTypeBits != RT_GUARD)
862 cerr << "warning: multiple return types in fragment '"
863 << mFragName << "'" << endl;
// Terminating guard so control can never run off the end of the fragment.
867 mLir->insGuard(LIR_x, NULL, createGuardRecord(createSideExit()));
869 mParent.mAssm.compile(mFragment, mParent.mAlloc, optimize
870 verbose_only(, mParent.mLirbuf->printer));
872 if (mParent.mAssm.error() != nanojit::None) {
873 cerr << "error during assembly: ";
874 switch (mParent.mAssm.error()) {
875 case nanojit::BranchTooFar: cerr << "BranchTooFar"; break;
876 case nanojit::StackFull: cerr << "StackFull"; break;
877 case nanojit::UnknownBranch: cerr << "UnknownBranch"; break;
878 case nanojit::None: cerr << "None"; break;
879 default: NanoAssert(0); break;
886 f = &mParent.mFragments[mFragName];
// Store the compiled entry point in the union member matching the type.
888 switch (mReturnTypeBits) {
890 f->rint = (RetInt)((uintptr_t)mFragment->code());
891 f->mReturnType = RT_INT;
895 f->rquad = (RetQuad)((uintptr_t)mFragment->code());
896 f->mReturnType = RT_QUAD;
900 f->rdouble = (RetDouble)((uintptr_t)mFragment->code());
901 f->mReturnType = RT_DOUBLE;
904 f->rguard = (RetGuard)((uintptr_t)mFragment->code());
905 f->mReturnType = RT_GUARD;
// Expose this fragment's labels so later fragments can patch against them.
912 mParent.mFragments[mFragName].mLabels = mLabels;
// Collect all tokens up to the next NEWLINE into mTokens, starting with the
// token already read by the caller.
916 FragmentAssembler::tokenizeLine(LirTokenStream &in, LirToken &token)
919 mTokens.push_back(token.data);
921 while (in.get(token)) {
922 if (token.type == NEWLINE)
924 mTokens.push_back(token.data);
// If the line starts "<name> <delim> ...", strip the name and delimiter
// (':' for jump labels, '=' for value names) into lab.
929 FragmentAssembler::extract_any_label(string &lab, char lab_delim)
931 if (mTokens.size() > 2 && mTokens[1].size() == 1 && mTokens[1][0] == lab_delim) {
932 lab = pop_front(mTokens);
933 pop_front(mTokens); // remove punctuation
935 if (mLabels.find(lab) != mLabels.end())
936 bad("duplicate label");
// Point every pending forward jump to lab at ins, then forget them.
941 FragmentAssembler::resolve_forward_jumps(string &lab, LIns *ins)
943 typedef multimap<string, LIns *> mulmap;
// iterator vs const_iterator — presumably compiler-conditional (#if elided).
945 typedef mulmap::iterator ci;
947 typedef mulmap::const_iterator ci;
949 pair<ci, ci> range = mFwdJumps.equal_range(lab);
950 for (ci i = range.first; i != range.second; ++i) {
951 i->second->setTarget(ins);
953 mFwdJumps.erase(lab);
// Main parse loop: read lines until .end (or EOF for the implicit fragment),
// dispatch each mnemonic to the right emitter, and record any result label.
// Large portions (the switch labels themselves) are elided in this excerpt;
// only the visible action lines are annotated.
957 FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, const LirToken *firstToken)
964 } else if (!in.get(token)) {
966 bad("unexpected end of file in fragment '" + mFragName + "'");
969 if (token.type == NEWLINE)
971 if (token.type != NAME)
972 bad("unexpected token '" + token.data + "'");
974 string op = token.data;
// Directive handling: .begin nesting and stray/.end checks.
976 bad("nested fragments are not supported");
979 bad(".end without .begin");
980 if (!in.eat(NEWLINE))
981 bad("extra junk after .end");
985 mLineno = token.lineno;
986 tokenizeLine(in, token);
// "name:" defines a jump label at this point in the stream.
990 extract_any_label(lab, ':');
992 /* Save label and do any back-patching of deferred forward-jumps. */
994 ins = mLir->ins0(LIR_label);
995 resolve_forward_jumps(lab, ins);
// "name = op ..." names the instruction's result.
998 extract_any_label(lab, '=');
1000 assert(!mTokens.empty());
1001 op = pop_front(mTokens);
1002 if (mParent.mOpMap.find(op) == mParent.mOpMap.end())
1003 bad("unknown instruction '" + op + "'");
1005 mOpcode = mParent.mOpMap[op];
// LIR_start is emitted automatically by the constructor; reject it here.
1009 bad("start instructions cannot be specified explicitly");
1014 ins = mLir->ins0(mOpcode);
1033 #if defined NANOJIT_IA32 || defined NANOJIT_X64
1037 ins = mLir->ins1(mOpcode,
1044 #if defined NANOJIT_IA32 || defined NANOJIT_X64
1090 ins = mLir->ins2(mOpcode,
1099 ins = mLir->ins3(mOpcode,
1106 ins = assemble_jump(/*isCond*/false);
1111 ins = assemble_jump(/*isCond*/true);
// Immediates of each width parse their single token operand.
1116 ins = mLir->insImmI(immI(mTokens[0]));
1119 #ifdef NANOJIT_64BIT
1122 ins = mLir->insImmQ(immQ(mTokens[0]));
1128 ins = mLir->insImmD(immD(mTokens[0]));
1131 #if NJ_EXPANDED_LOADSTORE_SUPPORTED
// Stores: value, base, immediate displacement; always ACCSET_OTHER.
1140 ins = mLir->insStore(mOpcode, ref(mTokens[0]),
1142 immI(mTokens[2]), ACCSET_OTHER);
1145 #if NJ_EXPANDED_LOADSTORE_SUPPORTED
1155 ins = assemble_load();
1158 // XXX: insParam gives the one appropriate for the platform. Eg. if
1159 // you specify qparam on x86 you'll end up with iparam anyway. Fix
1163 ins = mLir->insParam(immI(mTokens[0]),
1167 // XXX: similar to iparam/qparam above.
1170 ins = mLir->insAlloc(immI(mTokens[0]));
1174 bad("skip instruction is deprecated");
1179 ins = assemble_guard(/*isCond*/false);
1184 ins = assemble_guard(/*isCond*/true);
1190 ins = assemble_guard_xov();
1196 CASE64(LIR_addjovq:)
1197 CASE64(LIR_subjovq:)
1198 ins = assemble_jump_jov();
1206 ins = assemble_call(op);
1210 ins = assemble_ret(RT_INT);
1213 #ifdef NANOJIT_64BIT
1215 ins = assemble_ret(RT_QUAD);
1220 ins = assemble_ret(RT_DOUBLE);
1224 ins = mLir->ins0(LIR_label);
1226 resolve_forward_jumps(lab, ins);
// Record the "name =" result label, if any, for later operand references.
1244 mLabels.insert(make_pair(lab, ins));
1250 /* ------------------ Support for --random -------------------------- */
1252 // Returns a positive integer in the range 0..(lim-1).
1253 static inline size_t
1256 size_t i = size_t(rand());
1260 // Returns an int32_t in the range -RAND_MAX..RAND_MAX.
1261 static inline int32_t
// Random sign applied to a random magnitude.
1264 return (rnd(2) ? 1 : -1) * rand();
1267 // The maximum number of live values (per type, ie. B/I/Q/F) that are
1268 // available to be used as operands. If we make it too high we're prone to
1269 // run out of stack space due to spilling. Needs to be set in consideration
1270 // with spillStackSzB.
1271 const size_t maxLiveValuesPerType = 20;
1273 // Returns a uint32_t in the range 0..(RAND_MAX*2).
1274 static inline uint32_t
1277 return uint32_t(rnd(2) ? 0 : RAND_MAX) + uint32_t(rand());
// Pick a random element; callers must ensure v is non-empty (rnd(0) would
// be out of range) — the elided lines may assert this; verify.
1280 template<typename t> t
1281 rndPick(vector<t> &v)
1284 return v[rnd(v.size())];
1287 // Add the operand, and retire an old one if we have too many.
1288 template<typename t> void
1289 addOrReplace(vector<t> &v, t x)
1291 if (v.size() > maxLiveValuesPerType) {
1292 v[rnd(v.size())] = x; // we're full: overwrite an existing element
1294 v.push_back(x); // add to end
1298 // Returns a 4-aligned address within the given size.
1299 static int32_t rndOffset32(size_t szB)
1301 return int32_t(rnd(szB)) & ~3;
1304 // Returns an 8-aligned address within the given size.
1305 static int32_t rndOffset64(size_t szB)
1307 return int32_t(rnd(szB)) & ~7;
// Call targets for randomly generated LIR calls. The pairs (1-arg / 6-arg,
// 2-arg / 7-arg, 3-arg / 8-arg) exercise both register and stack argument
// passing on common ABIs.
1310 static int32_t f_I_I1(int32_t a)
1315 static int32_t f_I_I6(int32_t a, int32_t b, int32_t c, int32_t d, int32_t e, int32_t f)
1317 return a + b + c + d + e + f;
1320 #ifdef NANOJIT_64BIT
1321 static uint64_t f_Q_Q2(uint64_t a, uint64_t b)
1326 static uint64_t f_Q_Q7(uint64_t a, uint64_t b, uint64_t c, uint64_t d,
1327 uint64_t e, uint64_t f, uint64_t g)
1329 return a + b + c + d + e + f + g;
1333 static double f_F_F3(double a, double b, double c)
1338 static double f_F_F8(double a, double b, double c, double d,
1339 double e, double f, double g, double h)
1341 return a + b + c + d + e + f + g + h;
1344 #ifdef NANOJIT_64BIT
// Mixed int/quad/double signature for testing void calls (64-bit only).
1345 static void f_V_IQF(int32_t, uint64_t, double)
1347 return; // no need to do anything
// CallInfo descriptors for the functions above; naming is ci_<ret>_<args>.
1351 const CallInfo ci_I_I1 = CI(f_I_I1, CallInfo::typeSig1(ARGTYPE_I, ARGTYPE_I));
1352 const CallInfo ci_I_I6 = CI(f_I_I6, CallInfo::typeSig6(ARGTYPE_I, ARGTYPE_I, ARGTYPE_I, ARGTYPE_I,
1353 ARGTYPE_I, ARGTYPE_I, ARGTYPE_I));
1355 #ifdef NANOJIT_64BIT
1356 const CallInfo ci_Q_Q2 = CI(f_Q_Q2, CallInfo::typeSig2(ARGTYPE_Q, ARGTYPE_Q, ARGTYPE_Q));
1357 const CallInfo ci_Q_Q7 = CI(f_Q_Q7, CallInfo::typeSig7(ARGTYPE_Q, ARGTYPE_Q, ARGTYPE_Q, ARGTYPE_Q,
1358 ARGTYPE_Q, ARGTYPE_Q, ARGTYPE_Q, ARGTYPE_Q));
1361 const CallInfo ci_F_F3 = CI(f_F_F3, CallInfo::typeSig3(ARGTYPE_D, ARGTYPE_D, ARGTYPE_D, ARGTYPE_D));
1362 const CallInfo ci_F_F8 = CI(f_F_F8, CallInfo::typeSig8(ARGTYPE_D, ARGTYPE_D, ARGTYPE_D, ARGTYPE_D,
1363 ARGTYPE_D, ARGTYPE_D, ARGTYPE_D, ARGTYPE_D,
1366 #ifdef NANOJIT_64BIT
1367 const CallInfo ci_V_IQF = CI(f_V_IQF, CallInfo::typeSig3(ARGTYPE_V, ARGTYPE_I, ARGTYPE_Q, ARGTYPE_D));
1370 // Generate a random block containing nIns instructions, plus a few more
1371 // setup/shutdown ones at the start and end.
1374 // - We divide LIR into numerous classes, mostly according to their type.
1375 // (See LInsClasses.tbl for details.) Each time around the loop we choose
1376 // the class randomly, but there is weighting so that some classes are more
1377 // common than others, in an attempt to reflect the structure of real code.
1378 // - Each instruction that produces a value is put in a buffer of the
1379 // appropriate type, for possible use as an operand of a later instruction.
1380 // This buffer is trimmed when its size exceeds 'maxLiveValuesPerType'.
1381 // - If not enough operands are present in a buffer for the particular
1382 // instruction, we don't add it.
1383 // - Skips aren't explicitly generated, but they do occur if the fragment is
1384 // sufficiently big that it's spread across multiple chunks.
1386 // The following instructions aren't generated yet:
1387 // - LIR_parami/LIR_paramq (hard to test beyond what is auto-generated in fragment
1389 // - LIR_livei/LIR_liveq/LIR_lived
1391 // - LIR_x/LIR_xt/LIR_xf/LIR_xtbl/LIR_addxovi/LIR_subxovi/LIR_mulxovi (hard to
1392 // test without having multiple fragments; when we only have one fragment
1393 // we don't really want to leave it early)
1394 // - LIR_reti/LIR_retq/LIR_retd (hard to test without having multiple fragments)
1395 // - LIR_j/LIR_jt/LIR_jf/LIR_jtbl/LIR_label
1396 // - LIR_file/LIR_line (#ifdef VTUNE only)
1397 // - LIR_modd (not implemented in NJ backends)
1399 // Other limitations:
1400 // - Loads always use accSet==ACCSET_OTHER
1401 // - Stores always use accSet==ACCSET_OTHER
1404 FragmentAssembler::assembleRandomFragment(int nIns)
1406 vector<LIns*> Bs; // boolean values, ie. 32-bit int values produced by tests
1407 vector<LIns*> Is; // 32-bit int values
1408 vector<LIns*> Qs; // 64-bit int values
1409 vector<LIns*> Ds; // 64-bit double values
1410 vector<LIns*> M4s; // 4 byte allocs
1411 vector<LIns*> M8ps; // 8+ byte allocs
1413 vector<LOpcode> I_I_ops;
1414 I_I_ops.push_back(LIR_negi);
1415 I_I_ops.push_back(LIR_noti);
1417 // Nb: there are no Q_Q_ops.
1419 vector<LOpcode> D_D_ops;
1420 D_D_ops.push_back(LIR_negd);
1422 vector<LOpcode> I_II_ops;
1423 I_II_ops.push_back(LIR_addi);
1424 I_II_ops.push_back(LIR_subi);
1425 I_II_ops.push_back(LIR_muli);
1426 #if defined NANOJIT_IA32 || defined NANOJIT_X64
1427 I_II_ops.push_back(LIR_divi);
1428 I_II_ops.push_back(LIR_modi);
1430 I_II_ops.push_back(LIR_andi);
1431 I_II_ops.push_back(LIR_ori);
1432 I_II_ops.push_back(LIR_xori);
1433 I_II_ops.push_back(LIR_lshi);
1434 I_II_ops.push_back(LIR_rshi);
1435 I_II_ops.push_back(LIR_rshui);
1437 #ifdef NANOJIT_64BIT
1438 vector<LOpcode> Q_QQ_ops;
1439 Q_QQ_ops.push_back(LIR_addq);
1440 Q_QQ_ops.push_back(LIR_andq);
1441 Q_QQ_ops.push_back(LIR_orq);
1442 Q_QQ_ops.push_back(LIR_xorq);
1444 vector<LOpcode> Q_QI_ops;
1445 Q_QI_ops.push_back(LIR_lshq);
1446 Q_QI_ops.push_back(LIR_rshq);
1447 Q_QI_ops.push_back(LIR_rshuq);
1450 vector<LOpcode> D_DD_ops;
1451 D_DD_ops.push_back(LIR_addd);
1452 D_DD_ops.push_back(LIR_subd);
1453 D_DD_ops.push_back(LIR_muld);
1454 D_DD_ops.push_back(LIR_divd);
1456 vector<LOpcode> I_BII_ops;
1457 I_BII_ops.push_back(LIR_cmovi);
1459 #ifdef NANOJIT_64BIT
1460 vector<LOpcode> Q_BQQ_ops;
1461 Q_BQQ_ops.push_back(LIR_cmovq);
1464 vector<LOpcode> D_BDD_ops;
1465 D_BDD_ops.push_back(LIR_cmovd);
1467 vector<LOpcode> B_II_ops;
1468 B_II_ops.push_back(LIR_eqi);
1469 B_II_ops.push_back(LIR_lti);
1470 B_II_ops.push_back(LIR_gti);
1471 B_II_ops.push_back(LIR_lei);
1472 B_II_ops.push_back(LIR_gei);
1473 B_II_ops.push_back(LIR_ltui);
1474 B_II_ops.push_back(LIR_gtui);
1475 B_II_ops.push_back(LIR_leui);
1476 B_II_ops.push_back(LIR_geui);
1478 #ifdef NANOJIT_64BIT
1479 vector<LOpcode> B_QQ_ops;
1480 B_QQ_ops.push_back(LIR_eqq);
1481 B_QQ_ops.push_back(LIR_ltq);
1482 B_QQ_ops.push_back(LIR_gtq);
1483 B_QQ_ops.push_back(LIR_leq);
1484 B_QQ_ops.push_back(LIR_geq);
1485 B_QQ_ops.push_back(LIR_ltuq);
1486 B_QQ_ops.push_back(LIR_gtuq);
1487 B_QQ_ops.push_back(LIR_leuq);
1488 B_QQ_ops.push_back(LIR_geuq);
1491 vector<LOpcode> B_DD_ops;
1492 B_DD_ops.push_back(LIR_eqd);
1493 B_DD_ops.push_back(LIR_ltd);
1494 B_DD_ops.push_back(LIR_gtd);
1495 B_DD_ops.push_back(LIR_led);
// NOTE(review): this span is a non-contiguous excerpt (the embedded line
// numbers have gaps); braces/#endif lines between visible lines are missing.
// It builds the per-category opcode vectors used by the random LIR generator:
// each vector holds candidate opcodes for one signature class (e.g. Q_I_ops =
// ops producing a quad from an int), later drawn from via rndPick().
1496 B_DD_ops.push_back(LIR_ged);
1498 #ifdef NANOJIT_64BIT
1499 vector<LOpcode> Q_I_ops;
1500 Q_I_ops.push_back(LIR_i2q);
1501 Q_I_ops.push_back(LIR_ui2uq);
1503 vector<LOpcode> I_Q_ops;
1504 I_Q_ops.push_back(LIR_q2i);
1507 vector<LOpcode> D_I_ops;
1508 #if !NJ_SOFTFLOAT_SUPPORTED
1509 // Don't emit LIR_{ui,i}2d for soft-float platforms because the soft-float filter removes them.
1510 D_I_ops.push_back(LIR_i2d);
1511 D_I_ops.push_back(LIR_ui2d);
1512 #elif defined(NANOJIT_ARM)
1513 // The ARM back-end can detect FP support at run-time.
1514 if (avmplus::AvmCore::config.arm_vfp) {
1515 D_I_ops.push_back(LIR_i2d);
1516 D_I_ops.push_back(LIR_ui2d);
1520 vector<LOpcode> I_D_ops;
1521 #if NJ_SOFTFLOAT_SUPPORTED
1522 I_D_ops.push_back(LIR_dlo2i);
1523 I_D_ops.push_back(LIR_dhi2i);
1525 #if !NJ_SOFTFLOAT_SUPPORTED
1526 // Don't emit LIR_d2i for soft-float platforms because the soft-float filter removes it.
1527 I_D_ops.push_back(LIR_d2i);
1528 #elif defined(NANOJIT_ARM)
1529 // The ARM back-end can detect FP support at run-time.
1530 if (avmplus::AvmCore::config.arm_vfp) {
1531 I_D_ops.push_back(LIR_d2i);
1535 #ifdef NANOJIT_64BIT
1536 vector<LOpcode> Q_D_ops;
1537 Q_D_ops.push_back(LIR_dasq);
1539 vector<LOpcode> D_Q_ops;
1540 D_Q_ops.push_back(LIR_qasd);
1543 vector<LOpcode> D_II_ops;
1544 #if NJ_SOFTFLOAT_SUPPORTED
1545 D_II_ops.push_back(LIR_ii2d);
// Load opcodes: LIR_ldi is pushed three times so a plain 32-bit load is
// picked more often than the narrowing/widening variants.
1548 vector<LOpcode> I_loads;
1549 I_loads.push_back(LIR_ldi); // weight LIR_ldi more heavily
1550 I_loads.push_back(LIR_ldi);
1551 I_loads.push_back(LIR_ldi);
1552 I_loads.push_back(LIR_lduc2ui);
1553 I_loads.push_back(LIR_ldus2ui);
1554 #if NJ_EXPANDED_LOADSTORE_SUPPORTED
1555 I_loads.push_back(LIR_ldc2i);
1556 I_loads.push_back(LIR_lds2i);
1559 #ifdef NANOJIT_64BIT
1560 vector<LOpcode> Q_loads;
1561 Q_loads.push_back(LIR_ldq);
1564 vector<LOpcode> D_loads;
1565 D_loads.push_back(LIR_ldd);
1566 #if NJ_EXPANDED_LOADSTORE_SUPPORTED
1567 // this loads a 32-bit float and expands it to 64-bit float
1568 D_loads.push_back(LIR_ldf2d);
// Build the class-frequency table: the CL___ X-macro over LInsClasses.tbl is
// expanded twice, first (presumably) to enumerate class names, then to fill
// relFreqs[] with each class's relative frequency.
1572 #define CL___(name, relFreq) name,
1573 #include "LInsClasses.tbl"
1578 int relFreqs[LLAST];
1579 memset(relFreqs, 0, sizeof(relFreqs));
1580 #define CL___(name, relFreq) relFreqs[name] = relFreq;
1581 #include "LInsClasses.tbl"
1584 int relFreqsSum = 0; // the sum of the individual relative frequencies
1585 for (int c = 0; c < LLAST; c++) {
1586 relFreqsSum += relFreqs[c];
1589 // The number of times each LInsClass value appears in classGenerator[]
1590 // matches 'relFreqs' (see LInsClasses.tbl). Eg. if relFreqs[LIMM_I] ==
1591 // 10, then LIMM_I appears in classGenerator[] 10 times.
1592 LInsClass* classGenerator = new LInsClass[relFreqsSum];
1594 for (int c = 0; c < LLAST; c++) {
1595 for (int i = 0; i < relFreqs[c]; i++) {
1596 classGenerator[j++] = LInsClass(c);
// NOTE(review): non-contiguous excerpt — intervening lines (case labels,
// closing braces, the surrounding generation loop) are missing.
// Stack budget: cap explicit LIR_allocp usage so a reserve remains for
// register spills, then generate immediates biased toward small values.
1600 // Used to keep track of how much stack we've explicitly used via
1601 // LIR_allocp. We then need to keep some reserve for spills as well.
1602 const size_t stackSzB = NJ_MAX_STACK_ENTRY * 4;
1603 const size_t spillStackSzB = 1024;
1604 const size_t maxExplicitlyUsedStackSzB = stackSzB - spillStackSzB;
1605 size_t explicitlyUsedStackSzB = 0;
1607 // Do an 8-byte stack alloc right at the start so that loads and stores
1608 // can be done immediately.
1609 addOrReplace(M8ps, mLir->insAlloc(8));
// Each iteration picks an instruction class at random, weighted by the
// relFreqs-derived classGenerator table built above.
1616 switch (classGenerator[rnd(relFreqsSum)]) {
1620 mLir->ins0(LIR_regfence);
1622 mLir->insGuard(LIR_xbarrier, NULL, createGuardRecord(createSideExit()));
1628 // The stack has a limited size, so we (a) don't want chunks to be
1629 // too big, and (b) have to stop allocating them after a while.
1632 case 0: szB = 4; break;
1633 case 1: szB = 8; break;
1634 case 2: szB = 4 * (rnd(6) + 3); break; // 12, 16, ..., 32
1636 if (explicitlyUsedStackSzB + szB <= maxExplicitlyUsedStackSzB) {
1637 ins = mLir->insAlloc(szB);
1638 // We add the result to Is/Qs so it can be used as an ordinary
1639 // operand, and to M4s/M8ps so that loads/stores can be done from
// On 64-bit builds the alloc result is pointer-sized, hence a quad (Qs);
// on 32-bit builds it is an int (Is).
1641 #if defined NANOJIT_64BIT
1642 addOrReplace(Qs, ins);
1644 addOrReplace(Is, ins);
1647 addOrReplace(M4s, ins);
1649 addOrReplace(M8ps, ins);
1651 // It's possible that we will exceed maxExplicitlyUsedStackSzB
1652 // by up to 28 bytes. Doesn't matter.
1653 explicitlyUsedStackSzB += szB;
1659 // For the immediates, we bias towards smaller numbers, especially 0
1660 // and 1 and small multiples of 4 which are common due to memory
1661 // addressing. This puts some realistic stress on CseFilter.
1663 int32_t immI = 0; // shut gcc up
1665 case 0: immI = 0; break;
1666 case 1: immI = 1; break;
1667 case 2: immI = 4 * (rnd(256) + 1); break; // 4, 8, ..., 1024
1668 case 3: immI = rnd(19999) - 9999; break; // -9999..9999
1669 case 4: immI = rndI32(); break; // -RAND_MAX..RAND_MAX
1671 ins = mLir->insImmI(immI);
1672 addOrReplace(Is, ins);
// 64-bit immediates follow the same biased distribution, plus a full-width
// random case built from two 32-bit halves.
1677 #ifdef NANOJIT_64BIT
1681 case 0: imm64 = 0; break;
1682 case 1: imm64 = 1; break;
1683 case 2: imm64 = 4 * (rnd(256) + 1); break; // 4, 8, ..., 1024
1684 case 3: imm64 = rnd(19999) - 9999; break; // -9999..9999
1685 case 4: imm64 = uint64_t(rndU32()) << 32 | rndU32(); break; // possibly big!
1687 ins = mLir->insImmQ(imm64);
1688 addOrReplace(Qs, ins);
1695 // We don't explicitly generate infinities and NaNs here, but they
1696 // end up occurring due to ExprFilter evaluating expressions like
1697 // divd(1,0) and divd(Infinity,Infinity).
1700 case 0: imm64f = 0.0; break;
1701 case 1: imm64f = 1.0; break;
1703 case 3: imm64f = double(rnd(1000)); break; // 0.0..9999.0
// Random bit pattern reinterpreted as a double (via the union 'u',
// presumably declared on a missing line — TODO confirm).
1709 u.q = uint64_t(rndU32()) << 32 | rndU32();
1713 ins = mLir->insImmD(imm64f);
1714 addOrReplace(Ds, ins);
// NOTE(review): non-contiguous excerpt — case labels and closing braces
// between visible lines are missing. These are the switch cases that emit
// unary/binary/ternary ops, drawing operands from the Is/Qs/Ds/Bs pools.
1721 ins = mLir->ins1(rndPick(I_I_ops), rndPick(Is));
1722 addOrReplace(Is, ins);
1727 // case LOP_Q_Q: no instruction in this category
1731 ins = mLir->ins1(rndPick(D_D_ops), rndPick(Ds));
1732 addOrReplace(Ds, ins);
1739 LOpcode op = rndPick(I_II_ops);
1740 LIns* lhs = rndPick(Is);
1741 LIns* rhs = rndPick(Is);
// x86-only: div/mod need a guaranteed-safe divisor, built below via cmov.
1742 #if defined NANOJIT_IA32 || defined NANOJIT_X64
1743 if (op == LIR_divi || op == LIR_modi) {
1744 // XXX: ExprFilter can't fold a div/mod with constant
1745 // args, due to the horrible semantics of LIR_modi. So we
1746 // just don't generate anything if we hit that case.
1747 if (!lhs->isImmI() || !rhs->isImmI()) {
1748 // If the divisor is positive, no problems. If it's zero, we get an
1749 // exception. If it's -1 and the dividend is -2147483648 (-2^31) we get
1750 // an exception (and this has been encountered in practice). So we only
1751 // allow positive divisors, ie. compute: lhs / (rhs > 0 ? rhs : -k),
1752 // where k is a random number in the range 2..100 (this ensures we have
1753 // some negative divisors).
1754 LIns* gt0 = mLir->ins2ImmI(LIR_gti, rhs, 0);
1755 LIns* rhs2 = mLir->ins3(LIR_cmovi, gt0, rhs, mLir->insImmI(-((int32_t)rnd(99)) - 2));
1756 LIns* div = mLir->ins2(LIR_divi, lhs, rhs2);
1757 if (op == LIR_divi) {
1759 addOrReplace(Is, ins);
// LIR_modi takes the preceding div as its single operand.
1762 ins = mLir->ins1(LIR_modi, div);
1763 // Add 'div' to the operands too so it might be used again, because
1764 // the code generated is different as compared to the case where 'div'
1765 // isn't used again.
1766 addOrReplace(Is, div);
1767 addOrReplace(Is, ins);
1774 ins = mLir->ins2(op, lhs, rhs);
1775 addOrReplace(Is, ins);
1781 #ifdef NANOJIT_64BIT
1784 ins = mLir->ins2(rndPick(Q_QQ_ops), rndPick(Qs), rndPick(Qs));
1785 addOrReplace(Qs, ins);
1791 if (!Qs.empty() && !Is.empty()) {
1792 ins = mLir->ins2(rndPick(Q_QI_ops), rndPick(Qs), rndPick(Is));
1793 addOrReplace(Qs, ins);
1801 ins = mLir->ins2(rndPick(D_DD_ops), rndPick(Ds), rndPick(Ds));
1802 addOrReplace(Ds, ins);
// Conditional-move style ops: need a boolean (Bs) plus two value operands.
1808 if (!Bs.empty() && !Is.empty()) {
1809 ins = mLir->ins3(rndPick(I_BII_ops), rndPick(Bs), rndPick(Is), rndPick(Is));
1810 addOrReplace(Is, ins);
1815 #ifdef NANOJIT_64BIT
1817 if (!Bs.empty() && !Qs.empty()) {
1818 ins = mLir->ins3(rndPick(Q_BQQ_ops), rndPick(Bs), rndPick(Qs), rndPick(Qs));
1819 addOrReplace(Qs, ins);
1826 if (!Bs.empty() && !Ds.empty()) {
1827 ins = mLir->ins3(rndPick(D_BDD_ops), rndPick(Bs), rndPick(Ds), rndPick(Ds));
1828 addOrReplace(Ds, ins);
1835 ins = mLir->ins2(rndPick(B_II_ops), rndPick(Is), rndPick(Is));
1836 addOrReplace(Bs, ins);
1841 #ifdef NANOJIT_64BIT
1844 ins = mLir->ins2(rndPick(B_QQ_ops), rndPick(Qs), rndPick(Qs));
1845 addOrReplace(Bs, ins);
1853 ins = mLir->ins2(rndPick(B_DD_ops), rndPick(Ds), rndPick(Ds));
1854 // XXX: we don't push the result, because most (all?) of the
1855 // backends currently can't handle cmovs/qcmovs that take
1856 // float comparisons for the test (see bug 520944). This means
1857 // that all B_DD values are dead, unfortunately.
1858 //addOrReplace(Bs, ins);
// Conversion cases (int<->quad, int<->double, quad<->double bit-casts).
1863 #ifdef NANOJIT_64BIT
1866 ins = mLir->ins1(rndPick(Q_I_ops), rndPick(Is));
1867 addOrReplace(Qs, ins);
1874 if (!Is.empty() && !D_I_ops.empty()) {
1875 ins = mLir->ins1(rndPick(D_I_ops), rndPick(Is));
1876 addOrReplace(Ds, ins);
1881 #ifdef NANOJIT_64BIT
1884 ins = mLir->ins1(rndPick(I_Q_ops), rndPick(Qs));
1885 addOrReplace(Is, ins);
1892 // XXX: NativeX64 doesn't implement qhi yet (and it may not need to).
1893 #if !defined NANOJIT_X64
1895 ins = mLir->ins1(rndPick(I_D_ops), rndPick(Ds));
1896 addOrReplace(Is, ins);
1902 #if defined NANOJIT_X64
1905 ins = mLir->ins1(rndPick(Q_D_ops), rndPick(Ds));
1906 addOrReplace(Qs, ins);
1913 ins = mLir->ins1(rndPick(D_Q_ops), rndPick(Qs));
1914 addOrReplace(Ds, ins);
1921 if (!Is.empty() && !D_II_ops.empty()) {
1922 ins = mLir->ins2(rndPick(D_II_ops), rndPick(Is), rndPick(Is));
1923 addOrReplace(Ds, ins);
// NOTE(review): non-contiguous excerpt — case labels/braces between visible
// lines are missing. Loads/stores pick a base from the alloc pools (M4s =
// 4-byte-safe bases, M8ps = 8-byte/pointer-safe bases) with a random
// bounded offset; calls pull arguments from the matching operand pools.
1929 vector<LIns*> Ms = rnd(2) ? M4s : M8ps;
1931 LIns* base = rndPick(Ms);
1932 ins = mLir->insLoad(rndPick(I_loads), base, rndOffset32(base->size()), ACCSET_OTHER);
1933 addOrReplace(Is, ins);
1939 #ifdef NANOJIT_64BIT
1941 if (!M8ps.empty()) {
1942 LIns* base = rndPick(M8ps);
1943 ins = mLir->insLoad(rndPick(Q_loads), base, rndOffset64(base->size()), ACCSET_OTHER);
1944 addOrReplace(Qs, ins);
1951 if (!M8ps.empty()) {
1952 LIns* base = rndPick(M8ps);
1953 ins = mLir->insLoad(rndPick(D_loads), base, rndOffset64(base->size()), ACCSET_OTHER);
1954 addOrReplace(Ds, ins);
// Stores produce no LIR value, so nothing is added to the operand pools.
1960 vector<LIns*> Ms = rnd(2) ? M4s : M8ps;
1961 if (!Ms.empty() && !Is.empty()) {
1962 LIns* base = rndPick(Ms);
1963 mLir->insStore(rndPick(Is), base, rndOffset32(base->size()), ACCSET_OTHER);
1969 #ifdef NANOJIT_64BIT
1971 if (!M8ps.empty() && !Qs.empty()) {
1972 LIns* base = rndPick(M8ps);
1973 mLir->insStore(rndPick(Qs), base, rndOffset64(base->size()), ACCSET_OTHER);
1980 if (!M8ps.empty() && !Ds.empty()) {
1981 LIns* base = rndPick(M8ps);
1982 mLir->insStore(rndPick(Ds), base, rndOffset64(base->size()), ACCSET_OTHER);
// Call cases: ci_* are CallInfo descriptors (declared elsewhere in this
// file) for helper functions with various arities and types.
1989 LIns* args[1] = { rndPick(Is) };
1990 ins = mLir->insCall(&ci_I_I1, args);
1991 addOrReplace(Is, ins);
1998 LIns* args[6] = { rndPick(Is), rndPick(Is), rndPick(Is),
1999 rndPick(Is), rndPick(Is), rndPick(Is) };
2000 ins = mLir->insCall(&ci_I_I6, args);
2001 addOrReplace(Is, ins);
2006 #ifdef NANOJIT_64BIT
2009 LIns* args[2] = { rndPick(Qs), rndPick(Qs) };
2010 ins = mLir->insCall(&ci_Q_Q2, args);
2011 addOrReplace(Qs, ins);
2018 LIns* args[7] = { rndPick(Qs), rndPick(Qs), rndPick(Qs), rndPick(Qs),
2019 rndPick(Qs), rndPick(Qs), rndPick(Qs) };
2020 ins = mLir->insCall(&ci_Q_Q7, args);
2021 addOrReplace(Qs, ins);
2029 LIns* args[3] = { rndPick(Ds), rndPick(Ds), rndPick(Ds) };
2030 ins = mLir->insCall(&ci_F_F3, args);
2031 addOrReplace(Ds, ins);
2038 LIns* args[8] = { rndPick(Ds), rndPick(Ds), rndPick(Ds), rndPick(Ds),
2039 rndPick(Ds), rndPick(Ds), rndPick(Ds), rndPick(Ds) };
2040 ins = mLir->insCall(&ci_F_F8, args);
2041 addOrReplace(Ds, ins);
2046 #ifdef NANOJIT_64BIT
2048 if (!Is.empty() && !Qs.empty() && !Ds.empty()) {
2049 // Nb: args[] holds the args in reverse order... sigh.
2050 LIns* args[3] = { rndPick(Ds), rndPick(Qs), rndPick(Is) };
2051 ins = mLir->insCall(&ci_V_IQF, args);
2058 // Although no jumps are generated yet, labels are important
2059 // because they delimit areas where CSE can be applied. Without
2060 // them, CSE can be applied over very long regions, which leads to
2061 // values that have very large live ranges, which leads to stack
2063 mLir->ins0(LIR_label);
// Epilogue: free the weighted-class table and end the fragment with a
// constant integer return.
2073 delete[] classGenerator;
2076 mReturnTypeBits |= RT_INT;
2077 mLir->ins1(LIR_reti, mLir->insImmI(0));
// Lirasm constructor (non-contiguous excerpt: several member-init and setup
// lines between the numbered lines are missing). Wires the assembler to the
// shared allocators/core/log config, allocates the LIR buffer, enables
// verbose logging channels, and populates the opcode-name -> LOpcode map
// from the LIRopcode.tbl X-macro.
2082 Lirasm::Lirasm(bool verbose) :
2083 mAssm(mCodeAlloc, mAlloc, mAlloc, &mCore, &mLogc, nanojit::AvmCore::config)
2088 mLirbuf = new (mAlloc) LirBuffer(mAlloc);
2091 mLogc.lcbits = LC_ReadLIR | LC_AfterDCE | LC_Native | LC_RegAlloc | LC_Activation;
2092 mLirbuf->printer = new (mAlloc) LInsPrinter(mAlloc, LIRASM_NUM_USED_ACCS);
2096 // Populate the mOpMap table.
2097 #define OP___(op, number, repKind, retType, isCse) \
2098 mOpMap[#op] = LIR_##op;
2099 #include "nanojit/LIRopcode.tbl"
2102 // XXX: could add more pointer-sized synonyms here
// "paramp"/"livep" are pointer-sized aliases resolved to the 32- or 64-bit
// opcode via PTR_SIZE at compile time.
2103 mOpMap["paramp"] = mOpMap[PTR_SIZE("parami", "paramq")];
2104 mOpMap["livep"] = mOpMap[PTR_SIZE("livei", "liveq")];
// Destructor body (the signature line is not in this excerpt): frees every
// Fragment object owned by the mFragments map.
2109 Fragments::iterator i;
2110 for (i = mFragments.begin(); i != mFragments.end(); ++i) {
2111 delete i->second.fragptr;
// Resolves a call target by name: first against the built-in 'functions'
// table, then against previously assembled fragments (so fragments can call
// each other). Non-contiguous excerpt — return statements and braces between
// the numbered lines are missing.
// NOTE(review): the parameter is declared 'CallInfo *&ci' yet the table hit
// assigns via '*ci = ...'; with the surrounding lines missing I can't tell
// whether ci is expected to point at caller-owned storage — confirm against
// the full file before touching this.
2117 Lirasm::lookupFunction(const string &name, CallInfo *&ci)
2119 const size_t nfuns = sizeof(functions) / sizeof(functions[0]);
2120 for (size_t i = 0; i < nfuns; i++) {
2121 if (name == functions[i].name) {
2122 *ci = functions[i].callInfo;
2127 Fragments::const_iterator func = mFragments.find(name);
2128 if (func != mFragments.end()) {
2129 // The ABI, arg types and ret type will be overridden by the caller.
// Fragment targets get a synthesized CallInfo whose entry point is the
// fragment's compiled double- or int-returning function pointer.
2130 if (func->second.mReturnType == RT_DOUBLE) {
2131 CallInfo target = {(uintptr_t) func->second.rdouble,
2132 0, ABI_FASTCALL, /*isPure*/0, ACCSET_STORE_ANY
2133 verbose_only(, func->first.c_str()) };
2137 CallInfo target = {(uintptr_t) func->second.rint,
2138 0, ABI_FASTCALL, /*isPure*/0, ACCSET_STORE_ANY
2139 verbose_only(, func->first.c_str()) };
2145 bad("invalid function reference " + name);
// Top-level driver for textual LIR input: tokenizes the stream and
// dispatches on directives. '.begin name'/'.end' delimit named fragments;
// '.patch' rewires a guard's exit; any other opcode token starts an
// implicit fragment named "main". Non-contiguous excerpt — some braces and
// statements between the numbered lines are missing.
2151 Lirasm::assemble(istream &in, bool optimize)
2153 LirTokenStream ts(in);
2157 while (ts.get(token)) {
2159 if (token.type == NEWLINE)
2161 if (token.type != NAME)
2162 bad("unexpected token '" + token.data + "'");
2164 const string &op = token.data;
2165 if (op == ".patch") {
2167 } else if (op == ".begin") {
2169 if (!ts.getName(name))
2170 bad("expected fragment name after .begin");
2171 if (!ts.eat(NEWLINE))
2172 bad("extra junk after .begin " + name);
// Named fragment: assemble until the matching .end.
2174 FragmentAssembler assembler(*this, name, optimize);
2175 assembler.assembleFragment(ts, false, NULL);
2177 } else if (op == ".end") {
2178 bad(".end without .begin");
// Bare opcode at top level: treat the rest of the input as fragment "main",
// re-feeding the already-consumed token.
2180 FragmentAssembler assembler(*this, "main", optimize);
2181 assembler.assembleFragment(ts, true, &token);
2184 bad("unexpected stray opcode '" + op + "'");
// Generates and assembles a random fragment of nIns instructions named
// "main" (used by the --random command-line mode).
2190 Lirasm::assembleRandom(int nIns, bool optimize)
2192 string name = "main";
2193 FragmentAssembler assembler(*this, name, optimize);
2194 assembler.assembleRandomFragment(nIns);
// Implements the '.patch frag.guard -> dest' directive: looks up the guard
// labelled 'guard' inside fragment 'frag', points its side exit at fragment
// 'dest', and asks the assembler to patch the compiled code. Non-contiguous
// excerpt — some braces between the numbered lines are missing.
2198 Lirasm::handlePatch(LirTokenStream &in)
2200 string src, fragName, guardName, destName;
2202 if (!in.getName(src) || !in.eat(PUNCT, "->") || !in.getName(destName))
2203 bad("incorrect syntax");
2205 // Break the src at '.'. This is awkward but the syntax looks nice.
2206 size_t j = src.find('.');
// '.' must be interior: reject a missing, leading, or trailing dot.
2207 if (j == string::npos || j == 0 || j == src.size() - 1)
2208 bad("incorrect syntax");
2209 fragName = src.substr(0, j);
2210 guardName = src.substr(j + 1);
2212 Fragments::iterator i;
2213 if ((i=mFragments.find(fragName)) == mFragments.end())
2214 bad("invalid fragment reference");
2215 LirasmFragment *frag = &i->second;
2216 if (frag->mLabels.find(guardName) == frag->mLabels.end())
2217 bad("invalid guard reference");
2218 LIns *ins = frag->mLabels.find(guardName)->second;
// Note: 'i' is reused here to look up the destination fragment.
2219 if ((i=mFragments.find(destName)) == mFragments.end())
2220 bad("invalid guard reference");
2221 ins->record()->exit->target = i->second.fragptr;
2223 mAssm.patch(ins->record()->exit);
// Prints the command-line usage text (presumably followed by an exit on a
// line missing from this excerpt — TODO confirm).
2227 usageAndQuit(const string& progname)
2230 "usage: " << progname << " [options] [filename]\n"
2232 " -h --help print this message\n"
2233 " -v --verbose print LIR and assembly code\n"
2234 " --execute execute LIR\n"
2235 " --[no-]optimize enable or disable optimization of the LIR (default=off)\n"
2236 " --random [N] generate a random LIR block of size N (default=1000)\n"
2238 "Build query options (these print a value for this build of lirasm and exit)\n"
2239 " --show-arch show the architecture ('i386', 'X64', 'arm', 'ppc',\n"
2240 " 'sparc', 'mips', or 'sh4')\n"
2241 " --show-word-size show the word size ('32' or '64')\n"
2242 " --show-endianness show the endianness ('little-endian' or 'big-endian')\n"
2244 "i386-specific options:\n"
2245 " --[no]sse use SSE2 instructions (default=on)\n"
2247 "ARM-specific options:\n"
2248 " --arch N use ARM architecture version N instructions (default=7)\n"
2249 " --[no]vfp use ARM VFP instructions (default=on)\n"
// Prints an error to stderr prefixed with the program name (the exit call
// is presumably on a line missing from this excerpt — TODO confirm).
2256 errMsgAndQuit(const string& progname, const string& msg)
2258 cerr << progname << ": " << msg << endl;
// Command-line option bag filled in by processCmdLine; members are on lines
// missing from this excerpt.
2262 struct CmdLineOptions {
// Parses argv into 'opts' and applies architecture-specific settings to the
// global AvmCore config. Non-contiguous excerpt — various else-branches,
// braces, and exit calls between the numbered lines are missing.
2272 processCmdLine(int argc, char **argv, CmdLineOptions& opts)
2274 opts.progname = argv[0];
2275 opts.verbose = false;
2276 opts.execute = false;
2278 opts.optimize = false;
2280 // Architecture-specific options.
2281 #if defined NANOJIT_IA32
2282 bool i386_sse = true;
2283 #elif defined NANOJIT_ARM
2284 unsigned int arm_arch = 7;
2285 bool arm_vfp = true;
2288 for (int i = 1; i < argc; i++) {
2289 string arg = argv[i];
2291 // Common flags for every architecture.
2292 if (arg == "-h" || arg == "--help")
2293 usageAndQuit(opts.progname);
2294 else if (arg == "-v" || arg == "--verbose")
2295 opts.verbose = true;
2296 else if (arg == "--execute")
2297 opts.execute = true;
2298 else if (arg == "--optimize")
2299 opts.optimize = true;
2300 else if (arg == "--no-optimize")
2301 opts.optimize = false;
2302 else if (arg == "--random") {
// --random takes an optional numeric size; a non-numeric or absent next
// arg falls back to the default.
// NOTE(review): defaultSize here is 100 but the usage text says
// default=1000 — one of the two is stale; confirm against the full file.
2303 const int defaultSize = 100;
2304 if (i == argc - 1) {
2305 opts.random = defaultSize; // no numeric argument, use default
2308 int res = strtol(argv[i+1], &endptr, 10);
2309 if ('\0' == *endptr) {
2310 // We don't bother checking for overflow.
2312 errMsgAndQuit(opts.progname, "--random argument must be greater than zero");
2313 opts.random = res; // next arg is a number, use that for the size
2316 opts.random = defaultSize; // next arg is not a number
// Build-query flags: print one fact about this build and (presumably) exit.
2320 else if (arg == "--show-arch") {
2322 #if defined NANOJIT_IA32
2324 #elif defined NANOJIT_X64
2326 #elif defined NANOJIT_ARM
2328 #elif defined NANOJIT_PPC
2330 #elif defined NANOJIT_SPARC
2332 #elif defined NANOJIT_MIPS
2334 #elif defined NANOJIT_SH4
2337 # error "unknown arch"
2339 cout << str << "\n";
2342 else if (arg == "--show-word-size") {
2343 cout << sizeof(void*) * 8 << "\n";
2346 else if (arg == "--show-endianness") {
// Runtime endianness probe: inspect the first byte of a known 32-bit value.
2347 int32_t x = 0x01020304;
2348 if (*(char*)&x == 0x1) {
2349 cout << "big-endian" << "\n";
2351 cout << "little-endian" << "\n";
2356 // Architecture-specific flags.
2357 #if defined NANOJIT_IA32
2358 else if (arg == "--sse") {
2361 else if (arg == "--nosse") {
2364 #elif defined NANOJIT_ARM
2365 else if ((arg == "--arch") && (i < argc-1)) {
2367 arm_arch = strtoul(argv[i+1], &endptr, 10);
2368 // Check that the argument was a number.
2369 if ('\0' == *endptr) {
2370 if ((arm_arch < 4) || (arm_arch > 7)) {
2371 errMsgAndQuit(opts.progname, "Unsupported argument to --arch.\n");
2374 errMsgAndQuit(opts.progname, "Unrecognized argument to --arch.\n");
2377 } else if (arg == "--vfp") {
2379 } else if (arg == "--novfp") {
2383 // Input file names.
2384 else if (arg[0] != '-') {
2385 if (opts.filename.empty())
2386 opts.filename = arg;
2388 errMsgAndQuit(opts.progname, "you can only specify one filename");
2390 // No matching flag found, so report the error.
2392 errMsgAndQuit(opts.progname, "bad option: " + arg);
// Exactly one of --random and a filename must be given.
2395 if ((!opts.random && opts.filename.empty()) || (opts.random && !opts.filename.empty()))
2396 errMsgAndQuit(opts.progname,
2397 "you must specify either a filename or --random (but not both)");
2399 // Handle the architecture-specific options.
2400 #if defined NANOJIT_IA32
2401 avmplus::AvmCore::config.i386_use_cmov = avmplus::AvmCore::config.i386_sse2 = i386_sse;
2402 avmplus::AvmCore::config.i386_fixed_esp = true;
2403 #elif defined NANOJIT_ARM
2404 // Warn about untested configurations.
2405 if ( ((arm_arch == 5) && (arm_vfp)) || ((arm_arch >= 6) && (!arm_vfp)) ) {
2406 char const * vfp_string = (arm_vfp) ? ("VFP") : ("no VFP");
2407 cerr << "Warning: This configuration (ARMv" << arm_arch << ", " << vfp_string << ") " <<
2408 "is not regularly tested." << endl;
2411 avmplus::AvmCore::config.arm_arch = arm_arch;
2412 avmplus::AvmCore::config.arm_vfp = arm_vfp;
// soft_float mirrors the inverse of VFP availability.
2413 avmplus::AvmCore::config.soft_float = !arm_vfp;
// Program entry point (runs past the end of this excerpt): parse options,
// assemble either a random fragment or the input file, then — when
// --execute was given — run the fragment named "main" and print its result
// according to its return type; otherwise dump code records.
2418 main(int argc, char **argv)
2420 CmdLineOptions opts;
2421 processCmdLine(argc, argv, opts);
2423 Lirasm lasm(opts.verbose);
2425 lasm.assembleRandom(opts.random, opts.optimize);
2427 ifstream in(opts.filename.c_str());
2429 errMsgAndQuit(opts.progname, "unable to open file " + opts.filename);
2430 lasm.assemble(in, opts.optimize);
2433 Fragments::const_iterator i;
2435 i = lasm.mFragments.find("main");
2436 if (i == lasm.mFragments.end())
2437 errMsgAndQuit(opts.progname, "error: at least one fragment must be named 'main'");
// Dispatch on the fragment's declared return type to call the right
// compiled entry point.
2438 switch (i->second.mReturnType) {
2440 int res = i->second.rint();
2441 cout << "Output is: " << res << endl;
2444 #ifdef NANOJIT_64BIT
// NOTE(review): the 64-bit result is stored into an 'int' before printing,
// which truncates the upper 32 bits of rquad()'s value — confirm whether
// this is intentional in the full file.
2446 int res = i->second.rquad();
2447 cout << "Output is: " << res << endl;
2452 double res = i->second.rdouble();
2453 cout << "Output is: " << res << endl;
// Guard-returning fragments report the source line of the exit taken.
2457 LasmSideExit *ls = (LasmSideExit*) i->second.rguard()->exit;
2458 cout << "Exited block on line: " << ls->line << endl;
2463 for (i = lasm.mFragments.begin(); i != lasm.mFragments.end(); i++)
2464 dump_srecords(cout, i->second.fragptr);