/***********************************************************************

  A JavaScript tokenizer / parser / beautifier / compressor.

  This version is suitable for Node.js.  With minimal changes (the
  exports stuff) it should work on any JS platform.

  This file contains the tokenizer/parser.  It is a port to JavaScript
  of parse-js [1], a JavaScript parser library written in Common Lisp
  by Marijn Haverbeke.  Thank you Marijn!

  [1] http://marijn.haverbeke.nl/parse-js/

  Exported functions:

    - tokenizer(code) -- returns a function.  Call the returned
      function to fetch the next token.

    - parse(code) -- returns an AST of the given JavaScript code.

  -------------------------------- (C) ---------------------------------

                           Author: Mihai Bazon
                         <mihai.bazon@gmail.com>
                       http://mihai.bazon.net/blog

  Distributed under the BSD license:

    Copyright 2010 (c) Mihai Bazon <mihai.bazon@gmail.com>
    Based on parse-js (http://marijn.haverbeke.nl/parse-js/).

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

        * Redistributions of source code must retain the above
          copyright notice, this list of conditions and the following
          disclaimer.

        * Redistributions in binary form must reproduce the above
          copyright notice, this list of conditions and the following
          disclaimer in the documentation and/or other materials
          provided with the distribution.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER “AS IS” AND ANY
    EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE
    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
    OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
    TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
    THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    SUCH DAMAGE.

 ***********************************************************************/
/* -----[ Tokenizer (constants) ]----- */
62 var KEYWORDS = array_to_hash([
90 var RESERVED_WORDS = array_to_hash([
123 var KEYWORDS_BEFORE_EXPRESSION = array_to_hash([
132 var KEYWORDS_ATOM = array_to_hash([
// Characters that can start or extend a symbolic operator.  next_token()
// hands control to read_operator() when the current character is in this
// set.  "/" is absent on purpose: next_token() routes it to handle_slash().
var OPERATOR_CHARS = array_to_hash(characters("+-*&%=<>!?|~^"));
// Shapes of numeric literals.  parse_js_number() tries them in this order.

// "0x" (either case) followed by at least one hexadecimal digit.
var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i;

// A leading "0" followed by at least one digit in 0..7.
var RE_OCT_NUMBER = /^0[0-7]+$/;

// Decimal digits with an optional "." and an optional exponent.  The
// pattern is loose: it also matches "" and ".", for which parseFloat()
// yields NaN.
var RE_DEC_NUMBER = /^\d*\.?\d*(?:e[+-]?\d*(?:\d\.?|\.?\d)\d*)?$/i;
145 var OPERATORS = array_to_hash([
// Characters skipped between tokens by skip_whitespace().
var WHITESPACE_CHARS = array_to_hash(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"));

// Punctuation after which token() marks a regexp literal as allowed
// (i.e. a following "/" starts a regexp rather than a division).
var PUNC_BEFORE_EXPRESSION = array_to_hash(characters("[{}(,.;:"));

// Single characters that next_token() emits directly as "punc" tokens.
var PUNC_CHARS = array_to_hash(characters("[]{}(),;:"));

// Letters accepted as regexp flags.
var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy"));
/* -----[ Tokenizer ]----- */
// regexps adapted from http://xregexp.com/plugins/#unicode
//
// Character classes used to recognise identifier characters:
//   letter                - tested by is_letter()
//   non_spacing_mark      - tested by is_unicode_combining_mark()
//   space_combining_mark  - tested by is_unicode_combining_mark()
//   connector_punctuation - tested by is_unicode_connector_punctuation()
//
// The CJK blocks (U+3400-U+4DB5, U+4E00-U+9FC3) and the Hangul syllable
// block (U+AC00-U+D7A3) must be written as ranges.  Listing only their first
// and last code points makes every character in between fail the letter
// test, so identifiers written in Chinese, Japanese or Korean are rejected.
var UNICODE = {
    letter: new RegExp(
        "[\\u0041-\\u005A\\u0061-\\u007A\\u00AA\\u00B5\\u00BA\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02C1\\u02C6-\\u02D1\\u02E0-\\u02E4\\u02EC\\u02EE\\u0370-\\u0374\\u0376\\u0377\\u037A-\\u037D\\u0386\\u0388-\\u038A\\u038C\\u038E-\\u03A1\\u03A3-\\u03F5\\u03F7-\\u0481\\u048A-\\u0523\\u0531-\\u0556\\u0559\\u0561-\\u0587\\u05D0-\\u05EA\\u05F0-\\u05F2\\u0621-\\u064A\\u066E\\u066F\\u0671-\\u06D3\\u06D5\\u06E5\\u06E6\\u06EE\\u06EF\\u06FA-\\u06FC\\u06FF\\u0710\\u0712-\\u072F\\u074D-\\u07A5\\u07B1\\u07CA-\\u07EA\\u07F4\\u07F5\\u07FA\\u0904-\\u0939\\u093D\\u0950\\u0958-\\u0961\\u0971\\u0972\\u097B-\\u097F\\u0985-\\u098C\\u098F\\u0990\\u0993-\\u09A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9\\u09BD\\u09CE\\u09DC\\u09DD\\u09DF-\\u09E1\\u09F0\\u09F1\\u0A05-\\u0A0A\\u0A0F\\u0A10\\u0A13-\\u0A28\\u0A2A-\\u0A30\\u0A32\\u0A33\\u0A35\\u0A36\\u0A38\\u0A39\\u0A59-\\u0A5C\\u0A5E\\u0A72-\\u0A74\\u0A85-\\u0A8D\\u0A8F-\\u0A91\\u0A93-\\u0AA8\\u0AAA-\\u0AB0\\u0AB2\\u0AB3\\u0AB5-\\u0AB9\\u0ABD\\u0AD0\\u0AE0\\u0AE1\\u0B05-\\u0B0C\\u0B0F\\u0B10\\u0B13-\\u0B28\\u0B2A-\\u0B30\\u0B32\\u0B33\\u0B35-\\u0B39\\u0B3D\\u0B5C\\u0B5D\\u0B5F-\\u0B61\\u0B71\\u0B83\\u0B85-\\u0B8A\\u0B8E-\\u0B90\\u0B92-\\u0B95\\u0B99\\u0B9A\\u0B9C\\u0B9E\\u0B9F\\u0BA3\\u0BA4\\u0BA8-\\u0BAA\\u0BAE-\\u0BB9\\u0BD0\\u0C05-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28\\u0C2A-\\u0C33\\u0C35-\\u0C39\\u0C3D\\u0C58\\u0C59\\u0C60\\u0C61\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBD\\u0CDE\\u0CE0\\u0CE1\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D28\\u0D2A-\\u0D39\\u0D3D\\u0D60\\u0D61\\u0D7A-\\u0D7F\\u0D85-\\u0D96\\u0D9A-\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0E01-\\u0E30\\u0E32\\u0E33\\u0E40-\\u0E46\\u0E81\\u0E82\\u0E84\\u0E87\\u0E88\\u0E8A\\u0E8D\\u0E94-\\u0E97\\u0E99-\\u0E9F\\u0EA1-\\u0EA3\\u0EA5\\u0EA7\\u0EAA\\u0EAB\\u0EAD-\\u0EB0\\u0EB2\\u0EB3\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EDC\\u0EDD\\u0F00\\u0F40-\\u0F47\\u0F49-\\u0F6C\\u0F88-\\u0F8B\\u1000-\\u102A\\u103F\\u1050-\\u1055\\u105A-\\u105D" +
        "\\u1061\\u1065\\u1066\\u106E-\\u1070\\u1075-\\u1081\\u108E\\u10A0-\\u10C5\\u10D0-\\u10FA\\u10FC\\u1100-\\u1159\\u115F-\\u11A2\\u11A8-\\u11F9\\u1200-\\u1248\\u124A-\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u1380-\\u138F\\u13A0-\\u13F4\\u1401-\\u166C\\u166F-\\u1676\\u1681-\\u169A\\u16A0-\\u16EA\\u1700-\\u170C\\u170E-\\u1711\\u1720-\\u1731\\u1740-\\u1751\\u1760-\\u176C\\u176E-\\u1770\\u1780-\\u17B3\\u17D7\\u17DC\\u1820-\\u1877\\u1880-\\u18A8\\u18AA\\u1900-\\u191C\\u1950-\\u196D\\u1970-\\u1974\\u1980-\\u19A9\\u19C1-\\u19C7\\u1A00-\\u1A16\\u1B05-\\u1B33\\u1B45-\\u1B4B\\u1B83-\\u1BA0\\u1BAE\\u1BAF\\u1C00-\\u1C23\\u1C4D-\\u1C4F\\u1C5A-\\u1C7D\\u1D00-\\u1DBF\\u1E00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FBC\\u1FBE\\u1FC2-\\u1FC4\\u1FC6-\\u1FCC\\u1FD0-\\u1FD3\\u1FD6-\\u1FDB\\u1FE0-\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FFC\\u2071\\u207F\\u2090-\\u2094\\u2102\\u2107\\u210A-\\u2113\\u2115\\u2119-\\u211D\\u2124\\u2126\\u2128\\u212A-\\u212D\\u212F-\\u2139\\u213C-\\u213F\\u2145-\\u2149\\u214E\\u2183\\u2184\\u2C00-\\u2C2E\\u2C30-\\u2C5E\\u2C60-\\u2C6F\\u2C71-\\u2C7D\\u2C80-\\u2CE4\\u2D00-\\u2D25\\u2D30-\\u2D65\\u2D6F\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6\\u2DB8-\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE\\u2E2F\\u3005\\u3006\\u3031-\\u3035\\u303B\\u303C\\u3041-\\u3096\\u309D-\\u309F\\u30A1-\\u30FA\\u30FC-\\u30FF\\u3105-\\u312D\\u3131-\\u318E\\u31A0-\\u31B7\\u31F0-\\u31FF\\u3400-\\u4DB5\\u4E00-\\u9FC3\\uA000-\\uA48C\\uA500-\\uA60C\\uA610-\\uA61F\\uA62A\\uA62B\\uA640-\\uA65F\\uA662-\\uA66E\\uA67F-\\uA697\\uA717-\\uA71F\\uA722-\\uA788\\uA78B\\uA78C\\uA7FB-\\uA801\\uA803-\\uA805\\uA807-\\uA80A\\uA80C-\\uA822\\uA840-\\uA873\\uA882-\\uA8B3\\uA90A-\\uA925\\uA930-\\uA946\\uAA00-\\uAA28\\uAA40-\\uAA42" +
        "\\uAA44-\\uAA4B\\uAC00-\\uD7A3\\uF900-\\uFA2D\\uFA30-\\uFA6A\\uFA70-\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D\\uFB1F-\\uFB28\\uFB2A-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFB\\uFE70-\\uFE74\\uFE76-\\uFEFC\\uFF21-\\uFF3A\\uFF41-\\uFF5A\\uFF66-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC]"
    ),
    non_spacing_mark: new RegExp(
        "[\\u0300-\\u036F\\u0483-\\u0487\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u061A\\u064B-\\u065E\\u0670\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07A6-\\u07B0\\u07EB-\\u07F3\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-\\u082D\\u0900-\\u0902\\u093C\\u0941-\\u0948\\u094D\\u0951-\\u0955\\u0962\\u0963\\u0981\\u09BC\\u09C1-\\u09C4\\u09CD\\u09E2\\u09E3\\u0A01\\u0A02\\u0A3C\\u0A41\\u0A42\\u0A47\\u0A48\\u0A4B-\\u0A4D\\u0A51\\u0A70\\u0A71\\u0A75\\u0A81\\u0A82\\u0ABC\\u0AC1-\\u0AC5\\u0AC7\\u0AC8\\u0ACD\\u0AE2\\u0AE3\\u0B01\\u0B3C\\u0B3F\\u0B41-\\u0B44\\u0B4D\\u0B56\\u0B62\\u0B63\\u0B82\\u0BC0\\u0BCD\\u0C3E-\\u0C40\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C62\\u0C63\\u0CBC\\u0CBF\\u0CC6\\u0CCC\\u0CCD\\u0CE2\\u0CE3\\u0D41-\\u0D44\\u0D4D\\u0D62\\u0D63\\u0DCA\\u0DD2-\\u0DD4\\u0DD6\\u0E31\\u0E34-\\u0E3A\\u0E47-\\u0E4E\\u0EB1\\u0EB4-\\u0EB9\\u0EBB\\u0EBC\\u0EC8-\\u0ECD\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F71-\\u0F7E\\u0F80-\\u0F84\\u0F86\\u0F87\\u0F90-\\u0F97\\u0F99-\\u0FBC\\u0FC6\\u102D-\\u1030\\u1032-\\u1037\\u1039\\u103A\\u103D\\u103E\\u1058\\u1059\\u105E-\\u1060\\u1071-\\u1074\\u1082\\u1085\\u1086\\u108D\\u109D\\u135F\\u1712-\\u1714\\u1732-\\u1734\\u1752\\u1753\\u1772\\u1773\\u17B7-\\u17BD\\u17C6\\u17C9-\\u17D3\\u17DD\\u180B-\\u180D\\u18A9\\u1920-\\u1922\\u1927\\u1928\\u1932\\u1939-\\u193B\\u1A17\\u1A18\\u1A56\\u1A58-\\u1A5E\\u1A60\\u1A62\\u1A65-\\u1A6C\\u1A73-\\u1A7C\\u1A7F\\u1B00-\\u1B03\\u1B34\\u1B36-\\u1B3A\\u1B3C\\u1B42\\u1B6B-\\u1B73\\u1B80\\u1B81\\u1BA2-\\u1BA5\\u1BA8\\u1BA9\\u1C2C-\\u1C33\\u1C36\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8\\u1CED\\u1DC0-\\u1DE6\\u1DFD-\\u1DFF\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2CEF-\\u2CF1\\u2DE0-\\u2DFF\\u302A-\\u302F\\u3099\\u309A\\uA66F\\uA67C\\uA67D\\uA6F0\\uA6F1\\uA802\\uA806\\uA80B\\uA825\\uA826\\uA8C4\\uA8E0-\\uA8F1\\uA926-\\uA92D\\uA947-\\uA951\\uA980-\\uA982\\uA9B3\\uA9B6-\\uA9B9\\uA9BC\\uAA29-\\uAA2E" +
        "\\uAA31\\uAA32\\uAA35\\uAA36\\uAA43\\uAA4C\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8\\uAABE\\uAABF\\uAAC1\\uABE5\\uABE8\\uABED\\uFB1E\\uFE00-\\uFE0F\\uFE20-\\uFE26]"
    ),
    space_combining_mark: new RegExp(
        "[\\u0903\\u093E-\\u0940\\u0949-\\u094C\\u094E\\u0982\\u0983\\u09BE-\\u09C0\\u09C7\\u09C8\\u09CB\\u09CC\\u09D7\\u0A03\\u0A3E-\\u0A40\\u0A83\\u0ABE-\\u0AC0\\u0AC9\\u0ACB\\u0ACC\\u0B02\\u0B03\\u0B3E\\u0B40\\u0B47\\u0B48\\u0B4B\\u0B4C\\u0B57\\u0BBE\\u0BBF\\u0BC1\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCC\\u0BD7\\u0C01-\\u0C03\\u0C41-\\u0C44\\u0C82\\u0C83\\u0CBE\\u0CC0-\\u0CC4\\u0CC7\\u0CC8\\u0CCA\\u0CCB\\u0CD5\\u0CD6\\u0D02\\u0D03\\u0D3E-\\u0D40\\u0D46-\\u0D48\\u0D4A-\\u0D4C\\u0D57\\u0D82\\u0D83\\u0DCF-\\u0DD1\\u0DD8-\\u0DDF\\u0DF2\\u0DF3\\u0F3E\\u0F3F\\u0F7F\\u102B\\u102C\\u1031\\u1038\\u103B\\u103C\\u1056\\u1057\\u1062-\\u1064\\u1067-\\u106D\\u1083\\u1084\\u1087-\\u108C\\u108F\\u109A-\\u109C\\u17B6\\u17BE-\\u17C5\\u17C7\\u17C8\\u1923-\\u1926\\u1929-\\u192B\\u1930\\u1931\\u1933-\\u1938\\u19B0-\\u19C0\\u19C8\\u19C9\\u1A19-\\u1A1B\\u1A55\\u1A57\\u1A61\\u1A63\\u1A64\\u1A6D-\\u1A72\\u1B04\\u1B35\\u1B3B\\u1B3D-\\u1B41\\u1B43\\u1B44\\u1B82\\u1BA1\\u1BA6\\u1BA7\\u1BAA\\u1C24-\\u1C2B\\u1C34\\u1C35\\u1CE1\\u1CF2\\uA823\\uA824\\uA827\\uA880\\uA881\\uA8B4-\\uA8C3\\uA952\\uA953\\uA983\\uA9B4\\uA9B5\\uA9BA\\uA9BB\\uA9BD-\\uA9C0\\uAA2F\\uAA30\\uAA33\\uAA34\\uAA4D\\uAA7B\\uABE3\\uABE4\\uABE6\\uABE7\\uABE9\\uABEA\\uABEC]"
    ),
    connector_punctuation: new RegExp("[\\u005F\\u203F\\u2040\\u2054\\uFE33\\uFE34\\uFE4D-\\uFE4F\\uFF3F]")
};
// True when `ch` matches the UNICODE.letter character class.
function is_letter(ch) {
    return UNICODE.letter.test(ch);
}
// True when the first character of `ch` is an ASCII digit.
// An empty string yields NaN from charCodeAt() and therefore false.
function is_digit(ch) {
    var code = ch.charCodeAt(0);
    // 48..57 are the char codes of "0".."9".
    //XXX: find out if "UnicodeDigit" means something else than 0..9
    return code >= 48 && code <= 57;
}
// True when `ch` is an ASCII digit or a Unicode letter.
function is_alphanumeric_char(ch) {
    return is_digit(ch) || is_letter(ch);
}
// True when `ch` matches either combining-mark class in UNICODE
// (non_spacing_mark or space_combining_mark).
function is_unicode_combining_mark(ch) {
    return UNICODE.non_spacing_mark.test(ch) || UNICODE.space_combining_mark.test(ch);
}
// True when `ch` matches the UNICODE.connector_punctuation character class.
function is_unicode_connector_punctuation(ch) {
    return UNICODE.connector_punctuation.test(ch);
}
// True when `ch` may begin an identifier: "$", "_" or any Unicode letter.
function is_identifier_start(ch) {
    return ch == "$" || ch == "_" || is_letter(ch);
}
// True when `ch` may appear inside an identifier after its first character.
// This is every identifier-start character plus combining marks, digits,
// connector punctuation and the two zero-width joiner characters.
function is_identifier_char(ch) {
    return is_identifier_start(ch)
        || is_unicode_combining_mark(ch)
        // Digits are legal everywhere except the first position; without
        // this test a name such as "a1" would be cut off after the "a".
        || is_digit(ch)
        || is_unicode_connector_punctuation(ch)
        || ch == "\u200c" // zero-width non-joiner <ZWNJ>
        || ch == "\u200d" // zero-width joiner <ZWJ> (in my ECMA-262 PDF, this is also 200c)
    ;
}
// Converts the source text of a numeric literal to a number.
//
// The text is matched against the hexadecimal, octal and decimal patterns
// in that order.  Returns undefined when none of them matches; the decimal
// branch can return NaN because RE_DEC_NUMBER also accepts "" and ".".
function parse_js_number(num) {
    if (RE_HEX_NUMBER.test(num)) {
        // Drop the "0x" prefix before parsing.
        return parseInt(num.substr(2), 16);
    }
    if (RE_OCT_NUMBER.test(num)) {
        // Drop the leading "0" before parsing.
        return parseInt(num.substr(1), 8);
    }
    if (RE_DEC_NUMBER.test(num)) {
        return parseFloat(num);
    }
}
// Error object describing a tokenizer/parser failure; js_error() constructs
// and throws it.
// NOTE(review): toString() reads this.line, this.col and this.pos -- verify
// that the constructor assigns them from the line/col/pos arguments.
255 function JS_Parse_Error(message, line, col, pos) {
256 this.message = message;
// Record a stack trace taken from a throwaway Error at construction time.
260 this.stack = new Error().stack;
// Formats the error as "<message> (line: L, col: C, pos: P)" followed by a
// blank line and the stack captured at construction time.
JS_Parse_Error.prototype.toString = function() {
    return this.message + " (line: " + this.line + ", col: " + this.col + ", pos: " + this.pos + ")" + "\n\n" + this.stack;
};
// Throws a JS_Parse_Error built from the given message and position.
// Never returns normally.
function js_error(message, line, col, pos) {
    throw new JS_Parse_Error(message, line, col, pos);
}
// True when `token` has the given `type` and, if `val` is supplied, the
// given value.  Passing null or undefined for `val` checks the type only.
// Loose equality is kept for both comparisons to preserve existing behaviour.
function is_token(token, type, val) {
    return token.type == type && (val == null || token.value == val);
}
277 function tokenizer($TEXT) {
280 text : $TEXT.replace(/\r\n?|[\n\u2028\u2029]/g, "\n").replace(/^\uFEFF/, ''),
287 newline_before : false,
288 regex_allowed : false,
292 function peek() { return S.text.charAt(S.pos); };
294 function next(signal_eof, in_string) {
295 var ch = S.text.charAt(S.pos++);
296 if (signal_eof && !ch)
299 S.newline_before = S.newline_before || !in_string;
312 function find(what, signal_eof) {
313 var pos = S.text.indexOf(what, S.pos);
314 if (signal_eof && pos == -1) throw EX_EOF;
318 function start_token() {
324 function token(type, value, is_comment) {
325 S.regex_allowed = ((type == "operator" && !HOP(UNARY_POSTFIX, value)) ||
326 (type == "keyword" && HOP(KEYWORDS_BEFORE_EXPRESSION, value)) ||
327 (type == "punc" && HOP(PUNC_BEFORE_EXPRESSION, value)));
334 nlb : S.newline_before
337 ret.comments_before = S.comments_before;
338 S.comments_before = [];
340 S.newline_before = false;
344 function skip_whitespace() {
345 while (HOP(WHITESPACE_CHARS, peek()))
349 function read_while(pred) {
350 var ret = "", ch = peek(), i = 0;
351 while (ch && pred(ch, i++)) {
358 function parse_error(err) {
359 js_error(err, S.tokline, S.tokcol, S.tokpos);
362 function read_num(prefix) {
363 var has_e = false, after_e = false, has_x = false, has_dot = prefix == ".";
364 var num = read_while(function(ch, i){
365 if (ch == "x" || ch == "X") {
366 if (has_x) return false;
369 if (!has_x && (ch == "E" || ch == "e")) {
370 if (has_e) return false;
371 return has_e = after_e = true;
374 if (after_e || (i == 0 && !prefix)) return true;
377 if (ch == "+") return after_e;
380 if (!has_dot && !has_x)
381 return has_dot = true;
384 return is_alphanumeric_char(ch);
388 var valid = parse_js_number(num);
390 return token("num", valid);
392 parse_error("Invalid syntax: " + num);
396 function read_escaped_char(in_string) {
397 var ch = next(true, in_string);
399 case "n" : return "\n";
400 case "r" : return "\r";
401 case "t" : return "\t";
402 case "b" : return "\b";
403 case "v" : return "\u000b";
404 case "f" : return "\f";
405 case "0" : return "\0";
406 case "x" : return String.fromCharCode(hex_bytes(2));
407 case "u" : return String.fromCharCode(hex_bytes(4));
408 case "\n": return "";
413 function hex_bytes(n) {
416 var digit = parseInt(next(true), 16);
418 parse_error("Invalid hex-character pattern in string");
419 num = (num << 4) | digit;
424 function read_string() {
425 return with_eof_error("Unterminated string constant", function(){
426 var quote = next(), ret = "";
430 // read OctalEscapeSequence (XXX: deprecated if "strict mode")
431 // https://github.com/mishoo/UglifyJS/issues/178
432 var octal_len = 0, first = null;
433 ch = read_while(function(ch){
434 if (ch >= "0" && ch <= "7") {
439 else if (first <= "3" && octal_len <= 2) return ++octal_len;
440 else if (first >= "4" && octal_len <= 1) return ++octal_len;
444 if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8));
445 else ch = read_escaped_char(true);
447 else if (ch == quote) break;
450 return token("string", ret);
454 function read_line_comment() {
456 var i = find("\n"), ret;
458 ret = S.text.substr(S.pos);
459 S.pos = S.text.length;
461 ret = S.text.substring(S.pos, i);
464 return token("comment1", ret, true);
467 function read_multiline_comment() {
469 return with_eof_error("Unterminated multiline comment", function(){
470 var i = find("*/", true),
471 text = S.text.substring(S.pos, i),
472 tok = token("comment2", text, true);
474 S.line += text.split("\n").length - 1;
475 S.newline_before = text.indexOf("\n") >= 0;
477 // https://github.com/mishoo/UglifyJS/issues/#issue/100
478 if (/^@cc_on/i.test(text)) {
479 warn("WARNING: at line " + S.line);
480 warn("*** Found \"conditional comment\": " + text);
481 warn("*** UglifyJS DISCARDS ALL COMMENTS. This means your code might no longer work properly in Internet Explorer.");
488 function read_name() {
489 var backslash = false, name = "", ch;
490 while ((ch = peek()) != null) {
492 if (ch == "\\") backslash = true, next();
493 else if (is_identifier_char(ch)) name += next();
497 if (ch != "u") parse_error("Expecting UnicodeEscapeSequence -- uXXXX");
498 ch = read_escaped_char();
499 if (!is_identifier_char(ch)) parse_error("Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier");
507 function read_regexp(regexp) {
508 return with_eof_error("Unterminated regular expression", function(){
509 var prev_backslash = false, ch, in_class = false;
510 while ((ch = next(true))) if (prev_backslash) {
512 prev_backslash = false;
513 } else if (ch == "[") {
516 } else if (ch == "]" && in_class) {
519 } else if (ch == "/" && !in_class) {
521 } else if (ch == "\\") {
522 prev_backslash = true;
526 var mods = read_name();
527 return token("regexp", [ regexp, mods ]);
531 function read_operator(prefix) {
533 if (!peek()) return op;
534 var bigger = op + peek();
535 if (HOP(OPERATORS, bigger)) {
542 return token("operator", grow(prefix || next()));
545 function handle_slash() {
547 var regex_allowed = S.regex_allowed;
550 S.comments_before.push(read_line_comment());
551 S.regex_allowed = regex_allowed;
554 S.comments_before.push(read_multiline_comment());
555 S.regex_allowed = regex_allowed;
558 return S.regex_allowed ? read_regexp("") : read_operator("/");
561 function handle_dot() {
563 return is_digit(peek())
565 : token("punc", ".");
568 function read_word() {
569 var word = read_name();
570 return !HOP(KEYWORDS, word)
571 ? token("name", word)
572 : HOP(OPERATORS, word)
573 ? token("operator", word)
574 : HOP(KEYWORDS_ATOM, word)
575 ? token("atom", word)
576 : token("keyword", word);
579 function with_eof_error(eof_error, cont) {
583 if (ex === EX_EOF) parse_error(eof_error);
588 function next_token(force_regexp) {
589 if (force_regexp != null)
590 return read_regexp(force_regexp);
594 if (!ch) return token("eof");
595 if (is_digit(ch)) return read_num();
596 if (ch == '"' || ch == "'") return read_string();
597 if (HOP(PUNC_CHARS, ch)) return token("punc", next());
598 if (ch == ".") return handle_dot();
599 if (ch == "/") return handle_slash();
600 if (HOP(OPERATOR_CHARS, ch)) return read_operator();
601 if (ch == "\\" || is_identifier_start(ch)) return read_word();
602 parse_error("Unexpected character '" + ch + "'");
605 next_token.context = function(nc) {
/* -----[ Parser (constants) ]----- */
616 var UNARY_PREFIX = array_to_hash([
// Operators that may follow their operand.  The tokenizer's token() also
// consults this set: a regexp literal is not allowed right after one of these.
var UNARY_POSTFIX = array_to_hash([ "--", "++" ]);
630 var ASSIGNMENT = (function(a, ret, i){
631 while (i < a.length) {
632 ret[a[i]] = a[i].substr(0, a[i].length - 1);
637 ["+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&="],
642 var PRECEDENCE = (function(a, ret){
643 for (var i = 0, n = 1; i < a.length; ++i, ++n) {
645 for (var j = 0; j < b.length; ++j) {
657 ["==", "===", "!=", "!=="],
658 ["<", ">", "<=", ">=", "in", "instanceof"],
// Statement kinds that labeled_statement() accepts under a label when the
// parser runs in exigent mode.
var STATEMENTS_WITH_LABELS = array_to_hash([ "for", "do", "while", "switch" ]);

// Token types that expr_atom() turns directly into an atomic expression node.
var ATOMIC_START_TOKEN = array_to_hash([ "atom", "num", "string", "regexp", "name" ]);
/* -----[ Parser ]----- */
672 function NodeWithToken(str, start, end) {
// A NodeWithToken stringifies to its `name` property.
NodeWithToken.prototype.toString = function() { return this.name; };
680 function parse($TEXT, exigent_mode, embed_tokens) {
683 input : typeof $TEXT == "string" ? tokenizer($TEXT, true) : $TEXT,
694 function is(type, value) {
695 return is_token(S.token, type, value);
698 function peek() { return S.peeked || (S.peeked = S.input()); };
715 function croak(msg, line, col, pos) {
716 var ctx = S.input.context();
718 line != null ? line : ctx.tokline,
719 col != null ? col : ctx.tokcol,
720 pos != null ? pos : ctx.tokpos);
723 function token_error(token, msg) {
724 croak(msg, token.line, token.col);
727 function unexpected(token) {
730 token_error(token, "Unexpected token: " + token.type + " (" + token.value + ")");
733 function expect_token(type, val) {
737 token_error(S.token, "Unexpected token " + S.token.type + ", expected " + type);
740 function expect(punc) { return expect_token("punc", punc); };
742 function can_insert_semicolon() {
743 return !exigent_mode && (
744 S.token.nlb || is("eof") || is("punc", "}")
748 function semicolon() {
749 if (is("punc", ";")) next();
750 else if (!can_insert_semicolon()) unexpected();
754 return slice(arguments);
757 function parenthesised() {
759 var ex = expression();
764 function add_tokens(str, start, end) {
765 return str instanceof NodeWithToken ? str : new NodeWithToken(str, start, end);
768 function maybe_embed_tokens(parser) {
769 if (embed_tokens) return function() {
771 var ast = parser.apply(this, arguments);
772 ast[0] = add_tokens(ast[0], start, prev());
778 var statement = maybe_embed_tokens(function() {
779 if (is("operator", "/") || is("operator", "/=")) {
781 S.token = S.input(S.token.value.substr(1)); // force regexp
783 switch (S.token.type) {
789 return simple_statement();
792 return is_token(peek(), "punc", ":")
793 ? labeled_statement(prog1(S.token.value, next, next))
794 : simple_statement();
797 switch (S.token.value) {
799 return as("block", block_());
802 return simple_statement();
811 switch (prog1(S.token.value, next)) {
813 return break_cont("break");
816 return break_cont("continue");
820 return as("debugger");
823 return (function(body){
824 expect_token("keyword", "while");
825 return as("do", prog1(parenthesised, semicolon), body);
826 })(in_loop(statement));
832 return function_(true);
838 if (S.in_function == 0)
839 croak("'return' outside of function");
843 : can_insert_semicolon()
845 : prog1(expression, semicolon));
848 return as("switch", parenthesised(), switch_block_());
852 croak("Illegal newline after 'throw'");
853 return as("throw", prog1(expression, semicolon));
859 return prog1(var_, semicolon);
862 return prog1(const_, semicolon);
865 return as("while", parenthesised(), in_loop(statement));
868 return as("with", parenthesised(), statement());
876 function labeled_statement(label) {
877 S.labels.push(label);
878 var start = S.token, stat = statement();
879 if (exigent_mode && !HOP(STATEMENTS_WITH_LABELS, stat[0]))
882 return as("label", label, stat);
885 function simple_statement() {
886 return as("stat", prog1(expression, semicolon));
889 function break_cont(type) {
891 if (!can_insert_semicolon()) {
892 name = is("name") ? S.token.value : null;
896 if (!member(name, S.labels))
897 croak("Label " + name + " without matching loop or statement");
899 else if (S.in_loop == 0)
900 croak(type + " not inside a loop or switch");
902 return as(type, name);
908 if (!is("punc", ";")) {
909 init = is("keyword", "var")
910 ? (next(), var_(true))
911 : expression(true, true);
912 if (is("operator", "in"))
915 return regular_for(init);
918 function regular_for(init) {
920 var test = is("punc", ";") ? null : expression();
922 var step = is("punc", ")") ? null : expression();
924 return as("for", init, test, step, in_loop(statement));
927 function for_in(init) {
928 var lhs = init[0] == "var" ? as("name", init[1][0]) : init;
930 var obj = expression();
932 return as("for-in", init, lhs, obj, in_loop(statement));
935 var function_ = maybe_embed_tokens(function(in_statement) {
936 var name = is("name") ? prog1(S.token.value, next) : null;
937 if (in_statement && !name)
940 return as(in_statement ? "defun" : "function",
944 while (!is("punc", ")")) {
945 if (first) first = false; else expect(",");
946 if (!is("name")) unexpected();
947 a.push(S.token.value);
956 var loop = S.in_loop;
966 var cond = parenthesised(), body = statement(), belse;
967 if (is("keyword", "else")) {
971 return as("if", cond, body, belse);
977 while (!is("punc", "}")) {
978 if (is("eof")) unexpected();
985 var switch_block_ = curry(in_loop, function(){
987 var a = [], cur = null;
988 while (!is("punc", "}")) {
989 if (is("eof")) unexpected();
990 if (is("keyword", "case")) {
993 a.push([ expression(), cur ]);
996 else if (is("keyword", "default")) {
1000 a.push([ null, cur ]);
1003 if (!cur) unexpected();
1004 cur.push(statement());
1012 var body = block_(), bcatch, bfinally;
1013 if (is("keyword", "catch")) {
1017 croak("Name expected");
1018 var name = S.token.value;
1021 bcatch = [ name, block_() ];
1023 if (is("keyword", "finally")) {
1025 bfinally = block_();
1027 if (!bcatch && !bfinally)
1028 croak("Missing catch/finally blocks");
1029 return as("try", body, bcatch, bfinally);
1032 function vardefs(no_in) {
1037 var name = S.token.value;
1039 if (is("operator", "=")) {
1041 a.push([ name, expression(false, no_in) ]);
1045 if (!is("punc", ","))
1052 function var_(no_in) {
1053 return as("var", vardefs(no_in));
1057 return as("const", vardefs());
1061 var newexp = expr_atom(false), args;
1062 if (is("punc", "(")) {
1064 args = expr_list(")");
1068 return subscripts(as("new", newexp, args), true);
1071 var expr_atom = maybe_embed_tokens(function(allow_calls) {
1072 if (is("operator", "new")) {
1077 switch (S.token.value) {
1080 return subscripts(prog1(expression, curry(expect, ")")), allow_calls);
1083 return subscripts(array_(), allow_calls);
1086 return subscripts(object_(), allow_calls);
1090 if (is("keyword", "function")) {
1092 return subscripts(function_(false), allow_calls);
1094 if (HOP(ATOMIC_START_TOKEN, S.token.type)) {
1095 var atom = S.token.type == "regexp"
1096 ? as("regexp", S.token.value[0], S.token.value[1])
1097 : as(S.token.type, S.token.value);
1098 return subscripts(prog1(atom, next), allow_calls);
1103 function expr_list(closing, allow_trailing_comma, allow_empty) {
1104 var first = true, a = [];
1105 while (!is("punc", closing)) {
1106 if (first) first = false; else expect(",");
1107 if (allow_trailing_comma && is("punc", closing)) break;
1108 if (is("punc", ",") && allow_empty) {
1109 a.push([ "atom", "undefined" ]);
1111 a.push(expression(false));
1119 return as("array", expr_list("]", !exigent_mode, true));
1122 function object_() {
1123 var first = true, a = [];
1124 while (!is("punc", "}")) {
1125 if (first) first = false; else expect(",");
1126 if (!exigent_mode && is("punc", "}"))
1127 // allow trailing comma
1129 var type = S.token.type;
1130 var name = as_property_name();
1131 if (type == "name" && (name == "get" || name == "set") && !is("punc", ":")) {
1132 a.push([ as_name(), function_(false), name ]);
1135 a.push([ name, expression(false) ]);
1139 return as("object", a);
1142 function as_property_name() {
1143 switch (S.token.type) {
1146 return prog1(S.token.value, next);
1151 function as_name() {
1152 switch (S.token.type) {
1157 return prog1(S.token.value, next);
1163 function subscripts(expr, allow_calls) {
1164 if (is("punc", ".")) {
1166 return subscripts(as("dot", expr, as_name()), allow_calls);
1168 if (is("punc", "[")) {
1170 return subscripts(as("sub", expr, prog1(expression, curry(expect, "]"))), allow_calls);
1172 if (allow_calls && is("punc", "(")) {
1174 return subscripts(as("call", expr, expr_list(")")), true);
1179 function maybe_unary(allow_calls) {
1180 if (is("operator") && HOP(UNARY_PREFIX, S.token.value)) {
1181 return make_unary("unary-prefix",
1182 prog1(S.token.value, next),
1183 maybe_unary(allow_calls));
1185 var val = expr_atom(allow_calls);
1186 while (is("operator") && HOP(UNARY_POSTFIX, S.token.value) && !S.token.nlb) {
1187 val = make_unary("unary-postfix", S.token.value, val);
1193 function make_unary(tag, op, expr) {
1194 if ((op == "++" || op == "--") && !is_assignable(expr))
1195 croak("Invalid use of " + op + " operator");
1196 return as(tag, op, expr);
1199 function expr_op(left, min_prec, no_in) {
1200 var op = is("operator") ? S.token.value : null;
1201 if (op && op == "in" && no_in) op = null;
1202 var prec = op != null ? PRECEDENCE[op] : null;
1203 if (prec != null && prec > min_prec) {
1205 var right = expr_op(maybe_unary(true), prec, no_in);
1206 return expr_op(as("binary", op, left, right), min_prec, no_in);
1211 function expr_ops(no_in) {
1212 return expr_op(maybe_unary(true), 0, no_in);
1215 function maybe_conditional(no_in) {
1216 var expr = expr_ops(no_in);
1217 if (is("operator", "?")) {
1219 var yes = expression(false);
1221 return as("conditional", expr, yes, expression(false, no_in));
1226 function is_assignable(expr) {
1227 if (!exigent_mode) return true;
1228 switch (expr[0]+"") {
1235 return expr[1] != "this";
1239 function maybe_assign(no_in) {
1240 var left = maybe_conditional(no_in), val = S.token.value;
1241 if (is("operator") && HOP(ASSIGNMENT, val)) {
1242 if (is_assignable(left)) {
1244 return as("assign", ASSIGNMENT[val], left, maybe_assign(no_in));
1246 croak("Invalid assignment");
1251 var expression = maybe_embed_tokens(function(commas, no_in) {
1252 if (arguments.length == 0)
1254 var expr = maybe_assign(no_in);
1255 if (commas && is("punc", ",")) {
1257 return as("seq", expr, expression(true, no_in));
1262 function in_loop(cont) {
1271 return as("toplevel", (function(a){
1273 a.push(statement());
/* -----[ Utilities ]----- */
// Partial application: returns a function that calls `f` with the extra
// arguments given here, followed by whatever arguments the returned
// function itself receives.  `this` is passed through to `f` unchanged.
function curry(f) {
    // Every argument after `f` is bound up front.
    var args = Array.prototype.slice.call(arguments, 1);
    return function() {
        return f.apply(this, args.concat(Array.prototype.slice.call(arguments, 0)));
    };
}
// Lisp-style PROG1: produces the first value, then runs the remaining
// arguments for their side effects, and finally returns that first value.
//
// If `ret` is a function it is called first and its result becomes the
// value to return.  Every further argument must be a function; they are
// called in order with no arguments.
function prog1(ret) {
    if (ret instanceof Function) {
        ret = ret();
    }
    for (var i = 1; i < arguments.length; ++i) {
        arguments[i]();
    }
    return ret;
}
// Builds a lookup object whose own keys are the elements of `a`, so that
// membership can be tested with HOP(hash, key).  An empty array yields an
// empty object.
function array_to_hash(a) {
    var ret = {};
    for (var i = 0; i < a.length; ++i) {
        ret[a[i]] = true;
    }
    return ret;
}
// Copies an array-like value (for example `arguments`) into a real array,
// starting at index `start`; a missing or falsy `start` means 0.
function slice(a, start) {
    return Array.prototype.slice.call(a, start || 0);
}
// Splits a string into an array of its individual UTF-16 code units.
function characters(str) {
    return str.split("");
}
// True when `array` contains `name`, compared with strict equality.
// The array is scanned from the end towards the start.
function member(name, array) {
    for (var i = array.length; --i >= 0;) {
        if (array[i] === name) {
            return true;
        }
    }
    return false;
}
// "Has Own Property": true when `prop` is an own property of `obj`.
// Goes through Object.prototype so it still works when `obj` shadows
// hasOwnProperty or has no prototype at all.
function HOP(obj, prop) {
    return Object.prototype.hasOwnProperty.call(obj, prop);
}
// Warning sink used by the tokenizer (for example when it meets an @cc_on
// conditional comment).  It does nothing by default.
// NOTE(review): presumably replaced through exports.set_logger -- confirm.
var warn = function() {};
/* -----[ Exports ]----- */

// Entry points.
exports.tokenizer = tokenizer;
exports.parse = parse;

// Helper functions.
exports.slice = slice;
exports.curry = curry;
exports.member = member;
exports.array_to_hash = array_to_hash;

// Lookup tables.
exports.PRECEDENCE = PRECEDENCE;
exports.KEYWORDS_ATOM = KEYWORDS_ATOM;
exports.RESERVED_WORDS = RESERVED_WORDS;
exports.KEYWORDS = KEYWORDS;
exports.ATOMIC_START_TOKEN = ATOMIC_START_TOKEN;
exports.OPERATORS = OPERATORS;

// Character classification.
exports.is_alphanumeric_char = is_alphanumeric_char;
1337 exports.set_logger = function(logger) {