1 /***********************************************************************
3 A JavaScript tokenizer / parser / beautifier / compressor.
5 This version is suitable for Node.js. With minimal changes (the
6 exports stuff) it should work on any JS platform.
8 This file contains the tokenizer/parser. It is a port to JavaScript
9 of parse-js [1], a JavaScript parser library written in Common Lisp
10 by Marijn Haverbeke. Thank you Marijn!
12 [1] http://marijn.haverbeke.nl/parse-js/
16 - tokenizer(code) -- returns a function. Call the returned
17 function to fetch the next token.
19 - parse(code) -- returns an AST of the given JavaScript code.
21 -------------------------------- (C) ---------------------------------
24 <mihai.bazon@gmail.com>
25 http://mihai.bazon.net/blog
27 Distributed under the BSD license:
29 Copyright 2010 (c) Mihai Bazon <mihai.bazon@gmail.com>
30 Based on parse-js (http://marijn.haverbeke.nl/parse-js/).
32 Redistribution and use in source and binary forms, with or without
33 modification, are permitted provided that the following conditions
36 * Redistributions of source code must retain the above
37 copyright notice, this list of conditions and the following
40 * Redistributions in binary form must reproduce the above
41 copyright notice, this list of conditions and the following
42 disclaimer in the documentation and/or other materials
43 provided with the distribution.
45 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER “AS IS” AND ANY
46 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE
49 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
50 OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
51 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
52 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
53 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
54 TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
55 THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 ***********************************************************************/
60 /* -----[ Tokenizer (constants) ]----- */
62 var KEYWORDS = array_to_hash([
91 var RESERVED_WORDS = array_to_hash([
123 var KEYWORDS_BEFORE_EXPRESSION = array_to_hash([
132 var KEYWORDS_ATOM = array_to_hash([
// Lookup table of the single characters that can begin an operator token;
// next_token() hands any such character to read_operator().
139 var OPERATOR_CHARS = array_to_hash(characters("+-*&%=<>!?|~^"));
// Hexadecimal literal: "0x" followed by one or more hex digits (case-insensitive).
141 var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i;
// Legacy octal literal: a leading "0" followed by one or more digits 0-7.
142 var RE_OCT_NUMBER = /^0[0-7]+$/;
// Decimal literal: an integer part with an optional fraction ("12", "12.",
// "12.5") or a bare fraction (".5"), optionally followed by an exponent made
// of "e"/"E", an optional sign and at least one digit ("e10", "E-3").
// The pattern is deliberately strict: the empty string, a lone ".", and a
// decimal point inside or after the exponent ("1e1.5", "1e.5", "1e5.") do not
// match.  parseFloat() would otherwise truncate such text silently (for
// example "1e1.5" -> 10) instead of the tokenizer reporting invalid syntax.
var RE_DEC_NUMBER = /^(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i;
145 var OPERATORS = array_to_hash([
// Characters that skip_whitespace() consumes between tokens.
// Besides the ASCII blanks and the Unicode space separators, the set contains
// U+1680 (OGHAM SPACE MARK) and U+FEFF (BOM / zero-width no-break space):
// ECMAScript 5 classifies both as WhiteSpace, and a BOM can appear in the
// middle of the input when several files are concatenated.
var WHITESPACE_CHARS = array_to_hash(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000\ufeff"));
// Punctuation after which token() marks a regular expression literal as allowed.
194 var PUNC_BEFORE_EXPRESSION = array_to_hash(characters("[{}(,.;:"));
// Single characters that next_token() emits directly as "punc" tokens.
// "." is not listed here; next_token() routes it through handle_dot().
196 var PUNC_CHARS = array_to_hash(characters("[]{}(),;:"));
// Table of regular-expression flag letters.
198 var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy"));
200 /* -----[ Tokenizer ]----- */
202 // regexps adapted from http://xregexp.com/plugins/#unicode
204 letter: new RegExp("[\\u0041-\\u005A\\u0061-\\u007A\\u00AA\\u00B5\\u00BA\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02C1\\u02C6-\\u02D1\\u02E0-\\u02E4\\u02EC\\u02EE\\u0370-\\u0374\\u0376\\u0377\\u037A-\\u037D\\u0386\\u0388-\\u038A\\u038C\\u038E-\\u03A1\\u03A3-\\u03F5\\u03F7-\\u0481\\u048A-\\u0523\\u0531-\\u0556\\u0559\\u0561-\\u0587\\u05D0-\\u05EA\\u05F0-\\u05F2\\u0621-\\u064A\\u066E\\u066F\\u0671-\\u06D3\\u06D5\\u06E5\\u06E6\\u06EE\\u06EF\\u06FA-\\u06FC\\u06FF\\u0710\\u0712-\\u072F\\u074D-\\u07A5\\u07B1\\u07CA-\\u07EA\\u07F4\\u07F5\\u07FA\\u0904-\\u0939\\u093D\\u0950\\u0958-\\u0961\\u0971\\u0972\\u097B-\\u097F\\u0985-\\u098C\\u098F\\u0990\\u0993-\\u09A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9\\u09BD\\u09CE\\u09DC\\u09DD\\u09DF-\\u09E1\\u09F0\\u09F1\\u0A05-\\u0A0A\\u0A0F\\u0A10\\u0A13-\\u0A28\\u0A2A-\\u0A30\\u0A32\\u0A33\\u0A35\\u0A36\\u0A38\\u0A39\\u0A59-\\u0A5C\\u0A5E\\u0A72-\\u0A74\\u0A85-\\u0A8D\\u0A8F-\\u0A91\\u0A93-\\u0AA8\\u0AAA-\\u0AB0\\u0AB2\\u0AB3\\u0AB5-\\u0AB9\\u0ABD\\u0AD0\\u0AE0\\u0AE1\\u0B05-\\u0B0C\\u0B0F\\u0B10\\u0B13-\\u0B28\\u0B2A-\\u0B30\\u0B32\\u0B33\\u0B35-\\u0B39\\u0B3D\\u0B5C\\u0B5D\\u0B5F-\\u0B61\\u0B71\\u0B83\\u0B85-\\u0B8A\\u0B8E-\\u0B90\\u0B92-\\u0B95\\u0B99\\u0B9A\\u0B9C\\u0B9E\\u0B9F\\u0BA3\\u0BA4\\u0BA8-\\u0BAA\\u0BAE-\\u0BB9\\u0BD0\\u0C05-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28\\u0C2A-\\u0C33\\u0C35-\\u0C39\\u0C3D\\u0C58\\u0C59\\u0C60\\u0C61\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBD\\u0CDE\\u0CE0\\u0CE1\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D28\\u0D2A-\\u0D39\\u0D3D\\u0D60\\u0D61\\u0D7A-\\u0D7F\\u0D85-\\u0D96\\u0D9A-\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0E01-\\u0E30\\u0E32\\u0E33\\u0E40-\\u0E46\\u0E81\\u0E82\\u0E84\\u0E87\\u0E88\\u0E8A\\u0E8D\\u0E94-\\u0E97\\u0E99-\\u0E9F\\u0EA1-\\u0EA3\\u0EA5\\u0EA7\\u0EAA\\u0EAB\\u0EAD-\\u0EB0\\u0EB2\\u0EB3\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EDC\\u0EDD\\u0F00\\u0F40-\\u0F47\\u0F49-\\u0F6C\\u0F88-\\u0F8B\\u1000-\\u102A\\u103F\\u1050-\\u1055\\u105A-\\u105D\
\u1061\\u1065\\u1066\\u106E-\\u1070\\u1075-\\u1081\\u108E\\u10A0-\\u10C5\\u10D0-\\u10FA\\u10FC\\u1100-\\u1159\\u115F-\\u11A2\\u11A8-\\u11F9\\u1200-\\u1248\\u124A-\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u1380-\\u138F\\u13A0-\\u13F4\\u1401-\\u166C\\u166F-\\u1676\\u1681-\\u169A\\u16A0-\\u16EA\\u1700-\\u170C\\u170E-\\u1711\\u1720-\\u1731\\u1740-\\u1751\\u1760-\\u176C\\u176E-\\u1770\\u1780-\\u17B3\\u17D7\\u17DC\\u1820-\\u1877\\u1880-\\u18A8\\u18AA\\u1900-\\u191C\\u1950-\\u196D\\u1970-\\u1974\\u1980-\\u19A9\\u19C1-\\u19C7\\u1A00-\\u1A16\\u1B05-\\u1B33\\u1B45-\\u1B4B\\u1B83-\\u1BA0\\u1BAE\\u1BAF\\u1C00-\\u1C23\\u1C4D-\\u1C4F\\u1C5A-\\u1C7D\\u1D00-\\u1DBF\\u1E00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FBC\\u1FBE\\u1FC2-\\u1FC4\\u1FC6-\\u1FCC\\u1FD0-\\u1FD3\\u1FD6-\\u1FDB\\u1FE0-\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FFC\\u2071\\u207F\\u2090-\\u2094\\u2102\\u2107\\u210A-\\u2113\\u2115\\u2119-\\u211D\\u2124\\u2126\\u2128\\u212A-\\u212D\\u212F-\\u2139\\u213C-\\u213F\\u2145-\\u2149\\u214E\\u2183\\u2184\\u2C00-\\u2C2E\\u2C30-\\u2C5E\\u2C60-\\u2C6F\\u2C71-\\u2C7D\\u2C80-\\u2CE4\\u2D00-\\u2D25\\u2D30-\\u2D65\\u2D6F\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6\\u2DB8-\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE\\u2E2F\\u3005\\u3006\\u3031-\\u3035\\u303B\\u303C\\u3041-\\u3096\\u309D-\\u309F\\u30A1-\\u30FA\\u30FC-\\u30FF\\u3105-\\u312D\\u3131-\\u318E\\u31A0-\\u31B7\\u31F0-\\u31FF\\u3400\\u4DB5\\u4E00\\u9FC3\\uA000-\\uA48C\\uA500-\\uA60C\\uA610-\\uA61F\\uA62A\\uA62B\\uA640-\\uA65F\\uA662-\\uA66E\\uA67F-\\uA697\\uA717-\\uA71F\\uA722-\\uA788\\uA78B\\uA78C\\uA7FB-\\uA801\\uA803-\\uA805\\uA807-\\uA80A\\uA80C-\\uA822\\uA840-\\uA873\\uA882-\\uA8B3\\uA90A-\\uA925\\uA930-\\uA946\\uAA00-\\uAA28\\uAA40-\\uAA42\\uA
A44-\\uAA4B\\uAC00\\uD7A3\\uF900-\\uFA2D\\uFA30-\\uFA6A\\uFA70-\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D\\uFB1F-\\uFB28\\uFB2A-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFB\\uFE70-\\uFE74\\uFE76-\\uFEFC\\uFF21-\\uFF3A\\uFF41-\\uFF5A\\uFF66-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC]"),
205 non_spacing_mark: new RegExp("[\\u0300-\\u036F\\u0483-\\u0487\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u061A\\u064B-\\u065E\\u0670\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07A6-\\u07B0\\u07EB-\\u07F3\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-\\u082D\\u0900-\\u0902\\u093C\\u0941-\\u0948\\u094D\\u0951-\\u0955\\u0962\\u0963\\u0981\\u09BC\\u09C1-\\u09C4\\u09CD\\u09E2\\u09E3\\u0A01\\u0A02\\u0A3C\\u0A41\\u0A42\\u0A47\\u0A48\\u0A4B-\\u0A4D\\u0A51\\u0A70\\u0A71\\u0A75\\u0A81\\u0A82\\u0ABC\\u0AC1-\\u0AC5\\u0AC7\\u0AC8\\u0ACD\\u0AE2\\u0AE3\\u0B01\\u0B3C\\u0B3F\\u0B41-\\u0B44\\u0B4D\\u0B56\\u0B62\\u0B63\\u0B82\\u0BC0\\u0BCD\\u0C3E-\\u0C40\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C62\\u0C63\\u0CBC\\u0CBF\\u0CC6\\u0CCC\\u0CCD\\u0CE2\\u0CE3\\u0D41-\\u0D44\\u0D4D\\u0D62\\u0D63\\u0DCA\\u0DD2-\\u0DD4\\u0DD6\\u0E31\\u0E34-\\u0E3A\\u0E47-\\u0E4E\\u0EB1\\u0EB4-\\u0EB9\\u0EBB\\u0EBC\\u0EC8-\\u0ECD\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F71-\\u0F7E\\u0F80-\\u0F84\\u0F86\\u0F87\\u0F90-\\u0F97\\u0F99-\\u0FBC\\u0FC6\\u102D-\\u1030\\u1032-\\u1037\\u1039\\u103A\\u103D\\u103E\\u1058\\u1059\\u105E-\\u1060\\u1071-\\u1074\\u1082\\u1085\\u1086\\u108D\\u109D\\u135F\\u1712-\\u1714\\u1732-\\u1734\\u1752\\u1753\\u1772\\u1773\\u17B7-\\u17BD\\u17C6\\u17C9-\\u17D3\\u17DD\\u180B-\\u180D\\u18A9\\u1920-\\u1922\\u1927\\u1928\\u1932\\u1939-\\u193B\\u1A17\\u1A18\\u1A56\\u1A58-\\u1A5E\\u1A60\\u1A62\\u1A65-\\u1A6C\\u1A73-\\u1A7C\\u1A7F\\u1B00-\\u1B03\\u1B34\\u1B36-\\u1B3A\\u1B3C\\u1B42\\u1B6B-\\u1B73\\u1B80\\u1B81\\u1BA2-\\u1BA5\\u1BA8\\u1BA9\\u1C2C-\\u1C33\\u1C36\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8\\u1CED\\u1DC0-\\u1DE6\\u1DFD-\\u1DFF\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2CEF-\\u2CF1\\u2DE0-\\u2DFF\\u302A-\\u302F\\u3099\\u309A\\uA66F\\uA67C\\uA67D\\uA6F0\\uA6F1\\uA802\\uA806\\uA80B\\uA825\\uA826\\uA8C4\\uA8E0-\\uA8F1\\uA926-\\uA92D\\uA947-\\uA951\\uA980-\\uA982\\uA9B3\\uA9B6-\\uA9B9\\uA9BC\\uAA29-\\uAA2E\\uAA
31\\uAA32\\uAA35\\uAA36\\uAA43\\uAA4C\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8\\uAABE\\uAABF\\uAAC1\\uABE5\\uABE8\\uABED\\uFB1E\\uFE00-\\uFE0F\\uFE20-\\uFE26]"),
206 space_combining_mark: new RegExp("[\\u0903\\u093E-\\u0940\\u0949-\\u094C\\u094E\\u0982\\u0983\\u09BE-\\u09C0\\u09C7\\u09C8\\u09CB\\u09CC\\u09D7\\u0A03\\u0A3E-\\u0A40\\u0A83\\u0ABE-\\u0AC0\\u0AC9\\u0ACB\\u0ACC\\u0B02\\u0B03\\u0B3E\\u0B40\\u0B47\\u0B48\\u0B4B\\u0B4C\\u0B57\\u0BBE\\u0BBF\\u0BC1\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCC\\u0BD7\\u0C01-\\u0C03\\u0C41-\\u0C44\\u0C82\\u0C83\\u0CBE\\u0CC0-\\u0CC4\\u0CC7\\u0CC8\\u0CCA\\u0CCB\\u0CD5\\u0CD6\\u0D02\\u0D03\\u0D3E-\\u0D40\\u0D46-\\u0D48\\u0D4A-\\u0D4C\\u0D57\\u0D82\\u0D83\\u0DCF-\\u0DD1\\u0DD8-\\u0DDF\\u0DF2\\u0DF3\\u0F3E\\u0F3F\\u0F7F\\u102B\\u102C\\u1031\\u1038\\u103B\\u103C\\u1056\\u1057\\u1062-\\u1064\\u1067-\\u106D\\u1083\\u1084\\u1087-\\u108C\\u108F\\u109A-\\u109C\\u17B6\\u17BE-\\u17C5\\u17C7\\u17C8\\u1923-\\u1926\\u1929-\\u192B\\u1930\\u1931\\u1933-\\u1938\\u19B0-\\u19C0\\u19C8\\u19C9\\u1A19-\\u1A1B\\u1A55\\u1A57\\u1A61\\u1A63\\u1A64\\u1A6D-\\u1A72\\u1B04\\u1B35\\u1B3B\\u1B3D-\\u1B41\\u1B43\\u1B44\\u1B82\\u1BA1\\u1BA6\\u1BA7\\u1BAA\\u1C24-\\u1C2B\\u1C34\\u1C35\\u1CE1\\u1CF2\\uA823\\uA824\\uA827\\uA880\\uA881\\uA8B4-\\uA8C3\\uA952\\uA953\\uA983\\uA9B4\\uA9B5\\uA9BA\\uA9BB\\uA9BD-\\uA9C0\\uAA2F\\uAA30\\uAA33\\uAA34\\uAA4D\\uAA7B\\uABE3\\uABE4\\uABE6\\uABE7\\uABE9\\uABEA\\uABEC]"),
207 connector_punctuation: new RegExp("[\\u005F\\u203F\\u2040\\u2054\\uFE33\\uFE34\\uFE4D-\\uFE4F\\uFF3F]")
// Reports whether `ch` matches the UNICODE.letter character class.
210 function is_letter(ch) {
211 return UNICODE.letter.test(ch);
// Reports whether the first UTF-16 code unit of `ch` is an ASCII digit
// (char codes 48..57, i.e. "0" through "9").
214 function is_digit(ch) {
215 ch = ch.charCodeAt(0);
216 return ch >= 48 && ch <= 57; //XXX: find out if "UnicodeDigit" means something else than 0..9
// True when `ch` is an ASCII digit (is_digit) or a Unicode letter (is_letter).
219 function is_alphanumeric_char(ch) {
220 return is_digit(ch) || is_letter(ch);
// True when `ch` matches either combining-mark class of the UNICODE table
// (non_spacing_mark or space_combining_mark).
223 function is_unicode_combining_mark(ch) {
224 return UNICODE.non_spacing_mark.test(ch) || UNICODE.space_combining_mark.test(ch);
// True when `ch` matches the UNICODE.connector_punctuation class
// (which includes the underscore, U+005F).
227 function is_unicode_connector_punctuation(ch) {
228 return UNICODE.connector_punctuation.test(ch);
// True when `ch` may begin an identifier: "$", "_" or any Unicode letter.
231 function is_identifier_start(ch) {
232 return ch == "$" || ch == "_" || is_letter(ch);
235 function is_identifier_char(ch) {
236 return is_identifier_start(ch)
237 || is_unicode_combining_mark(ch)
239 || is_unicode_connector_punctuation(ch)
240 || ch == "\u200c" // zero-width non-joiner <ZWNJ>
241 || ch == "\u200d" // zero-width joiner <ZWJ> (in my ECMA-262 PDF, this is also 200c)
// Converts the source text of a numeric literal into its numeric value.
//   num -- the literal exactly as it appeared in the input (a string).
// The three literal forms are tried in order: hexadecimal, legacy octal,
// then decimal.
245 function parse_js_number(num) {
246 if (RE_HEX_NUMBER.test(num)) {
// drop the "0x" prefix and read the rest in base 16
247 return parseInt(num.substr(2), 16);
248 } else if (RE_OCT_NUMBER.test(num)) {
// drop the leading "0" and read the rest in base 8
249 return parseInt(num.substr(1), 8);
250 } else if (RE_DEC_NUMBER.test(num)) {
251 return parseFloat(num);
255 function JS_Parse_Error(message, line, col, pos) {
256 this.message = message;
257 this.line = line + 1;
260 this.stack = new Error().stack;
263 JS_Parse_Error.prototype.toString = function() {
264 return this.message + " (line: " + this.line + ", col: " + this.col + ", pos: " + this.pos + ")" + "\n\n" + this.stack;
// Throws a JS_Parse_Error built from the given message and source position;
// it never returns normally.
267 function js_error(message, line, col, pos) {
268 throw new JS_Parse_Error(message, line, col, pos);
// True when `token.type` equals `type` and, if `val` is supplied, `token.value`
// equals `val`.  A null or undefined `val` means "any value"; comparisons
// use loose equality (==).
271 function is_token(token, type, val) {
272 return token.type == type && (val == null || token.value == val);
277 function tokenizer($TEXT) {
280 text : $TEXT.replace(/\r\n?|[\n\u2028\u2029]/g, "\n").replace(/^\uFEFF/, ''),
287 newline_before : false,
288 regex_allowed : false,
292 function peek() { return S.text.charAt(S.pos); };
294 function next(signal_eof, in_string) {
295 var ch = S.text.charAt(S.pos++);
296 if (signal_eof && !ch)
299 S.newline_before = S.newline_before || !in_string;
312 function find(what, signal_eof) {
313 var pos = S.text.indexOf(what, S.pos);
314 if (signal_eof && pos == -1) throw EX_EOF;
318 function start_token() {
324 function token(type, value, is_comment) {
325 S.regex_allowed = ((type == "operator" && !HOP(UNARY_POSTFIX, value)) ||
326 (type == "keyword" && HOP(KEYWORDS_BEFORE_EXPRESSION, value)) ||
327 (type == "punc" && HOP(PUNC_BEFORE_EXPRESSION, value)));
335 nlb : S.newline_before
338 ret.comments_before = S.comments_before;
339 S.comments_before = [];
341 S.newline_before = false;
345 function skip_whitespace() {
346 while (HOP(WHITESPACE_CHARS, peek()))
350 function read_while(pred) {
351 var ret = "", ch = peek(), i = 0;
352 while (ch && pred(ch, i++)) {
359 function parse_error(err) {
360 js_error(err, S.tokline, S.tokcol, S.tokpos);
363 function read_num(prefix) {
364 var has_e = false, after_e = false, has_x = false, has_dot = prefix == ".";
365 var num = read_while(function(ch, i){
366 if (ch == "x" || ch == "X") {
367 if (has_x) return false;
370 if (!has_x && (ch == "E" || ch == "e")) {
371 if (has_e) return false;
372 return has_e = after_e = true;
375 if (after_e || (i == 0 && !prefix)) return true;
378 if (ch == "+") return after_e;
381 if (!has_dot && !has_x)
382 return has_dot = true;
385 return is_alphanumeric_char(ch);
389 var valid = parse_js_number(num);
391 return token("num", valid);
393 parse_error("Invalid syntax: " + num);
397 function read_escaped_char(in_string) {
398 var ch = next(true, in_string);
400 case "n" : return "\n";
401 case "r" : return "\r";
402 case "t" : return "\t";
403 case "b" : return "\b";
404 case "v" : return "\u000b";
405 case "f" : return "\f";
406 case "0" : return "\0";
407 case "x" : return String.fromCharCode(hex_bytes(2));
408 case "u" : return String.fromCharCode(hex_bytes(4));
409 case "\n": return "";
414 function hex_bytes(n) {
417 var digit = parseInt(next(true), 16);
419 parse_error("Invalid hex-character pattern in string");
420 num = (num << 4) | digit;
425 function read_string() {
426 return with_eof_error("Unterminated string constant", function(){
427 var quote = next(), ret = "";
431 // read OctalEscapeSequence (XXX: deprecated if "strict mode")
432 // https://github.com/mishoo/UglifyJS/issues/178
433 var octal_len = 0, first = null;
434 ch = read_while(function(ch){
435 if (ch >= "0" && ch <= "7") {
440 else if (first <= "3" && octal_len <= 2) return ++octal_len;
441 else if (first >= "4" && octal_len <= 1) return ++octal_len;
445 if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8));
446 else ch = read_escaped_char(true);
448 else if (ch == quote) break;
451 return token("string", ret);
455 function read_line_comment() {
457 var i = find("\n"), ret;
459 ret = S.text.substr(S.pos);
460 S.pos = S.text.length;
462 ret = S.text.substring(S.pos, i);
465 return token("comment1", ret, true);
468 function read_multiline_comment() {
470 return with_eof_error("Unterminated multiline comment", function(){
471 var i = find("*/", true),
472 text = S.text.substring(S.pos, i);
474 S.line += text.split("\n").length - 1;
475 S.newline_before = text.indexOf("\n") >= 0;
477 // https://github.com/mishoo/UglifyJS/issues/#issue/100
478 if (/^@cc_on/i.test(text)) {
479 warn("WARNING: at line " + S.line);
480 warn("*** Found \"conditional comment\": " + text);
481 warn("*** UglifyJS DISCARDS ALL COMMENTS. This means your code might no longer work properly in Internet Explorer.");
484 return token("comment2", text, true);
488 function read_name() {
489 var backslash = false, name = "", ch;
490 while ((ch = peek()) != null) {
492 if (ch == "\\") backslash = true, next();
493 else if (is_identifier_char(ch)) name += next();
497 if (ch != "u") parse_error("Expecting UnicodeEscapeSequence -- uXXXX");
498 ch = read_escaped_char();
499 if (!is_identifier_char(ch)) parse_error("Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier");
507 function read_regexp(regexp) {
508 return with_eof_error("Unterminated regular expression", function(){
509 var prev_backslash = false, ch, in_class = false;
510 while ((ch = next(true))) if (prev_backslash) {
512 prev_backslash = false;
513 } else if (ch == "[") {
516 } else if (ch == "]" && in_class) {
519 } else if (ch == "/" && !in_class) {
521 } else if (ch == "\\") {
522 prev_backslash = true;
526 var mods = read_name();
527 return token("regexp", [ regexp, mods ]);
531 function read_operator(prefix) {
533 if (!peek()) return op;
534 var bigger = op + peek();
535 if (HOP(OPERATORS, bigger)) {
542 return token("operator", grow(prefix || next()));
545 function handle_slash() {
547 var regex_allowed = S.regex_allowed;
550 S.comments_before.push(read_line_comment());
551 S.regex_allowed = regex_allowed;
554 S.comments_before.push(read_multiline_comment());
555 S.regex_allowed = regex_allowed;
558 return S.regex_allowed ? read_regexp("") : read_operator("/");
561 function handle_dot() {
563 return is_digit(peek())
565 : token("punc", ".");
568 function read_word() {
569 var word = read_name();
570 return !HOP(KEYWORDS, word)
571 ? token("name", word)
572 : HOP(OPERATORS, word)
573 ? token("operator", word)
574 : HOP(KEYWORDS_ATOM, word)
575 ? token("atom", word)
576 : token("keyword", word);
579 function with_eof_error(eof_error, cont) {
583 if (ex === EX_EOF) parse_error(eof_error);
588 function next_token(force_regexp) {
589 if (force_regexp != null)
590 return read_regexp(force_regexp);
594 if (!ch) return token("eof");
595 if (is_digit(ch)) return read_num();
596 if (ch == '"' || ch == "'") return read_string();
597 if (HOP(PUNC_CHARS, ch)) return token("punc", next());
598 if (ch == ".") return handle_dot();
599 if (ch == "/") return handle_slash();
600 if (HOP(OPERATOR_CHARS, ch)) return read_operator();
601 if (ch == "\\" || is_identifier_start(ch)) return read_word();
602 parse_error("Unexpected character '" + ch + "'");
605 next_token.context = function(nc) {
614 /* -----[ Parser (constants) ]----- */
616 var UNARY_PREFIX = array_to_hash([
// Operators that may follow their operand.  The tokenizer's token() does not
// allow a regexp literal after one of these, and maybe_unary() wraps an atom in
// a "unary-postfix" node when one follows it on the same line.
628 var UNARY_POSTFIX = array_to_hash([ "--", "++" ]);
630 var ASSIGNMENT = (function(a, ret, i){
631 while (i < a.length) {
632 ret[a[i]] = a[i].substr(0, a[i].length - 1);
637 ["+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&="],
642 var PRECEDENCE = (function(a, ret){
643 for (var i = 0, n = 1; i < a.length; ++i, ++n) {
645 for (var j = 0; j < b.length; ++j) {
657 ["==", "===", "!=", "!=="],
658 ["<", ">", "<=", ">=", "in", "instanceof"],
// Statement tags that labeled_statement() checks a labelled statement against
// when the parser runs in exigent mode.
666 var STATEMENTS_WITH_LABELS = array_to_hash([ "for", "do", "while", "switch" ]);
// Token types that expr_atom() turns directly into an atomic AST node.
668 var ATOMIC_START_TOKEN = array_to_hash([ "atom", "num", "string", "regexp", "name" ]);
670 /* -----[ Parser ]----- */
672 function NodeWithToken(str, start, end) {
// String conversion of a NodeWithToken yields its `name` property.
678 NodeWithToken.prototype.toString = function() { return this.name; };
680 function parse($TEXT, exigent_mode, embed_tokens) {
683 input : typeof $TEXT == "string" ? tokenizer($TEXT, true) : $TEXT,
694 function is(type, value) {
695 return is_token(S.token, type, value);
698 function peek() { return S.peeked || (S.peeked = S.input()); };
715 function croak(msg, line, col, pos) {
716 var ctx = S.input.context();
718 line != null ? line : ctx.tokline,
719 col != null ? col : ctx.tokcol,
720 pos != null ? pos : ctx.tokpos);
723 function token_error(token, msg) {
724 croak(msg, token.line, token.col);
727 function unexpected(token) {
730 token_error(token, "Unexpected token: " + token.type + " (" + token.value + ")");
733 function expect_token(type, val) {
737 token_error(S.token, "Unexpected token " + S.token.type + ", expected " + type);
740 function expect(punc) { return expect_token("punc", punc); };
742 function can_insert_semicolon() {
743 return !exigent_mode && (
744 S.token.nlb || is("eof") || is("punc", "}")
748 function semicolon() {
749 if (is("punc", ";")) next();
750 else if (!can_insert_semicolon()) unexpected();
754 return slice(arguments);
757 function parenthesised() {
759 var ex = expression();
764 function add_tokens(str, start, end) {
765 return str instanceof NodeWithToken ? str : new NodeWithToken(str, start, end);
768 function maybe_embed_tokens(parser) {
769 if (embed_tokens) return function() {
771 var ast = parser.apply(this, arguments);
772 ast[0] = add_tokens(ast[0], start, prev());
778 var statement = maybe_embed_tokens(function() {
779 if (is("operator", "/") || is("operator", "/=")) {
781 S.token = S.input(S.token.value.substr(1)); // force regexp
783 switch (S.token.type) {
789 return simple_statement();
792 return is_token(peek(), "punc", ":")
793 ? labeled_statement(prog1(S.token.value, next, next))
794 : simple_statement();
797 switch (S.token.value) {
799 return as("block", block_());
802 return simple_statement();
811 switch (prog1(S.token.value, next)) {
813 return break_cont("break");
816 return break_cont("continue");
820 return as("debugger");
823 return (function(body){
824 expect_token("keyword", "while");
825 return as("do", prog1(parenthesised, semicolon), body);
826 })(in_loop(statement));
832 return function_(true);
838 if (S.in_function == 0)
839 croak("'return' outside of function");
843 : can_insert_semicolon()
845 : prog1(expression, semicolon));
848 return as("switch", parenthesised(), switch_block_());
852 croak("Illegal newline after 'throw'");
853 return as("throw", prog1(expression, semicolon));
859 return prog1(var_, semicolon);
862 return prog1(const_, semicolon);
865 return as("while", parenthesised(), in_loop(statement));
868 return as("with", parenthesised(), statement());
876 function labeled_statement(label) {
877 S.labels.push(label);
878 var start = S.token, stat = statement();
879 if (exigent_mode && !HOP(STATEMENTS_WITH_LABELS, stat[0]))
882 return as("label", label, stat);
885 function simple_statement() {
886 return as("stat", prog1(expression, semicolon));
889 function break_cont(type) {
891 if (!can_insert_semicolon()) {
892 name = is("name") ? S.token.value : null;
896 if (!member(name, S.labels))
897 croak("Label " + name + " without matching loop or statement");
899 else if (S.in_loop == 0)
900 croak(type + " not inside a loop or switch");
902 return as(type, name);
908 if (!is("punc", ";")) {
909 init = is("keyword", "var")
910 ? (next(), var_(true))
911 : expression(true, true);
912 if (is("operator", "in")) {
913 if (init[0] == "var" && init[1].length > 1)
914 croak("Only one variable declaration allowed in for..in loop");
918 return regular_for(init);
921 function regular_for(init) {
923 var test = is("punc", ";") ? null : expression();
925 var step = is("punc", ")") ? null : expression();
927 return as("for", init, test, step, in_loop(statement));
930 function for_in(init) {
931 var lhs = init[0] == "var" ? as("name", init[1][0]) : init;
933 var obj = expression();
935 return as("for-in", init, lhs, obj, in_loop(statement));
938 var function_ = function(in_statement) {
939 var name = is("name") ? prog1(S.token.value, next) : null;
940 if (in_statement && !name)
943 return as(in_statement ? "defun" : "function",
947 while (!is("punc", ")")) {
948 if (first) first = false; else expect(",");
949 if (!is("name")) unexpected();
950 a.push(S.token.value);
959 var loop = S.in_loop;
969 var cond = parenthesised(), body = statement(), belse;
970 if (is("keyword", "else")) {
974 return as("if", cond, body, belse);
980 while (!is("punc", "}")) {
981 if (is("eof")) unexpected();
988 var switch_block_ = curry(in_loop, function(){
990 var a = [], cur = null;
991 while (!is("punc", "}")) {
992 if (is("eof")) unexpected();
993 if (is("keyword", "case")) {
996 a.push([ expression(), cur ]);
999 else if (is("keyword", "default")) {
1003 a.push([ null, cur ]);
1006 if (!cur) unexpected();
1007 cur.push(statement());
1015 var body = block_(), bcatch, bfinally;
1016 if (is("keyword", "catch")) {
1020 croak("Name expected");
1021 var name = S.token.value;
1024 bcatch = [ name, block_() ];
1026 if (is("keyword", "finally")) {
1028 bfinally = block_();
1030 if (!bcatch && !bfinally)
1031 croak("Missing catch/finally blocks");
1032 return as("try", body, bcatch, bfinally);
1035 function vardefs(no_in) {
1040 var name = S.token.value;
1042 if (is("operator", "=")) {
1044 a.push([ name, expression(false, no_in) ]);
1048 if (!is("punc", ","))
1055 function var_(no_in) {
1056 return as("var", vardefs(no_in));
1060 return as("const", vardefs());
1064 var newexp = expr_atom(false), args;
1065 if (is("punc", "(")) {
1067 args = expr_list(")");
1071 return subscripts(as("new", newexp, args), true);
1074 var expr_atom = maybe_embed_tokens(function(allow_calls) {
1075 if (is("operator", "new")) {
1080 switch (S.token.value) {
1083 return subscripts(prog1(expression, curry(expect, ")")), allow_calls);
1086 return subscripts(array_(), allow_calls);
1089 return subscripts(object_(), allow_calls);
1093 if (is("keyword", "function")) {
1095 return subscripts(function_(false), allow_calls);
1097 if (HOP(ATOMIC_START_TOKEN, S.token.type)) {
1098 var atom = S.token.type == "regexp"
1099 ? as("regexp", S.token.value[0], S.token.value[1])
1100 : as(S.token.type, S.token.value);
1101 return subscripts(prog1(atom, next), allow_calls);
1106 function expr_list(closing, allow_trailing_comma, allow_empty) {
1107 var first = true, a = [];
1108 while (!is("punc", closing)) {
1109 if (first) first = false; else expect(",");
1110 if (allow_trailing_comma && is("punc", closing)) break;
1111 if (is("punc", ",") && allow_empty) {
1112 a.push([ "atom", "undefined" ]);
1114 a.push(expression(false));
1122 return as("array", expr_list("]", !exigent_mode, true));
1125 function object_() {
1126 var first = true, a = [];
1127 while (!is("punc", "}")) {
1128 if (first) first = false; else expect(",");
1129 if (!exigent_mode && is("punc", "}"))
1130 // allow trailing comma
1132 var type = S.token.type;
1133 var name = as_property_name();
1134 if (type == "name" && (name == "get" || name == "set") && !is("punc", ":")) {
1135 a.push([ as_name(), function_(false), name ]);
1138 a.push([ name, expression(false) ]);
1142 return as("object", a);
1145 function as_property_name() {
1146 switch (S.token.type) {
1149 return prog1(S.token.value, next);
1154 function as_name() {
1155 switch (S.token.type) {
1160 return prog1(S.token.value, next);
1166 function subscripts(expr, allow_calls) {
1167 if (is("punc", ".")) {
1169 return subscripts(as("dot", expr, as_name()), allow_calls);
1171 if (is("punc", "[")) {
1173 return subscripts(as("sub", expr, prog1(expression, curry(expect, "]"))), allow_calls);
1175 if (allow_calls && is("punc", "(")) {
1177 return subscripts(as("call", expr, expr_list(")")), true);
1182 function maybe_unary(allow_calls) {
1183 if (is("operator") && HOP(UNARY_PREFIX, S.token.value)) {
1184 return make_unary("unary-prefix",
1185 prog1(S.token.value, next),
1186 maybe_unary(allow_calls));
1188 var val = expr_atom(allow_calls);
1189 while (is("operator") && HOP(UNARY_POSTFIX, S.token.value) && !S.token.nlb) {
1190 val = make_unary("unary-postfix", S.token.value, val);
1196 function make_unary(tag, op, expr) {
1197 if ((op == "++" || op == "--") && !is_assignable(expr))
1198 croak("Invalid use of " + op + " operator");
1199 return as(tag, op, expr);
1202 function expr_op(left, min_prec, no_in) {
1203 var op = is("operator") ? S.token.value : null;
1204 if (op && op == "in" && no_in) op = null;
1205 var prec = op != null ? PRECEDENCE[op] : null;
1206 if (prec != null && prec > min_prec) {
1208 var right = expr_op(maybe_unary(true), prec, no_in);
1209 return expr_op(as("binary", op, left, right), min_prec, no_in);
1214 function expr_ops(no_in) {
1215 return expr_op(maybe_unary(true), 0, no_in);
1218 function maybe_conditional(no_in) {
1219 var expr = expr_ops(no_in);
1220 if (is("operator", "?")) {
1222 var yes = expression(false);
1224 return as("conditional", expr, yes, expression(false, no_in));
1229 function is_assignable(expr) {
1230 if (!exigent_mode) return true;
1231 switch (expr[0]+"") {
1238 return expr[1] != "this";
1242 function maybe_assign(no_in) {
1243 var left = maybe_conditional(no_in), val = S.token.value;
1244 if (is("operator") && HOP(ASSIGNMENT, val)) {
1245 if (is_assignable(left)) {
1247 return as("assign", ASSIGNMENT[val], left, maybe_assign(no_in));
1249 croak("Invalid assignment");
1254 var expression = maybe_embed_tokens(function(commas, no_in) {
1255 if (arguments.length == 0)
1257 var expr = maybe_assign(no_in);
1258 if (commas && is("punc", ",")) {
1260 return as("seq", expr, expression(true, no_in));
1265 function in_loop(cont) {
1274 return as("toplevel", (function(a){
1276 a.push(statement());
1282 /* -----[ Utilities ]----- */
1285 var args = slice(arguments, 1);
1286 return function() { return f.apply(this, args.concat(slice(arguments))); };
1289 function prog1(ret) {
1290 if (ret instanceof Function)
1292 for (var i = 1, n = arguments.length; --n > 0; ++i)
1297 function array_to_hash(a) {
1299 for (var i = 0; i < a.length; ++i)
// Copies the array-like `a` (e.g. an `arguments` object) into a real array,
// starting at index `start` (0 when omitted or falsy).
1304 function slice(a, start) {
1305 return Array.prototype.slice.call(a, start || 0);
// Splits `str` into an array of one-character strings (UTF-16 code units).
1308 function characters(str) {
1309 return str.split("");
1312 function member(name, array) {
1313 for (var i = array.length; --i >= 0;)
1314 if (array[i] == name)
// "Has Own Property": true when `prop` is an own property of `obj`.
// Calls Object.prototype.hasOwnProperty directly rather than through `obj`.
1319 function HOP(obj, prop) {
1320 return Object.prototype.hasOwnProperty.call(obj, prop);
// Warning sink used by read_multiline_comment(); a no-op as initialised here.
// NOTE(review): presumably replaced through exports.set_logger -- confirm.
1323 var warn = function() {};
1325 /* -----[ Exports ]----- */
// Module interface: the two entry points (tokenizer, parse), followed by the
// helper functions and lookup tables that are also exposed to callers.
1327 exports.tokenizer = tokenizer;
1328 exports.parse = parse;
1329 exports.slice = slice;
1330 exports.curry = curry;
1331 exports.member = member;
1332 exports.array_to_hash = array_to_hash;
1333 exports.PRECEDENCE = PRECEDENCE;
1334 exports.KEYWORDS_ATOM = KEYWORDS_ATOM;
1335 exports.RESERVED_WORDS = RESERVED_WORDS;
1336 exports.KEYWORDS = KEYWORDS;
1337 exports.ATOMIC_START_TOKEN = ATOMIC_START_TOKEN;
1338 exports.OPERATORS = OPERATORS;
1339 exports.is_alphanumeric_char = is_alphanumeric_char;
1340 exports.set_logger = function(logger) {