From 98390928742740bc5dffac3b10bbb10f807ce659 Mon Sep 17 00:00:00 2001 From: "erik.corry@gmail.com" Date: Wed, 23 Sep 2009 12:32:24 +0000 Subject: [PATCH] * Remove non-Open Source code from Douglas Crockford. * Be more var-correct in JS files. * Rename some JS variables to reflect the fact that they are instance variables on the global intrinsics object. * Missing optimization in StringCharAt. Review URL: http://codereview.chromium.org/215052 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@2959 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- LICENSE | 4 - src/array.js | 4 +- src/debug-delay.js | 4 +- src/messages.js | 17 +- src/mirror-delay.js | 2 - src/string.js | 78 ++++----- src/uri.js | 10 +- tools/js2c.py | 20 +-- tools/jsmin.py | 496 +++++++++++++++++++++++++++++----------------------- 9 files changed, 339 insertions(+), 296 deletions(-) diff --git a/LICENSE b/LICENSE index d2862b4..f94039e 100644 --- a/LICENSE +++ b/LICENSE @@ -21,10 +21,6 @@ are: This code is copyrighted by Sun Microsystems Inc. and released under a 3-clause BSD license. - - JSMin JavaScript minifier, located at tools/jsmin.py. This code is - copyrighted by Douglas Crockford and Baruch Even and released under - an MIT license. - - Valgrind client API header, located at third_party/valgrind/valgrind.h This is release under the BSD license. diff --git a/src/array.js b/src/array.js index eb69f97..f8e63d0 100644 --- a/src/array.js +++ b/src/array.js @@ -709,6 +709,8 @@ function ArraySort(comparefn) { QuickSort(a, high_start, to); } + var length; + // Copies elements in the range 0..length from obj's prototype chain // to obj itself, if obj has holes. Returns one more than the maximal index // of a prototype property. 
@@ -826,7 +828,7 @@ function ArraySort(comparefn) { return first_undefined; } - var length = ToUint32(this.length); + length = ToUint32(this.length); if (length < 2) return this; var is_array = IS_ARRAY(this); diff --git a/src/debug-delay.js b/src/debug-delay.js index ce70c75..189279b 100644 --- a/src/debug-delay.js +++ b/src/debug-delay.js @@ -25,8 +25,6 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// jsminify this file, js2c: jsmin - // Default number of frames to include in the response to backtrace request. const kDefaultBacktraceLength = 10; @@ -35,7 +33,7 @@ const Debug = {}; // Regular expression to skip "crud" at the beginning of a source line which is // not really code. Currently the regular expression matches whitespace and // comments. -const sourceLineBeginningSkip = /^(?:[ \v\h]*(?:\/\*.*?\*\/)*)*/; +const sourceLineBeginningSkip = /^(?:\s*(?:\/\*.*?\*\/)*)*/; // Debug events which can occour in the V8 JavaScript engine. These originate // from the API include file debug.h. diff --git a/src/messages.js b/src/messages.js index 6513067..2720792 100644 --- a/src/messages.js +++ b/src/messages.js @@ -32,6 +32,11 @@ var kVowelSounds = 0; var kCapitalVowelSounds = 0; +// If this object gets passed to an error constructor the error will +// get an accessor for .message that constructs a descriptive error +// message on access. +var kAddMessageAccessorsMarker = { }; + function GetInstanceName(cons) { if (cons.length == 0) { @@ -565,11 +570,6 @@ function GetStackTraceLine(recv, fun, pos, isGlobal) { // ---------------------------------------------------------------------------- // Error implementation -// If this object gets passed to an error constructor the error will -// get an accessor for .message that constructs a descriptive error -// message on access. 
-var kAddMessageAccessorsMarker = { }; - // Defines accessors for a property that is calculated the first time // the property is read. function DefineOneShotAccessor(obj, name, fun) { @@ -781,14 +781,15 @@ function FormatStackTrace(error, frames) { } for (var i = 0; i < frames.length; i++) { var frame = frames[i]; + var line; try { - var line = FormatSourcePosition(frame); + line = FormatSourcePosition(frame); } catch (e) { try { - var line = ""; + line = ""; } catch (ee) { // Any code that reaches this point is seriously nasty! - var line = ""; + line = ""; } } lines.push(" at " + line); diff --git a/src/mirror-delay.js b/src/mirror-delay.js index ee3dd64..c4ab7b8 100644 --- a/src/mirror-delay.js +++ b/src/mirror-delay.js @@ -25,8 +25,6 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// jsminify this file, js2c: jsmin - // Touch the RegExp and Date functions to make sure that date-delay.js and // regexp-delay.js has been loaded. This is required as the mirrors use // functions within these files through the builtins object. diff --git a/src/string.js b/src/string.js index 263fac5..fbdc307 100644 --- a/src/string.js +++ b/src/string.js @@ -62,7 +62,7 @@ function StringValueOf() { // ECMA-262, section 15.5.4.4 function StringCharAt(pos) { - var char_code = %_FastCharCodeAt(this, index); + var char_code = %_FastCharCodeAt(this, pos); if (!%_IsSmi(char_code)) { var subject = ToString(this); var index = TO_INTEGER(pos); @@ -184,6 +184,14 @@ function SubString(string, start, end) { } +// This has the same size as the lastMatchInfo array, and can be used for +// functions that expect that structure to be returned. It is used when the +// needle is a string rather than a regexp. In this case we can't update +// lastMatchArray without erroneously affecting the properties on the global +// RegExp object. 
+var reusableMatchInfo = [2, "", "", -1, -1]; + + // ECMA-262, section 15.5.4.11 function StringReplace(search, replace) { var subject = ToString(this); @@ -224,14 +232,6 @@ function StringReplace(search, replace) { } -// This has the same size as the lastMatchInfo array, and can be used for -// functions that expect that structure to be returned. It is used when the -// needle is a string rather than a regexp. In this case we can't update -// lastMatchArray without erroneously affecting the properties on the global -// RegExp object. -var reusableMatchInfo = [2, "", "", -1, -1]; - - // Helper function for regular expressions in String.prototype.replace. function StringReplaceRegExp(subject, regexp, replace) { replace = ToString(replace); @@ -370,8 +370,8 @@ function addCaptureString(builder, matchInfo, index) { // 'abcd'.replace(/(.)/g, function() { return RegExp.$1; } // should be 'abcd' and not 'dddd' (or anything else). function StringReplaceRegExpWithFunction(subject, regexp, replace) { - var lastMatchInfo = DoRegExpExec(regexp, subject, 0); - if (IS_NULL(lastMatchInfo)) return subject; + var matchInfo = DoRegExpExec(regexp, subject, 0); + if (IS_NULL(matchInfo)) return subject; var result = new ReplaceResultBuilder(subject); // There's at least one match. 
If the regexp is global, we have to loop @@ -382,11 +382,11 @@ function StringReplaceRegExpWithFunction(subject, regexp, replace) { if (regexp.global) { var previous = 0; do { - result.addSpecialSlice(previous, lastMatchInfo[CAPTURE0]); - var startOfMatch = lastMatchInfo[CAPTURE0]; - previous = lastMatchInfo[CAPTURE1]; - result.add(ApplyReplacementFunction(replace, lastMatchInfo, subject)); - // Can't use lastMatchInfo any more from here, since the function could + result.addSpecialSlice(previous, matchInfo[CAPTURE0]); + var startOfMatch = matchInfo[CAPTURE0]; + previous = matchInfo[CAPTURE1]; + result.add(ApplyReplacementFunction(replace, matchInfo, subject)); + // Can't use matchInfo any more from here, since the function could // overwrite it. // Continue with the next match. // Increment previous if we matched an empty string, as per ECMA-262 @@ -401,20 +401,20 @@ function StringReplaceRegExpWithFunction(subject, regexp, replace) { // Per ECMA-262 15.10.6.2, if the previous index is greater than the // string length, there is no match - lastMatchInfo = (previous > subject.length) + matchInfo = (previous > subject.length) ? null : DoRegExpExec(regexp, subject, previous); - } while (!IS_NULL(lastMatchInfo)); + } while (!IS_NULL(matchInfo)); // Tack on the final right substring after the last match, if necessary. if (previous < subject.length) { result.addSpecialSlice(previous, subject.length); } } else { // Not a global regexp, no need to loop. - result.addSpecialSlice(0, lastMatchInfo[CAPTURE0]); - var endOfMatch = lastMatchInfo[CAPTURE1]; - result.add(ApplyReplacementFunction(replace, lastMatchInfo, subject)); - // Can't use lastMatchInfo any more from here, since the function could + result.addSpecialSlice(0, matchInfo[CAPTURE0]); + var endOfMatch = matchInfo[CAPTURE1]; + result.add(ApplyReplacementFunction(replace, matchInfo, subject)); + // Can't use matchInfo any more from here, since the function could // overwrite it. 
result.addSpecialSlice(endOfMatch, subject.length); } @@ -424,20 +424,20 @@ function StringReplaceRegExpWithFunction(subject, regexp, replace) { // Helper function to apply a string replacement function once. -function ApplyReplacementFunction(replace, lastMatchInfo, subject) { +function ApplyReplacementFunction(replace, matchInfo, subject) { // Compute the parameter list consisting of the match, captures, index, // and subject for the replace function invocation. - var index = lastMatchInfo[CAPTURE0]; + var index = matchInfo[CAPTURE0]; // The number of captures plus one for the match. - var m = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1; + var m = NUMBER_OF_CAPTURES(matchInfo) >> 1; if (m == 1) { - var s = CaptureString(subject, lastMatchInfo, 0); + var s = CaptureString(subject, matchInfo, 0); // Don't call directly to avoid exposing the built-in global object. return replace.call(null, s, index, subject); } var parameters = $Array(m + 2); for (var j = 0; j < m; j++) { - parameters[j] = CaptureString(subject, lastMatchInfo, j); + parameters[j] = CaptureString(subject, matchInfo, j); } parameters[j] = index; parameters[j + 1] = subject; @@ -539,14 +539,14 @@ function StringSplit(separator, limit) { return result; } - var lastMatchInfo = splitMatch(separator, subject, currentIndex, startIndex); + var matchInfo = splitMatch(separator, subject, currentIndex, startIndex); - if (IS_NULL(lastMatchInfo)) { + if (IS_NULL(matchInfo)) { result[result.length] = subject.slice(currentIndex, length); return result; } - var endIndex = lastMatchInfo[CAPTURE1]; + var endIndex = matchInfo[CAPTURE1]; // We ignore a zero-length match at the currentIndex. 
if (startIndex === endIndex && endIndex === currentIndex) { @@ -554,12 +554,12 @@ function StringSplit(separator, limit) { continue; } - result[result.length] = SubString(subject, currentIndex, lastMatchInfo[CAPTURE0]); + result[result.length] = SubString(subject, currentIndex, matchInfo[CAPTURE0]); if (result.length === limit) return result; - for (var i = 2; i < NUMBER_OF_CAPTURES(lastMatchInfo); i += 2) { - var start = lastMatchInfo[CAPTURE(i)]; - var end = lastMatchInfo[CAPTURE(i + 1)]; + for (var i = 2; i < NUMBER_OF_CAPTURES(matchInfo); i += 2) { + var start = matchInfo[CAPTURE(i)]; + var end = matchInfo[CAPTURE(i + 1)]; if (start != -1 && end != -1) { result[result.length] = SubString(subject, start, end); } else { @@ -574,16 +574,16 @@ function StringSplit(separator, limit) { // ECMA-262 section 15.5.4.14 -// Helper function used by split. This version returns the lastMatchInfo +// Helper function used by split. This version returns the matchInfo // instead of allocating a new array with basically the same information. function splitMatch(separator, subject, current_index, start_index) { if (IS_REGEXP(separator)) { - var lastMatchInfo = DoRegExpExec(separator, subject, start_index); - if (lastMatchInfo == null) return null; + var matchInfo = DoRegExpExec(separator, subject, start_index); + if (matchInfo == null) return null; // Section 15.5.4.14 paragraph two says that we do not allow zero length // matches at the end of the string. - if (lastMatchInfo[CAPTURE0] === subject.length) return null; - return lastMatchInfo; + if (matchInfo[CAPTURE0] === subject.length) return null; + return matchInfo; } var separatorIndex = subject.indexOf(separator, start_index); diff --git a/src/uri.js b/src/uri.js index 0dfe765..5af71b6 100644 --- a/src/uri.js +++ b/src/uri.js @@ -30,6 +30,11 @@ // Expect $String = global.String; +// Lazily initialized. 
+var hexCharArray = 0; +var hexCharCodeArray = 0; + + function URIAddEncodedOctetToBuffer(octet, result, index) { result[index++] = 37; // Char code of '%'. result[index++] = hexCharCodeArray[octet >> 4]; @@ -320,11 +325,6 @@ function URIEncodeComponent(component) { } -// Lazily initialized. -var hexCharArray = 0; -var hexCharCodeArray = 0; - - function HexValueOf(c) { var code = c.charCodeAt(0); diff --git a/tools/js2c.py b/tools/js2c.py index cae39e8..2b7dbdf 100755 --- a/tools/js2c.py +++ b/tools/js2c.py @@ -52,20 +52,6 @@ def RemoveCommentsAndTrailingWhitespace(lines): return lines -def CompressScript(lines, do_jsmin): - # If we're not expecting this code to be user visible, we can run it through - # a more aggressive minifier. - if do_jsmin: - return jsmin.jsmin(lines) - - # Remove stuff from the source that we don't want to appear when - # people print the source code using Function.prototype.toString(). - # Note that we could easily compress the scripts mode but don't - # since we want it to remain readable. 
- lines = RemoveCommentsAndTrailingWhitespace(lines) - return lines - - def ReadFile(filename): file = open(filename, "rt") try: @@ -295,16 +281,18 @@ def JS2C(source, target, env): # Build source code lines source_lines = [ ] + + minifier = jsmin.JavaScriptMinifier() + source_lines_empty = [] for module in modules: filename = str(module) delay = filename.endswith('-delay.js') lines = ReadFile(filename) - do_jsmin = lines.find('// jsminify this file, js2c: jsmin') != -1 lines = ExpandConstants(lines, consts) lines = ExpandMacros(lines, macros) Validate(lines, filename) - lines = CompressScript(lines, do_jsmin) + lines = minifier.JSMinify(lines) data = ToCArray(lines) id = (os.path.split(filename)[1])[:-3] if delay: id = id[:-6] diff --git a/tools/jsmin.py b/tools/jsmin.py index ae75814..fd1abe4 100644 --- a/tools/jsmin.py +++ b/tools/jsmin.py @@ -1,218 +1,278 @@ -#!/usr/bin/python - -# This code is original from jsmin by Douglas Crockford, it was translated to -# Python by Baruch Even. The original code had the following copyright and -# license. -# -# /* jsmin.c -# 2007-05-22 -# -# Copyright (c) 2002 Douglas Crockford (www.crockford.com) -# -# Permission is hereby granted, free of charge, to any person obtaining a copy of -# this software and associated documentation files (the "Software"), to deal in -# the Software without restriction, including without limitation the rights to -# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -# of the Software, and to permit persons to whom the Software is furnished to do -# so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# The Software shall be used for Good, not Evil. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# */ - -from StringIO import StringIO - -def jsmin(js): - ins = StringIO(js) - outs = StringIO() - JavascriptMinify().minify(ins, outs) - str = outs.getvalue() - if len(str) > 0 and str[0] == '\n': - str = str[1:] - return str - -def isAlphanum(c): - """return true if the character is a letter, digit, underscore, - dollar sign, or non-ASCII character. - """ - return ((c >= 'a' and c <= 'z') or (c >= '0' and c <= '9') or - (c >= 'A' and c <= 'Z') or c == '_' or c == '$' or c == '\\' or (c is not None and ord(c) > 126)); - -class UnterminatedComment(Exception): - pass - -class UnterminatedStringLiteral(Exception): - pass - -class UnterminatedRegularExpression(Exception): - pass - -class JavascriptMinify(object): - - def _outA(self): - self.outstream.write(self.theA) - def _outB(self): - self.outstream.write(self.theB) - - def _get(self): - """return the next character from stdin. Watch out for lookahead. If - the character is a control character, translate it to a space or - linefeed. - """ - c = self.theLookahead - self.theLookahead = None - if c == None: - c = self.instream.read(1) - if c >= ' ' or c == '\n': - return c - if c == '': # EOF - return '\000' - if c == '\r': - return '\n' - return ' ' - - def _peek(self): - self.theLookahead = self._get() - return self.theLookahead - - def _next(self): - """get the next character, excluding comments. peek() is used to see - if an unescaped '/' is followed by a '/' or '*'. 
- """ - c = self._get() - if c == '/' and self.theA != '\\': - p = self._peek() - if p == '/': - c = self._get() - while c > '\n': - c = self._get() - return c - if p == '*': - c = self._get() - while 1: - c = self._get() - if c == '*': - if self._peek() == '/': - self._get() - return ' ' - if c == '\000': - raise UnterminatedComment() - - return c - - def _action(self, action): - """do something! What you do is determined by the argument: - 1 Output A. Copy B to A. Get the next B. - 2 Copy B to A. Get the next B. (Delete A). - 3 Get the next B. (Delete B). - action treats a string as a single character. Wow! - action recognizes a regular expression if it is preceded by ( or , or =. - """ - if action <= 1: - self._outA() - - if action <= 2: - self.theA = self.theB - if self.theA == "'" or self.theA == '"': - while 1: - self._outA() - self.theA = self._get() - if self.theA == self.theB: - break - if self.theA <= '\n': - raise UnterminatedStringLiteral() - if self.theA == '\\': - self._outA() - self.theA = self._get() - - - if action <= 3: - self.theB = self._next() - if self.theB == '/' and (self.theA == '(' or self.theA == ',' or - self.theA == '=' or self.theA == ':' or - self.theA == '[' or self.theA == '?' or - self.theA == '!' or self.theA == '&' or - self.theA == '|' or self.theA == ';' or - self.theA == '{' or self.theA == '}' or - self.theA == '\n'): - self._outA() - self._outB() - while 1: - self.theA = self._get() - if self.theA == '/': - break - elif self.theA == '\\': - self._outA() - self.theA = self._get() - elif self.theA <= '\n': - raise UnterminatedRegularExpression() - self._outA() - self.theB = self._next() - - - def _jsmin(self): - """Copy the input to the output, deleting the characters which are - insignificant to JavaScript. Comments will be removed. Tabs will be - replaced with spaces. Carriage returns will be replaced with linefeeds. - Most spaces and linefeeds will be removed. 
- """ - self.theA = '\n' - self._action(3) - - while self.theA != '\000': - if self.theA == ' ': - if isAlphanum(self.theB): - self._action(1) - else: - self._action(2) - elif self.theA == '\n': - if self.theB in ['{', '[', '(', '+', '-']: - self._action(1) - elif self.theB == ' ': - self._action(3) - else: - if isAlphanum(self.theB): - self._action(1) - else: - self._action(2) - else: - if self.theB == ' ': - if isAlphanum(self.theA): - self._action(1) - else: - self._action(3) - elif self.theB == '\n': - if self.theA in ['}', ']', ')', '+', '-', '"', '\'']: - self._action(1) - else: - if isAlphanum(self.theA): - self._action(1) - else: - self._action(3) - else: - self._action(1) - - def minify(self, instream, outstream): - self.instream = instream - self.outstream = outstream - self.theA = '\n' - self.theB = None - self.theLookahead = None - - self._jsmin() - self.instream.close() - -if __name__ == '__main__': - import sys - jsm = JavascriptMinify() - jsm.minify(sys.stdin, sys.stdout) +#!/usr/bin/python2.4 + +# Copyright 2009 the V8 project authors. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""A JavaScript minifier. + +It is far from being a complete JS parser, so there are many valid +JavaScript programs that will be ruined by it. Another strangeness is that +it accepts $ and % as parts of identifiers. It doesn't merge lines or strip +out blank lines in order to ease debugging. Variables at the top scope are +properties of the global object so we can't rename them. It is assumed that +you introduce variables with var as if JavaScript followed C++ scope rules +around curly braces, so the declaration must be above the first use. + +Use as: +import jsmin +minifier = jsmin.JavaScriptMinifier() +program1 = minifier.JSMinify(program1) +program2 = minifier.JSMinify(program2) +""" + +import re + + +class JavaScriptMinifier(object): + """An object that you can feed code snippets to in order to get them minified.""" + + def __init__(self): + # We prepopulate the list of identifiers that shouldn't be used. These + # short language keywords could otherwise be used by the script as variable + # names.
+ self.seen_identifiers = {"do": True, "in": True} + self.identifier_counter = 0 + self.in_comment = False + self.map = {} + self.nesting = 0 + + def LookAtIdentifier(self, m): + """Records identifiers or keywords that we see in use. + + (So we can avoid renaming variables to these strings.) + Args: + m: The match object returned by re.search. + + Returns: + Nothing. + """ + identifier = m.group(1) + self.seen_identifiers[identifier] = True + + def Push(self): + """Called when we encounter a '{'.""" + self.nesting += 1 + + def Pop(self): + """Called when we encounter a '}'.""" + self.nesting -= 1 + # We treat each top-level opening brace as a single scope that can span + # several sets of nested braces. + if self.nesting == 0: + self.map = {} + self.identifier_counter = 0 + + def Declaration(self, m): + """Rewrites bits of the program selected by a regexp. + + These can be curly braces, literal strings, function declarations and var + declarations. (These last two must be on one line including the opening + curly brace of the function for their variables to be renamed). + + Args: + m: The match object returned by re.search. + + Returns: + The string that should replace the match in the rewritten program. 
+ """ + matched_text = m.group(0) + if matched_text == "{": + self.Push() + return matched_text + if matched_text == "}": + self.Pop() + return matched_text + if re.match("[\"'/]", matched_text): + return matched_text + m = re.match(r"var ", matched_text) + if m: + var_names = matched_text[m.end():] + var_names = re.split(r",", var_names) + return "var " + ",".join(map(self.FindNewName, var_names)) + m = re.match(r"(function\b[^(]*)\((.*)\)\{$", matched_text) + if m: + up_to_args = m.group(1) + args = m.group(2) + args = re.split(r",", args) + self.Push() + return up_to_args + "(" + ",".join(map(self.FindNewName, args)) + "){" + + if matched_text in self.map: + return self.map[matched_text] + + return matched_text + + def CharFromNumber(self, number): + """A single-digit base-52 encoding using a-zA-Z.""" + if number < 26: + return chr(number + 97) + number -= 26 + return chr(number + 65) + + def FindNewName(self, var_name): + """Finds a new 1-character or 2-character name for a variable. + + Enters it into the mapping table for this scope. + + Args: + var_name: The name of the variable before renaming. + + Returns: + The new name of the variable. + """ + new_identifier = "" + # Variable names that end in _ are member variables of the global object, + # so they can be visible from code in a different scope. We leave them + # alone. 
+ if var_name in self.map: + return self.map[var_name] + if self.nesting == 0: + return var_name + while True: + identifier_first_char = self.identifier_counter % 52 + identifier_second_char = self.identifier_counter / 52 + new_identifier = self.CharFromNumber(identifier_first_char) + if identifier_second_char != 0: + new_identifier = ( + self.CharFromNumber(identifier_second_char - 1) + new_identifier) + self.identifier_counter += 1 + if not new_identifier in self.seen_identifiers: + break + + self.map[var_name] = new_identifier + return new_identifier + + def RemoveSpaces(self, m): + """Returns literal strings unchanged, replaces other inputs with group 1. + + Other inputs are replaced with the contents of capture 1. This is either + a single space or an empty string. + + Args: + m: The match object returned by re.search. + + Returns: + The string that should be inserted instead of the matched text. + """ + entire_match = m.group(0) + replacement = m.group(1) + if re.match(r"'.*'$", entire_match): + return entire_match + if re.match(r'".*"$', entire_match): + return entire_match + if re.match(r"/.+/$", entire_match): + return entire_match + return replacement + + def JSMinify(self, text): + """The main entry point. Takes a text and returns a compressed version. + + The compressed version hopefully does the same thing. Line breaks are + preserved. + + Args: + text: The text of the code snippet as a multiline string. + + Returns: + The compressed text of the code snippet as a multiline string. + """ + new_lines = [] + for line in re.split(r"\n", text): + line = line.replace("\t", " ") + if self.in_comment: + m = re.search(r"\*/", line) + if m: + line = line[m.end():] + self.in_comment = False + else: + new_lines.append("") + continue + + if not self.in_comment: + line = re.sub(r"/\*.*?\*/", " ", line) + line = re.sub(r"//.*", "", line) + m = re.search(r"/\*", line) + if m: + line = line[:m.start()] + self.in_comment = True + + # Strip leading and trailing spaces.
+ line = re.sub(r"^ +", "", line) + line = re.sub(r" +$", "", line) + # A regexp that matches a literal string surrounded by "double quotes". + # This regexp can handle embedded backslash-escaped characters including + # embedded backslash-escaped double quotes. + double_quoted_string = r'"(?:[^"\\]|\\.)*"' + # A regexp that matches a literal string surrounded by 'double quotes'. + single_quoted_string = r"'(?:[^'\\]|\\.)*'" + # A regexp that matches a regexp literal surrounded by /slashes/. + slash_quoted_regexp = r"/(?:[^/\\]|\\.)+/" + # Replace multiple spaces with a single space. + line = re.sub("|".join([double_quoted_string, + single_quoted_string, + slash_quoted_regexp, + "( )+"]), + self.RemoveSpaces, + line) + # Strip single spaces unless they have an identifier character both before + # and after the space. % and $ are counted as identifier characters. + line = re.sub("|".join([double_quoted_string, + single_quoted_string, + slash_quoted_regexp, + r"(?