3 # Copyright 2012 the V8 project authors. All rights reserved.
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
8 # * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 # * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following
12 # disclaimer in the documentation and/or other materials provided
13 # with the distribution.
14 # * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived
16 # from this software without specific prior written permission.
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 """A JavaScript minifier.
32 It is far from being a complete JS parser, so there are many valid
33 JavaScript programs that will be ruined by it. Another strangeness is that
34 it accepts $ and % as parts of identifiers. It doesn't merge lines or strip
35 out blank lines in order to ease debugging. Variables at the top scope are
36 properties of the global object so we can't rename them. It is assumed that
37 you introduce variables with var as if JavaScript followed C++ scope rules
38 around curly braces, so the declaration must be above the first use.
42 minifier = JavaScriptMinifier()
43 program1 = minifier.JSMinify(program1)
44 program2 = minifier.JSMinify(program2)
50 class JavaScriptMinifier(object):
51 """An object that you can feed code snippets to in order to minify them."""
54 # We prepopulate the list of identifiers that shouldn't be used. These
55 # short language keywords could otherwise be used by the script as variable
57 self.seen_identifiers = {"do": True, "in": True}
58 self.identifier_counter = 0
59 self.in_comment = False
63 def LookAtIdentifier(self, m):
64 """Records identifiers or keywords that we see in use.
66 (So we can avoid renaming variables to these strings.)
68 m: The match object that re.sub passes to this callback.
73 identifier = m.group(1)
74 self.seen_identifiers[identifier] = True
77 """Called when we encounter a '{'."""
81 """Called when we encounter a '}'."""
83 # We treat each top-level opening brace as a single scope that can span
84 # several sets of nested braces.
87 self.identifier_counter = 0
89 def Declaration(self, m):
90 """Rewrites bits of the program selected by a regexp.
92 These can be curly braces, literal strings, function declarations and var
93 declarations. (These last two must be on one line including the opening
94 curly brace of the function for their variables to be renamed).
97 m: The match object returned by re.search.
100 The string that should replace the match in the rewritten program.
102 matched_text = m.group(0)
103 if matched_text == "{":
106 if matched_text == "}":
109 if re.match("[\"'/]", matched_text):
111 m = re.match(r"var ", matched_text)
113 var_names = matched_text[m.end():]
114 var_names = re.split(r",", var_names)
115 return "var " + ",".join(map(self.FindNewName, var_names))
116 m = re.match(r"(function\b[^(]*)\((.*)\)\{$", matched_text)
118 up_to_args = m.group(1)
120 args = re.split(r",", args)
122 return up_to_args + "(" + ",".join(map(self.FindNewName, args)) + "){"
124 if matched_text in self.map:
125 return self.map[matched_text]
129 def CharFromNumber(self, number):
130 """A single-digit base-52 encoding using a-zA-Z."""
132 return chr(number + 97)
134 return chr(number + 65)
136 def FindNewName(self, var_name):
137 """Finds a new 1-character or 2-character name for a variable.
139 Enters it into the mapping table for this scope.
142 var_name: The name of the variable before renaming.
145 The new name of the variable.
148 # Variable names that end in _ are member variables of the global object,
149 # so they can be visible from code in a different scope. We leave them
151 if var_name in self.map:
152 return self.map[var_name]
153 if self.nesting == 0:
156 identifier_first_char = self.identifier_counter % 52
157 identifier_second_char = self.identifier_counter // 52
158 new_identifier = self.CharFromNumber(identifier_first_char)
159 if identifier_second_char != 0:
161 self.CharFromNumber(identifier_second_char - 1) + new_identifier)
162 self.identifier_counter += 1
163 if not new_identifier in self.seen_identifiers:
166 self.map[var_name] = new_identifier
167 return new_identifier
169 def RemoveSpaces(self, m):
170 """Returns literal strings unchanged, replaces other inputs with group 1.
172 Other inputs are replaced with the contents of capture 1. This is either
173 a single space or an empty string.
176 m: The match object returned by re.search.
179 The string that should be inserted instead of the matched text.
181 entire_match = m.group(0)
182 replacement = m.group(1)
183 if re.match(r"'.*'$", entire_match):
185 if re.match(r'".*"$', entire_match):
187 if re.match(r"/.+/$", entire_match):
191 def JSMinify(self, text):
192 """The main entry point. Takes a text and returns a compressed version.
194 The compressed version hopefully does the same thing. Line breaks are
198 text: The text of the code snippet as a multiline string.
201 The compressed text of the code snippet as a multiline string.
204 for line in re.split(r"\n", text):
205 line = line.replace("\t", " ")
207 m = re.search(r"\*/", line)
209 line = line[m.end():]
210 self.in_comment = False
215 if not self.in_comment:
216 line = re.sub(r"/\*.*?\*/", " ", line)
217 line = re.sub(r"//.*", "", line)
218 m = re.search(r"/\*", line)
220 line = line[:m.start()]
221 self.in_comment = True
223 # Strip leading and trailing spaces.
224 line = re.sub(r"^ +", "", line)
225 line = re.sub(r" +$", "", line)
226 # A regexp that matches a literal string surrounded by "double quotes".
227 # This regexp can handle embedded backslash-escaped characters including
228 # embedded backslash-escaped double quotes.
229 double_quoted_string = r'"(?:[^"\\]|\\.)*"'
230 # A regexp that matches a literal string surrounded by 'single quotes'.
231 single_quoted_string = r"'(?:[^'\\]|\\.)*'"
232 # A regexp that matches a regexp literal surrounded by /slashes/.
233 # Don't allow a regexp to have a ) before the first ( since that's a
234 # syntax error and it's probably just two unrelated slashes.
235 # Also don't allow it to come after anything that can only be the
236 # end of a primary expression.
237 slash_quoted_regexp = r"(?<![\w$'\")\]])/(?:(?=\()|(?:[^()/\\]|\\.)+)(?:\([^/\\]|\\.)*/"
238 # Replace multiple spaces with a single space.
239 line = re.sub("|".join([double_quoted_string,
240 single_quoted_string,
245 # Strip single spaces unless they have an identifier character both before
246 # and after the space. % and $ are counted as identifier characters.
247 line = re.sub("|".join([double_quoted_string,
248 single_quoted_string,
250 r"(?<![a-zA-Z_0-9$%]) | (?![a-zA-Z_0-9$%])()"]),
253 # Collect keywords and identifiers that are already in use.
254 if self.nesting == 0:
255 re.sub(r"([a-zA-Z0-9_$%]+)", self.LookAtIdentifier, line)
256 function_declaration_regexp = (
257 r"\bfunction" # Function definition keyword...
258 r"( [\w$%]+)?" # ...optional function name...
259 r"\([\w$%,]+\)\{") # ...argument declarations.
260 # Unfortunately the keyword-value syntax { key:value } makes the key look
261 # like a variable where in fact it is a literal string. We use the
262 # presence or absence of a question mark to try to distinguish between
263 # this case and the ternary operator: "condition ? iftrue : iffalse".
264 if re.search(r"\?", line):
265 block_trailing_colon = r""
267 block_trailing_colon = r"(?![:\w$%])"
268 # Variable use. Cannot follow a period or precede a colon.
269 variable_use_regexp = r"(?<![.\w$%])[\w$%]+" + block_trailing_colon
270 line = re.sub("|".join([double_quoted_string,
271 single_quoted_string,
273 r"\{", # Curly braces.
275 r"\bvar [\w$%,]+", # var declarations.
276 function_declaration_regexp,
277 variable_use_regexp]),
280 new_lines.append(line)
282 return "\n".join(new_lines) + "\n"