3 # Copyright 2012 the V8 project authors. All rights reserved.
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
8 # * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 # * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following
12 # disclaimer in the documentation and/or other materials provided
13 # with the distribution.
14 # * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived
16 # from this software without specific prior written permission.
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 # This is a utility for converting JavaScript source code into C-style
31 # char arrays. It is used for embedded JavaScript code in the V8
34 import os, re, sys, string
class Error(Exception):
    """Error reported by this script, carrying a human-readable message."""

    def __init__(self, msg):
        super(Error, self).__init__(msg)
def ToCArray(byte_sequence):
    """Render a byte sequence as the body of a C array initializer.

    Args:
      byte_sequence: A text string or a bytes-like object.  Iterating it may
        yield one-character strings or integers; both are accepted.

    Returns:
      The decimal value of every element, separated by ", " and wrapped to
      lines of at most 80 columns.  An empty input gives an empty string.
    """
    values = []
    for item in byte_sequence:
        # Iterating bytes/bytearray can yield ints; iterating str yields chars.
        values.append(str(item if isinstance(item, int) else ord(item)))
    return textwrap.fill(", ".join(values), 80)
def RemoveCommentsAndTrailingWhitespace(lines):
    """Strip JavaScript comments and trailing whitespace from source text.

    The substitutions are purely textual (they are not aware of string
    literals) and are applied in this order:
      1. `// ...` comments up to a newline become just the newline,
      2. `/* ... */` comments (possibly spanning lines) are removed,
      3. whitespace before a newline, including blank lines, collapses to
         a single newline.

    Args:
      lines: The source text as one string.

    Returns:
      The stripped source text.
    """
    lines = re.sub(r'//.*\n', '\n', lines)  # end-of-line comments
    lines = re.sub(r'/\*.*?\*/', '', lines, flags=re.DOTALL)  # block comments
    lines = re.sub(r'\s+\n+', '\n', lines)  # trailing whitespace
    return lines
def ReadFile(filename):
    """Return the entire contents of a text file.

    Args:
      filename: Path of the file to read.

    Returns:
      The file contents as a single string.
    """
    # The context manager closes the handle even if reading fails.
    with open(filename, "rt") as source_file:
        return source_file.read()
EVAL_PATTERN = re.compile(r'\beval\s*\(')
WITH_PATTERN = re.compile(r'\bwith\s*\(')


def Validate(lines):
    """Reject source text that uses `eval(...)` or `with (...)`.

    Args:
      lines: The source text as one string.

    Returns:
      `lines`, unchanged, so the function can be used as a filter step.

    Raises:
      Error: If the text contains an eval call or a with statement.
    """
    # Because of the simplified context setup, eval and with are not
    # allowed in the natives files.
    if EVAL_PATTERN.search(lines):
        raise Error("Eval disallowed in natives.")
    if WITH_PATTERN.search(lines):
        raise Error("With statements disallowed in natives.")

    # Pass lines through unchanged.
    return lines
def ExpandConstants(lines, constants):
    """Replace every occurrence of each constant with its value.

    Args:
      lines: The source text as one string.
      constants: Iterable of (compiled pattern, value) pairs; each value is
        converted with str() before insertion.

    Returns:
      The source text with all constants substituted, in the given order.
    """
    for pattern, value in constants:
        text = str(value)
        # A callable replacement inserts the text verbatim; a plain string
        # would have its backslashes interpreted as regex escapes.
        lines = pattern.sub(lambda match, text=text: text, lines)
    return lines
def ExpandMacroDefinition(lines, pos, name_pattern, macro, expander):
    """Expand every call of one macro found at or after `pos`.

    Args:
      lines: Source text to process.
      pos: Index in `lines` at which the search for calls starts.
      name_pattern: Compiled regex matching the macro name followed by its
        opening parenthesis; every match must end with '('.
      macro: Object providing `args` (the parameter names) and
        `expand(mapping)`, which returns the replacement text.
      expander: Callable applied to each stripped argument before it is
        bound to its parameter (used to expand macros nested in arguments).

    Returns:
      The source text with the macro calls replaced by their expansions.

    Raises:
      IndexError: If a call passes more arguments than `macro.args` names.
    """
    pattern_match = name_pattern.search(lines, pos)
    while pattern_match is not None:
        start = pattern_match.start()
        end = pattern_match.end()
        assert lines[end - 1] == '('

        # Scan over the arguments.  `height` is the bracket nesting depth,
        # starting at 1 for the macro's own opening parenthesis.
        height = 1
        last_match = end
        arguments = []
        while end < len(lines) and height > 0:
            char = lines[end]
            # We don't count commas at higher nesting levels.
            if char == ',' and height == 1:
                arguments.append(lines[last_match:end])
                last_match = end + 1
            elif char in '({[':
                height += 1
            elif char in ')}]':
                height -= 1
            end += 1
        # Remember to add the last argument (without the closing bracket).
        arguments.append(lines[last_match:end - 1])

        mapping = {}
        for index, argument in enumerate(arguments):
            # Remember to expand recursively in the arguments.
            mapping[macro.args[index]] = expander(argument.strip())
        result = macro.expand(mapping)

        # Replace the occurrence of the macro with the expansion and resume
        # searching right after it, so the expansion itself is not rescanned.
        lines = lines[:start] + result + lines[end:]
        pattern_match = name_pattern.search(lines, start + len(result))
    return lines
def ExpandMacros(lines, macros):
    """Expand all macros in the source text.

    Args:
      lines: The source text as one string.
      macros: Sequence of (name_pattern, macro) pairs in declaration order.

    Returns:
      The source text with every macro call expanded.
    """
    # We allow macros to depend on the previously declared macros, but
    # we don't allow self-dependencies or recursion.  Processing the list in
    # reverse means an expansion can still be rewritten by earlier macros.
    def expander(text):
        # Arguments may themselves contain macro calls.
        return ExpandMacros(text, macros)

    for name_pattern, macro in reversed(macros):
        lines = ExpandMacroDefinition(lines, 0, name_pattern, macro, expander)
    return lines
class TextMacro(object):
    """Macro whose expansion is its body with parameter names substituted."""

    def __init__(self, args, body):
        # args: list of parameter names; body: replacement text template.
        self.args = args
        self.body = body

    def expand(self, mapping):
        """Return the body with each key of `mapping` replaced by its value.

        Replacement is plain substring substitution, applied one key after
        another in the mapping's iteration order.
        """
        result = self.body
        for key, value in mapping.items():
            result = result.replace(key, value)
        return result
class PythonMacro(object):
    """Macro whose expansion is computed by calling a Python function."""

    def __init__(self, args, fun):
        # args: list of parameter names; fun: callable taking one positional
        # argument per parameter.
        self.args = args
        self.fun = fun

    def expand(self, mapping):
        """Call the function with the mapped arguments; return str(result).

        Raises:
          KeyError: If `mapping` lacks a value for one of the parameters.
        """
        call_args = [mapping[arg] for arg in self.args]
        return str(self.fun(*call_args))
CONST_PATTERN = re.compile(r'^const\s+([a-zA-Z0-9_]+)\s*=\s*([^;]*);$')
MACRO_PATTERN = re.compile(r'^macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')
PYTHON_MACRO_PATTERN = re.compile(r'^python\s+macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')


def ReadMacros(lines):
    """Parse the contents of a macro definition file.

    Each non-empty line, after removing `#` comments and surrounding
    whitespace, must have one of these forms:
      const NAME = value;
      macro NAME(args) = body;
      python macro NAME(args) = expression;

    Args:
      lines: The macro file contents as one string.

    Returns:
      A tuple (constants, macros).  `constants` is a list of
      (compiled word pattern, value text) pairs; `macros` is a list of
      (compiled call pattern, TextMacro or PythonMacro) pairs.

    Raises:
      Error: If a line matches none of the accepted forms.
    """
    constants = []
    macros = []
    for line in lines.split('\n'):
        # Everything from '#' to the end of the line is a comment.
        comment_start = line.find('#')
        if comment_start != -1:
            line = line[:comment_start]
        line = line.strip()
        if not line:
            continue

        const_match = CONST_PATTERN.match(line)
        if const_match:
            name = const_match.group(1)
            value = const_match.group(2).strip()
            constants.append((re.compile("\\b%s\\b" % name), value))
            continue

        macro_match = MACRO_PATTERN.match(line)
        if macro_match:
            name = macro_match.group(1)
            args = [match.strip() for match in macro_match.group(2).split(',')]
            body = macro_match.group(3).strip()
            macros.append((re.compile("\\b%s\\(" % name), TextMacro(args, body)))
            continue

        python_match = PYTHON_MACRO_PATTERN.match(line)
        if python_match:
            name = python_match.group(1)
            args = [match.strip() for match in python_match.group(2).split(',')]
            body = python_match.group(3).strip()
            # NOTE: eval() executes code taken from the macro file, so that
            # file must be trusted.
            fun = eval("lambda " + ",".join(args) + ': ' + body)
            macros.append((re.compile("\\b%s\\(" % name), PythonMacro(args, fun)))
            continue

        raise Error("Illegal line: " + line)
    return (constants, macros)
INLINE_MACRO_PATTERN = re.compile(r'macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*\n')
INLINE_MACRO_END_PATTERN = re.compile(r'endmacro\s*\n')


def ExpandInlineMacros(lines):
    """Expand macros that are defined inside the source text itself.

    A definition starts with a `macro NAME(args)` line and runs up to the
    next `endmacro` line.  The definition is removed from the text and
    calls that follow it are replaced by the macro body.

    Args:
      lines: The source text as one string.

    Returns:
      The source text without inline macro definitions and with their
      calls expanded.

    Raises:
      Error: If a macro definition has no matching `endmacro`.
    """
    pos = 0
    while True:
        macro_match = INLINE_MACRO_PATTERN.search(lines, pos)
        if macro_match is None:
            # No more macros.
            return lines

        name = macro_match.group(1)
        args = [match.strip() for match in macro_match.group(2).split(',')]
        end_macro_match = INLINE_MACRO_END_PATTERN.search(lines, macro_match.end())
        if end_macro_match is None:
            raise Error("Macro %s unclosed" % name)
        body = lines[macro_match.end():end_macro_match.start()]

        # Remove the macro definition.
        lines = lines[:macro_match.start()] + lines[end_macro_match.end():]
        name_pattern = re.compile("\\b%s\\(" % name)
        macro = TextMacro(args, body)

        # Advance the position to where the macro definition was; only calls
        # from there on are expanded.
        pos = macro_match.start()

        # Arguments of inline macro calls are inserted as written.
        def non_expander(text):
            return text
        lines = ExpandMacroDefinition(lines, pos, name_pattern, macro, non_expander)
INLINE_CONSTANT_PATTERN = re.compile(r'const\s+([a-zA-Z0-9_]+)\s*=\s*([^;\n]+)[;\n]')


def ExpandInlineConstants(lines):
    """Expand `const NAME = value;` definitions found in the source text.

    Each definition is removed and every later whole-word occurrence of
    NAME is replaced by the value text.  Text before the definition is
    left untouched.

    Args:
      lines: The source text as one string.

    Returns:
      The source text without constant definitions and with their uses
      substituted.
    """
    pos = 0
    while True:
        const_match = INLINE_CONSTANT_PATTERN.search(lines, pos)
        if const_match is None:
            # No more constants.
            return lines

        name = const_match.group(1)
        replacement = const_match.group(2)
        name_pattern = re.compile("\\b%s\\b" % name)

        # Remove the constant definition and replace its later uses.  The
        # callable keeps backslashes in the value (e.g. "\n" inside a string
        # literal) from being interpreted as regex replacement escapes.
        lines = (lines[:const_match.start()] +
                 name_pattern.sub(lambda match, text=replacement: text,
                                  lines[const_match.end():]))

        # Advance the position to where the constant definition was.
        pos = const_match.start()
242 HEADER_TEMPLATE = """\
243 // Copyright 2011 Google Inc. All Rights Reserved.
245 // This file was generated from .js source files by GYP. If you
246 // want to make changes to this file you should either change the
247 // javascript source files or the GYP script.
250 #include "src/natives.h"
251 #include "src/utils.h"
256 %(sources_declaration)s\
258 %(raw_sources_declaration)s\
261 int NativesCollection<%(type)s>::GetBuiltinsCount() {
262 return %(builtin_count)i;
266 int NativesCollection<%(type)s>::GetDebuggerCount() {
267 return %(debugger_count)i;
271 int NativesCollection<%(type)s>::GetIndex(const char* name) {
277 int NativesCollection<%(type)s>::GetRawScriptsSize() {
278 return %(raw_total_length)i;
282 Vector<const char> NativesCollection<%(type)s>::GetRawScriptSource(int index) {
283 %(get_raw_script_source_cases)s\
284 return Vector<const char>("", 0);
288 Vector<const char> NativesCollection<%(type)s>::GetScriptName(int index) {
289 %(get_script_name_cases)s\
290 return Vector<const char>("", 0);
294 Vector<const byte> NativesCollection<%(type)s>::GetScriptsSource() {
295 return Vector<const byte>(sources, %(total_length)i);
299 void NativesCollection<%(type)s>::SetRawScriptsSource(Vector<const char> raw_source) {
300 DCHECK(%(raw_total_length)i == raw_source.length());
301 raw_sources = raw_source.start();
308 SOURCES_DECLARATION = """\
309 static const byte sources[] = { %s };
313 RAW_SOURCES_COMPRESSION_DECLARATION = """\
314 static const char* raw_sources = NULL;
318 RAW_SOURCES_DECLARATION = """\
319 static const char* raw_sources = reinterpret_cast<const char*>(sources);
323 GET_INDEX_CASE = """\
324 if (strcmp(name, "%(id)s") == 0) return %(i)i;
328 GET_RAW_SCRIPT_SOURCE_CASE = """\
329 if (index == %(i)i) return Vector<const char>(raw_sources + %(offset)i, %(raw_length)i);
333 GET_SCRIPT_NAME_CASE = """\
334 if (index == %(i)i) return Vector<const char>("%(name)s", %(length)i);
def BuildFilterChain(macro_filename):
    """Build the chain of filter functions to be applied to the sources.

    Args:
      macro_filename: Name of the macro file, if any.

    Returns:
      A function (string -> string) that reads a source file and processes it.
    """
    filter_chain = [ReadFile]

    if macro_filename:
        (consts, macros) = ReadMacros(ReadFile(macro_filename))
        filter_chain.append(lambda l: ExpandConstants(l, consts))
        filter_chain.append(lambda l: ExpandMacros(l, macros))

    filter_chain.extend([
        RemoveCommentsAndTrailingWhitespace,
        ExpandInlineMacros,
        ExpandInlineConstants,
        Validate,
        jsmin.JavaScriptMinifier().JSMinify
    ])

    def run_chain(filename):
        # Feed the output of each step into the next one.  An explicit loop
        # avoids the builtin reduce(), which only exists in Python 2.
        result = filename
        for step in filter_chain:
            result = step(result)
        return result

    return run_chain
class Sources(object):
    """Prepared sources, stored as three parallel lists."""

    def __init__(self):
        self.names = []           # Module names.
        self.modules = []         # Processed source text of each module.
        self.is_debugger_id = []  # Whether each module is a debugger source.
def IsDebuggerFile(filename):
    """Return True if `filename` ends with "-debugger.js"."""
    debugger_suffix = "-debugger.js"
    return filename.endswith(debugger_suffix)
def IsMacroFile(filename):
    """Return True if `filename` ends with "macros.py"."""
    macro_suffix = "macros.py"
    return filename.endswith(macro_suffix)
def PrepareSources(source_files):
    """Read, prepare and assemble the list of source files.

    Args:
      source_files: List of Javascript-ish source files. A file named
          macros.py will be treated as a list of macros.  The macro file, if
          present, is removed from this list in place.

    Returns:
      An instance of Sources.

    Raises:
      Error: If processing a source file fails; the message names the file.
    """
    # A list comprehension (rather than filter()) keeps len() working on
    # Python 3 as well.
    macro_files = [name for name in source_files if IsMacroFile(name)]
    assert len(macro_files) in [0, 1]
    macro_file = None
    if macro_files:
        source_files.remove(macro_files[0])
        macro_file = macro_files[0]

    filters = BuildFilterChain(macro_file)

    # Sort 'debugger' sources first.  The sort is stable, so the relative
    # order inside each group is kept.
    source_files = sorted(source_files,
                          key=lambda name: not IsDebuggerFile(name))

    result = Sources()
    for source in source_files:
        try:
            lines = filters(source)
        except Error as e:
            raise Error("In file %s:\n%s" % (source, str(e)))

        result.modules.append(lines)

        is_debugger = IsDebuggerFile(source)
        result.is_debugger_id.append(is_debugger)

        # Drop the ".js" extension and, for debugger files, "-debugger" too.
        name = os.path.basename(source)[:-3]
        result.names.append(name if not is_debugger else name[:-9])
    return result
def BuildMetadata(sources, source_bytes, native_type):
    """Build the meta data required to generate a libraries file.

    Args:
      sources: A Sources instance with the prepared sources.
      source_bytes: A list of source bytes.
          (The concatenation of all sources; might be compressed.)
      native_type: The parameter for the NativesCollection template.

    Returns:
      A dictionary for use with HEADER_TEMPLATE.
    """
    total_length = len(source_bytes)
    raw_sources = "".join(sources.modules)

    # The sources are expected to be ASCII-only.
    assert not any(ord(char) >= 128 for char in raw_sources)

    # Loop over modules and build up indices into the source blob:
    get_index_cases = []
    get_script_name_cases = []
    get_raw_script_source_cases = []
    offset = 0
    for i, module in enumerate(sources.modules):
        native_name = "native %s.js" % sources.names[i]
        d = {
            "i": i,
            "id": sources.names[i],
            "name": native_name,
            "length": len(native_name),
            "offset": offset,
            "raw_length": len(module),
        }
        get_index_cases.append(GET_INDEX_CASE % d)
        get_script_name_cases.append(GET_SCRIPT_NAME_CASE % d)
        get_raw_script_source_cases.append(GET_RAW_SCRIPT_SOURCE_CASE % d)
        offset += len(module)
    assert offset == len(raw_sources)

    # If we have the raw sources we can declare them accordingly; otherwise
    # (source_bytes differs, e.g. compressed) the pointer starts out as NULL.
    have_raw_sources = source_bytes == raw_sources
    raw_sources_declaration = (
        RAW_SOURCES_DECLARATION
        if have_raw_sources else RAW_SOURCES_COMPRESSION_DECLARATION)

    metadata = {
        "builtin_count": len(sources.modules),
        "debugger_count": sum(sources.is_debugger_id),
        "sources_declaration": SOURCES_DECLARATION % ToCArray(source_bytes),
        "raw_sources_declaration": raw_sources_declaration,
        "raw_total_length": sum(map(len, sources.modules)),
        "total_length": total_length,
        "get_index_cases": "".join(get_index_cases),
        "get_raw_script_source_cases": "".join(get_raw_script_source_cases),
        "get_script_name_cases": "".join(get_script_name_cases),
        "type": native_type,
    }
    return metadata
def CompressMaybe(sources, compression_type):
    """Take the prepared sources and generate a sequence of bytes.

    Args:
      sources: A Sources instance with the prepared sources.
      compression_type: string, describing the desired compression.
          "off" returns the concatenated sources as they are; "bz2" returns
          them bz2-compressed.

    Returns:
      The (possibly compressed) concatenation of all source modules.

    Raises:
      Error: If `compression_type` is neither "off" nor "bz2".
    """
    sources_bytes = "".join(sources.modules)
    if compression_type == "off":
        return sources_bytes
    if compression_type == "bz2":
        data = sources_bytes
        if not isinstance(data, bytes):
            # bz2 needs bytes; a text string (Python 3) is encoded first.
            data = data.encode("utf-8")
        return bz2.compress(data)
    raise Error("Unknown compression type %s." % compression_type)
def PutInt(blob_file, value):
    """Write a non-negative integer below 2**20 in a variable-length format.

    The value is shifted left by two bits and the byte count (1, 2 or 3) is
    stored in the two low bits; the result is written least significant
    byte first.

    Args:
      blob_file: Binary file object to write to.
      value: The integer to write; must satisfy 0 <= value < 2**20.
    """
    assert 0 <= value < (1 << 20)
    size = 1 if value < (1 << 6) else (2 if value < (1 << 14) else 3)
    value_with_length = (value << 2) | size

    byte_sequence = bytearray()
    for _ in range(size):
        byte_sequence.append(value_with_length & 255)
        value_with_length >>= 8
    blob_file.write(byte_sequence)
def PutStr(blob_file, value):
    """Write `value` to `blob_file`, preceded by its length (via PutInt)."""
    length = len(value)
    PutInt(blob_file, length)
    blob_file.write(value)
def WriteStartupBlob(sources, startup_blob):
    """Write a startup blob, as expected by V8 Initialize ...
        TODO(vogelheim): Add proper method name.

    The blob holds the number of debugger sources followed by a name/source
    pair for each of them, then the number of remaining sources followed by
    their name/source pairs.

    Args:
      sources: A Sources instance with the prepared sources.  Debugger
          sources are expected to come first.
      startup_blob: Name of file to write the blob to.
    """
    debug_sources = sum(sources.is_debugger_id)
    total_sources = len(sources.names)

    # The context manager closes the file even if writing fails.
    with open(startup_blob, "wb") as output:
        PutInt(output, debug_sources)
        for i in range(debug_sources):
            PutStr(output, sources.names[i])
            PutStr(output, sources.modules[i])

        PutInt(output, total_sources - debug_sources)
        for i in range(debug_sources, total_sources):
            PutStr(output, sources.names[i])
            PutStr(output, sources.modules[i])
def JS2C(source, target, native_type, compression_type, raw_file, startup_blob):
    """Convert JavaScript sources into a generated C++ source file.

    Args:
      source: List of source file names (may include the macro file).
      target: Name of the C++ file to generate.
      native_type: The parameter for the NativesCollection template.
      compression_type: Compression to apply, as accepted by CompressMaybe.
      raw_file: Optional name of a file receiving the processed (possibly
          compressed) sources; skipped when empty or None.
      startup_blob: Optional name of a startup blob file to write; skipped
          when empty or None.
    """
    sources = PrepareSources(source)
    sources_bytes = CompressMaybe(sources, compression_type)
    metadata = BuildMetadata(sources, sources_bytes, native_type)

    # Optionally emit raw file.
    if raw_file:
        with open(raw_file, "w") as output:
            output.write(sources_bytes)

    # Optionally emit the startup blob.
    if startup_blob:
        WriteStartupBlob(sources, startup_blob)

    # Emit resulting source file.
    with open(target, "w") as output:
        output.write(HEADER_TEMPLATE % metadata)
def main():
    """Parse the command line and run the conversion."""
    parser = optparse.OptionParser()
    parser.add_option("--raw", action="store",
                      help="file to write the processed sources array to.")
    parser.add_option("--startup_blob", action="store",
                      help="file to write the startup blob to.")
    parser.set_usage("""js2c out.cc type compression sources.js ...
      out.cc: C code to be generated.
      type: type parameter for NativesCollection template.
      compression: type of compression used. [off|bz2]
      sources.js: JS internal sources or macros.py.""")
    (options, args) = parser.parse_args()
    if len(args) < 3:
        # Report a usage error instead of failing with an IndexError below.
        parser.error("expected at least: out.cc type compression")

    JS2C(args[3:], args[0], args[1], args[2], options.raw, options.startup_blob)


if __name__ == "__main__":
    main()