# Copyright 2010 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Replays web pages under simulated network conditions.

Must be run as administrator (sudo).

To record web pages:
  1. Start the program in record mode.
     $ sudo ./replay.py --record archive.wpr
  2. Load the web pages you want to record in a web browser. It is important to
     clear browser caches before this so that all subresources are requested
     from the network.
  3. Kill the process to stop recording.

To replay web pages:
  1. Start the program in replay mode with a previously recorded archive.
     $ sudo ./replay.py archive.wpr
  2. Load recorded pages in a web browser. A 404 will be served for any pages or
     resources not in the recorded archive.

Network simulation examples:
  # 128KByte/s uplink bandwidth, 4Mbit/s downlink bandwidth with 100ms RTT
  $ sudo ./replay.py --up 128KByte/s --down 4Mbit/s --delay_ms=100 archive.wpr

  # 1% packet loss rate
  $ sudo ./replay.py --packet_loss_rate=0.01 archive.wpr
"""
import json
import logging
import optparse
import os
import socket
import sys
import traceback

import cachemissarchive
import customhandlers
import dnsproxy
import httparchive
import httpclient
import httpproxy
import net_configs
import platformsettings
import replayspdyserver
import script_injector
import servermanager
import trafficshaper
# Refuse to run on interpreters older than 2.6. Compare the numeric
# version_info tuple instead of the sys.version string: string comparison is
# lexicographic, so e.g. '2.10' would sort before '2.6'.
if sys.version_info < (2, 6):
    # Parenthesized single-argument form prints the same text on 2.x and 3.x.
    print('Need Python 2.6 or greater.')
    sys.exit(1)
def configure_logging(log_level_name, log_file_name=None):
    """Configure logging level and format.

    Sets up the root logger with logging.basicConfig(), then optionally
    attaches a file handler and the platform's system logging handler.

    Args:
      log_level_name: 'debug', 'info', 'warning', 'error', or 'critical'.
      log_file_name: optional file name; when set, records are also written
          to that file with the same level and format.
    """
    if logging.root.handlers:
        # basicConfig() does nothing once the root logger already has
        # handlers, so an earlier logging call means the configuration below
        # will not take full effect.
        logging.critical('A logging method (e.g. "logging.warn(...)")'
                         ' was called before logging was configured.')
    log_level = getattr(logging, log_level_name.upper())
    log_format = '%(asctime)s %(levelname)s %(message)s'
    logging.basicConfig(level=log_level, format=log_format)
    logger = logging.getLogger()
    if log_file_name:
        fh = logging.FileHandler(log_file_name)
        fh.setLevel(log_level)
        fh.setFormatter(logging.Formatter(log_format))
        logger.addHandler(fh)
    # The platform layer may not provide a handler; only attach a real one.
    system_handler = platformsettings.get_system_logging_handler()
    if system_handler:
        logger.addHandler(system_handler)
def AddDnsForward(server_manager, host):
    """Forward DNS traffic.

    Registers platformsettings.set_temporary_primary_nameserver with the
    server manager, passing |host| as its argument.

    Args:
      server_manager: object exposing Append(callable, *args).
      host: address handed to set_temporary_primary_nameserver.
    """
    server_manager.Append(
        platformsettings.set_temporary_primary_nameserver, host)
def AddDnsProxy(server_manager, options, host, port, real_dns_lookup,
                http_archive):
    """Register the replay DNS proxy server and its lookup filters.

    Args:
      server_manager: receives the server and the record/replay callbacks.
      options: option values; reads dns_private_passthrough, shaping_dns and
          record.
      host: address the DNS proxy binds to and that ReplayDnsLookup is
          constructed with.
      port: port the DNS proxy listens on.
      real_dns_lookup: lookup object handed to the private-IP filter.
      http_archive: archive handed to the private-IP filter.
    """
    dns_filters = []
    if options.dns_private_passthrough:
        private_filter = dnsproxy.PrivateIpFilter(real_dns_lookup,
                                                  http_archive)
        dns_filters.append(private_filter)
        # The same initializer runs on entering either mode.
        server_manager.AppendRecordCallback(
            private_filter.InitializeArchiveHosts)
        server_manager.AppendReplayCallback(
            private_filter.InitializeArchiveHosts)
    if options.shaping_dns:
        delay_filter = dnsproxy.DelayFilter(options.record,
                                            **options.shaping_dns)
        dns_filters.append(delay_filter)
        server_manager.AppendRecordCallback(delay_filter.SetRecordMode)
        server_manager.AppendReplayCallback(delay_filter.SetReplayMode)
    server_manager.Append(
        dnsproxy.DnsProxyServer, host, port,
        dns_lookup=dnsproxy.ReplayDnsLookup(host, dns_filters))
def AddWebProxy(server_manager, options, host, real_dns_lookup, http_archive,
                cache_misses):
    """Register the web proxy servers (SPDY, or HTTP plus optional HTTPS).

    Args:
      server_manager: receives the servers and the record/replay callbacks.
      options: option values; reads inject_scripts, spdy, record, port,
          ssl, ssl_port, should_generate_certs, http_to_https_port,
          https_root_ca_cert_path, use_server_delay, shaping_http and the
          archive-matching flags.
      host: address the proxy servers bind to.
      real_dns_lookup: lookup object handed to the archive fetcher.
      http_archive: archive that requests are served from or recorded into.
      cache_misses: cache-miss archive handed to the archive fetcher
          (may be None).
    """
    inject_script = script_injector.GetInjectScript(options.inject_scripts)
    custom_handlers = customhandlers.CustomHandlers(options, http_archive)
    if options.spdy:
        # Sanity check only; --spdy with --record is already rejected by
        # OptionsWrapper._CONFLICTING_OPTIONS.
        assert not options.record, 'spdy cannot be used with --record.'
        archive_fetch = httpclient.ReplayHttpArchiveFetch(
            http_archive, real_dns_lookup,
            inject_script,
            options.diff_unknown_requests,
            cache_misses=cache_misses,
            use_closest_match=options.use_closest_match,
            scramble_images=options.scramble_images)
        server_manager.Append(
            replayspdyserver.ReplaySpdyServer, archive_fetch,
            custom_handlers, host=host, port=options.port,
            certfile=options.https_root_ca_cert_path)
    else:
        custom_handlers.add_server_manager_handler(server_manager)
        archive_fetch = httpclient.ControllableHttpArchiveFetch(
            http_archive, real_dns_lookup,
            inject_script,
            options.diff_unknown_requests, options.record,
            cache_misses=cache_misses,
            use_closest_match=options.use_closest_match,
            scramble_images=options.scramble_images)
        server_manager.AppendRecordCallback(archive_fetch.SetRecordMode)
        server_manager.AppendReplayCallback(archive_fetch.SetReplayMode)
        server_manager.Append(
            httpproxy.HttpProxyServer,
            archive_fetch, custom_handlers,
            host=host, port=options.port,
            use_delays=options.use_server_delay,
            **options.shaping_http)
        if options.ssl:
            if options.should_generate_certs:
                server_manager.Append(
                    httpproxy.HttpsProxyServer, archive_fetch,
                    custom_handlers, options.https_root_ca_cert_path,
                    host=host, port=options.ssl_port,
                    use_delays=options.use_server_delay,
                    **options.shaping_http)
            else:
                server_manager.Append(
                    httpproxy.SingleCertHttpsProxyServer, archive_fetch,
                    custom_handlers, options.https_root_ca_cert_path,
                    host=host, port=options.ssl_port,
                    use_delays=options.use_server_delay,
                    **options.shaping_http)
        if options.http_to_https_port:
            server_manager.Append(
                httpproxy.HttpToHttpsProxyServer,
                archive_fetch, custom_handlers,
                host=host, port=options.http_to_https_port,
                use_delays=options.use_server_delay,
                **options.shaping_http)
def AddTrafficShaper(server_manager, options, host):
    """Register the dummynet traffic shaper when shaping is configured.

    Does nothing when options.shaping_dummynet is empty.

    Args:
      server_manager: object exposing AppendTrafficShaper(cls, **kwargs).
      options: option values; reads shaping_dummynet and server_mode.
      host: address traffic is shaped for.
    """
    if options.shaping_dummynet:
        # Loopback shaping is requested only for a local (non server-mode)
        # replay bound to 127.0.0.1.
        server_manager.AppendTrafficShaper(
            trafficshaper.TrafficShaper, host=host,
            use_loopback=not options.server_mode and host == '127.0.0.1',
            **options.shaping_dummynet)
class OptionsWrapper(object):
    """Add checks, updates, and methods to option values.

    Wraps the values returned by optparse: conflicting options are rejected
    through the parser's error(), dependent options are filled in, and the
    traffic-shaping keyword arguments for each server kind are precomputed
    as shaping_dns, shaping_http and shaping_dummynet. Any attribute not
    defined on the wrapper is looked up on the wrapped option values.

    Example:
      options, args = option_parser.parse_args()
      options = OptionsWrapper(options, option_parser)  # run checks and updates
      if options.admin_check and options.IsRootRequired():
         [...]
    """
    _TRAFFICSHAPING_OPTIONS = set(
        ['down', 'up', 'delay_ms', 'packet_loss_rate', 'init_cwnd', 'net'])
    # (option, options that may not be combined with it)
    _CONFLICTING_OPTIONS = (
        ('record', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                    'spdy', 'use_server_delay')),
        ('append', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                    'spdy', 'use_server_delay')),  # same as --record
        ('net', ('down', 'up', 'delay_ms')),
        ('server', ('server_mode',)),
    )

    def __init__(self, options, parser):
        """Validate |options| and derive dependent values.

        Args:
          options: the values object returned by parser.parse_args().
          parser: the optparse parser; supplies defaults and error().
        """
        self._options = options
        self._parser = parser
        # Names of options whose value differs from the parser default.
        self._nondefaults = set(
            name for name, value in parser.defaults.items()
            if getattr(options, name) != value)
        self._CheckConflicts()
        self._CheckValidIp('host')
        self._MassageValues()

    def _CheckConflicts(self):
        """Give an error if mutually exclusive options are used."""
        for option, bad_options in self._CONFLICTING_OPTIONS:
            if option not in self._nondefaults:
                continue
            for bad_option in bad_options:
                if bad_option in self._nondefaults:
                    self._parser.error(
                        'Option --%s cannot be used with --%s.' %
                        (bad_option, option))

    def _CheckValidIp(self, name):
        """Give an error if option |name| is not a valid IPv4 address.

        An unset (falsy) value is accepted without checking.
        """
        value = getattr(self._options, name)
        if value:
            try:
                socket.inet_aton(value)
            except (socket.error, TypeError, ValueError):
                # Only conversion failures mean "bad address"; anything else
                # (e.g. KeyboardInterrupt) must not be reported as one.
                self._parser.error(
                    'Option --%s must be a valid IPv4 address.' % name)

    def _ShapingKeywordArgs(self, shaping_key):
        """Return the shaping keyword args for |shaping_key|.

        Args:
          shaping_key: one of 'dummynet', 'dns', 'http'.
        Returns:
          {}  # if shaping_key does not apply, or options have default values.
          {k: v, ...}  # keyword args built from the non-default options.
        """
        kwargs = {}

        def AddItemIfSet(d, kw_key, opt_key=None):
            # Copy an option into |d| only when the user changed it.
            opt_key = opt_key or kw_key
            if opt_key in self._nondefaults:
                d[kw_key] = getattr(self, opt_key)

        # 'proxy' shaping is applied by the dns and http servers; otherwise
        # the key must match the selected shaping type.
        if ((self.shaping_type == 'proxy' and shaping_key in ('dns', 'http'))
                or self.shaping_type == shaping_key):
            AddItemIfSet(kwargs, 'delay_ms')
            if shaping_key in ('dummynet', 'http'):
                AddItemIfSet(kwargs, 'down_bandwidth', opt_key='down')
                AddItemIfSet(kwargs, 'up_bandwidth', opt_key='up')
                if shaping_key == 'dummynet':
                    AddItemIfSet(kwargs, 'packet_loss_rate')
                    AddItemIfSet(kwargs, 'init_cwnd')
                elif self.shaping_type != 'none':
                    # These two options are only passed to dummynet; tell
                    # the user they are being dropped.
                    if 'packet_loss_rate' in self._nondefaults:
                        logging.warning(
                            'Shaping type, %s, ignores --packet_loss_rate=%s',
                            self.shaping_type, self.packet_loss_rate)
                    if 'init_cwnd' in self._nondefaults:
                        logging.warning(
                            'Shaping type, %s, ignores --init_cwnd=%s',
                            self.shaping_type, self.init_cwnd)
        return kwargs

    def _MassageValues(self):
        """Set options that depend on the values of other options."""
        if self.append and not self.record:
            self._options.record = True
        if self.net:
            self._options.down, self._options.up, self._options.delay_ms = \
                net_configs.GetNetConfig(self.net)
            # Values filled in from --net count as explicitly set.
            self._nondefaults.update(['down', 'up', 'delay_ms'])
        if not self.ssl:
            self._options.https_root_ca_cert_path = None
        self.shaping_dns = self._ShapingKeywordArgs('dns')
        self.shaping_http = self._ShapingKeywordArgs('http')
        self.shaping_dummynet = self._ShapingKeywordArgs('dummynet')

    def __getattr__(self, name):
        """Make the original option values available."""
        if name == '_options':
            # Reached only when _options was never assigned (e.g. an instance
            # created without __init__); without this guard the lookup below
            # would recurse without bound.
            raise AttributeError(name)
        return getattr(self._options, name)

    def __repr__(self):
        """Return a json representation of the original options dictionary."""
        return json.dumps(self._options.__dict__)

    def IsRootRequired(self):
        """Returns True iff the options require whole program root access."""
        if self.server:
            return True

        def IsPrivilegedPort(port):
            return port and port < 1024

        if IsPrivilegedPort(self.port) or (
                self.ssl and IsPrivilegedPort(self.ssl_port)):
            return True

        if self.dns_forwarding:
            if IsPrivilegedPort(self.dns_port):
                return True
            if not self.server_mode and self.host == '127.0.0.1':
                return True

        return False
def replay(options, replay_filename):
    """Set up the requested servers, run them, and persist the results.

    Args:
      options: an OptionsWrapper holding the parsed command-line options.
      replay_filename: path of the archive to load or record into; not used
          when options.server is set.

    Returns:
      0 after a normal run or Ctrl-C, 1 when one of the known DNS / traffic
      shaper / platform errors was raised, 2 for any other exception.
    """
    if options.admin_check and options.IsRootRequired():
        platformsettings.rerun_as_administrator()
    configure_logging(options.log_level, options.log_file)
    server_manager = servermanager.ServerManager(options.record)
    cache_misses = None
    # Stays None in --server mode, where no archive is loaded or created.
    http_archive = None
    if options.cache_miss_file:
        if os.path.exists(options.cache_miss_file):
            logging.warning(
                'Cache Miss Archive file %s already exists; '
                'replay will load and append entries to archive file',
                options.cache_miss_file)
            cache_misses = cachemissarchive.CacheMissArchive.Load(
                options.cache_miss_file)
        else:
            cache_misses = cachemissarchive.CacheMissArchive(
                options.cache_miss_file)
    if options.server:
        # Client-only mode: just point DNS at the remote replay server.
        AddDnsForward(server_manager, options.server)
    else:
        real_dns_lookup = dnsproxy.RealDnsLookup(
            name_servers=[platformsettings.get_original_primary_nameserver()])
        if options.record:
            httparchive.HttpArchive.AssertWritable(replay_filename)
            if options.append and os.path.exists(replay_filename):
                http_archive = httparchive.HttpArchive.Load(replay_filename)
                logging.info('Appending to %s (loaded %d existing responses)',
                             replay_filename, len(http_archive))
            else:
                http_archive = httparchive.HttpArchive()
        else:
            http_archive = httparchive.HttpArchive.Load(replay_filename)
            logging.info('Loaded %d responses from %s',
                         len(http_archive), replay_filename)
        server_manager.AppendRecordCallback(real_dns_lookup.ClearCache)
        server_manager.AppendRecordCallback(http_archive.clear)

        ipfw_dns_host = None
        if options.dns_forwarding or options.shaping_dummynet:
            # compute the ip/host used for the DNS server and traffic shaping
            ipfw_dns_host = options.host
            if not ipfw_dns_host:
                # NOTE(review): argument restored from a truncated listing;
                # confirm against platformsettings.get_server_ip_address.
                ipfw_dns_host = platformsettings.get_server_ip_address(
                    options.server_mode)

        if options.dns_forwarding:
            if not options.server_mode and ipfw_dns_host == '127.0.0.1':
                AddDnsForward(server_manager, ipfw_dns_host)
            AddDnsProxy(server_manager, options, ipfw_dns_host,
                        options.dns_port, real_dns_lookup, http_archive)
        if options.ssl and options.https_root_ca_cert_path is None:
            # NOTE(review): default certificate file name restored from a
            # truncated listing; confirm it matches the file shipped next to
            # this script.
            options.https_root_ca_cert_path = os.path.join(
                os.path.dirname(__file__), 'wpr_cert.pem')
        http_proxy_address = options.host
        if not http_proxy_address:
            # NOTE(review): argument restored from a truncated listing;
            # confirm against platformsettings.get_httpproxy_ip_address.
            http_proxy_address = platformsettings.get_httpproxy_ip_address(
                options.server_mode)
        AddWebProxy(server_manager, options, http_proxy_address,
                    real_dns_lookup, http_archive, cache_misses)
        AddTrafficShaper(server_manager, options, ipfw_dns_host)

    exit_status = 0
    try:
        server_manager.Run()
    except KeyboardInterrupt:
        logging.info('Shutting down.')
    except (dnsproxy.DnsProxyException,
            trafficshaper.TrafficShaperException,
            platformsettings.NotAdministratorError,
            platformsettings.DnsUpdateError) as e:
        logging.critical('%s: %s', e.__class__.__name__, e)
        exit_status = 1
    except:  # pylint: disable=bare-except
        # Deliberate catch-all: whatever went wrong, fall through so the
        # archive recorded so far is still persisted below.
        logging.critical(traceback.format_exc())
        exit_status = 2

    # http_archive is None in --server mode; there is nothing to save then.
    if options.record and http_archive is not None:
        http_archive.Persist(replay_filename)
        logging.info('Saved %d responses to %s',
                     len(http_archive), replay_filename)
    if cache_misses:
        cache_misses.Persist()
        logging.info('Saved %d cache misses and %d requests to %s',
                     cache_misses.get_total_cache_misses(),
                     len(cache_misses.request_counts.keys()),
                     options.cache_miss_file)
    return exit_status
def GetOptionParser():
    """Build the command-line option parser for replay.py.

    Options that omit action/type rely on optparse's defaults ('store' and
    'string'; 'choice' when choices are given).

    Returns:
      An optparse.OptionParser with the top-level options plus the
      'Network Simulation Options' and 'Replay Harness Options' groups.
    """

    class PlainHelpFormatter(optparse.IndentedHelpFormatter):
        """Help formatter that emits the description text unmodified."""

        def format_description(self, description):
            if description:
                return description + '\n'
            return ''

    # NOTE(review): description=__doc__ restored from a truncated listing;
    # it is what PlainHelpFormatter.format_description exists to print.
    option_parser = optparse.OptionParser(
        usage='%prog [options] replay_file',
        formatter=PlainHelpFormatter(),
        description=__doc__,
        epilog='http://code.google.com/p/web-page-replay/')

    option_parser.add_option(
        '--spdy', default=False,
        action='store_true',
        help='Replay via SPDY. (Can be combined with --no-ssl).')
    option_parser.add_option(
        '-r', '--record', default=False,
        action='store_true',
        help='Download real responses and record them to replay_file')
    option_parser.add_option(
        '--append', default=False,
        action='store_true',
        help='Append responses to replay_file.')
    option_parser.add_option(
        '-l', '--log_level', default='debug',
        type='choice',
        choices=('debug', 'info', 'warning', 'error', 'critical'),
        help='Minimum verbosity level to log')
    option_parser.add_option(
        '-f', '--log_file', default=None,
        help='Log file to use in addition to writing logs to stderr.')
    option_parser.add_option(
        '-e', '--cache_miss_file', default=None,
        dest='cache_miss_file',
        help='Archive file to record cache misses as pickled objects. '
             'Cache misses occur when a request cannot be served in replay '
             'mode.')

    network_group = optparse.OptionGroup(
        option_parser,
        'Network Simulation Options',
        'These options configure the network simulation in replay mode')
    network_group.add_option(
        '-u', '--up', default='0',
        help='Upload Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
    network_group.add_option(
        '-d', '--down', default='0',
        help='Download Bandwidth in [K|M]{bit/s|Byte/s}. '
             'Zero means unlimited.')
    network_group.add_option(
        '-m', '--delay_ms', default='0',
        help='Propagation delay (latency) in milliseconds. '
             'Zero means no delay.')
    network_group.add_option(
        '-p', '--packet_loss_rate', default='0',
        help='Packet loss rate in range [0..1]. Zero means no loss.')
    network_group.add_option(
        '-w', '--init_cwnd', default='0',
        help='Set initial cwnd (linux only, requires kernel patch)')
    network_group.add_option(
        '--net', default=None,
        type='choice',
        choices=net_configs.NET_CONFIG_NAMES,
        help='Select a set of network options: %s.' % ', '.join(
            net_configs.NET_CONFIG_NAMES))
    network_group.add_option(
        '--shaping_type', default='dummynet',
        type='choice',
        choices=('dummynet', 'proxy'),
        help='When shaping is configured (i.e. --up, --down, etc.) decides '
             'whether to use |dummynet| (default), or |proxy| servers.')
    option_parser.add_option_group(network_group)

    harness_group = optparse.OptionGroup(
        option_parser,
        'Replay Harness Options',
        'These advanced options configure various aspects of the replay '
        'harness')
    harness_group.add_option(
        '-S', '--server', default=None,
        help='IP address of host running "replay.py --server_mode". '
             'This only changes the primary DNS nameserver to use the given '
             'IP.')
    harness_group.add_option(
        '-M', '--server_mode', default=False,
        action='store_true',
        help='Run replay DNS & http proxies, and trafficshaping on --port '
             'without changing the primary DNS nameserver. '
             'Other hosts may connect to this using "replay.py --server" '
             'or by pointing their DNS to this server.')
    harness_group.add_option(
        '-i', '--inject_scripts', default='deterministic.js',
        dest='inject_scripts',
        help='A comma separated list of JavaScript sources to inject in all '
             'pages. By default a script is injected that makes sources '
             'of entropy such as Date() and Math.random() deterministic. '
             'CAUTION: Without deterministic.js, many pages will not replay.')
    harness_group.add_option(
        '-D', '--no-diff_unknown_requests', default=True,
        action='store_false',
        dest='diff_unknown_requests',
        help='During replay, do not show a diff of unknown requests against '
             'their nearest match in the archive.')
    harness_group.add_option(
        '-C', '--use_closest_match', default=False,
        action='store_true',
        dest='use_closest_match',
        help='During replay, if a request is not found, serve the closest '
             'match in the archive instead of giving a 404.')
    # NOTE(review): the tail of this help text was truncated in the listing;
    # 'requests.' is a reconstruction to be confirmed.
    harness_group.add_option(
        '-U', '--use_server_delay', default=False,
        action='store_true',
        dest='use_server_delay',
        help='During replay, simulate server delay by delaying response time '
             'to requests.')
    harness_group.add_option(
        '-I', '--screenshot_dir', default=None,
        help='Save PNG images of the loaded page in the given directory.')
    harness_group.add_option(
        '-P', '--no-dns_private_passthrough', default=True,
        action='store_false',
        dest='dns_private_passthrough',
        help='Don\'t forward DNS requests that resolve to private network '
             'addresses. CAUTION: With this option important services like '
             'Kerberos will resolve to the HTTP proxy address.')
    harness_group.add_option(
        '-x', '--no-dns_forwarding', default=True,
        action='store_false',
        dest='dns_forwarding',
        help='Don\'t forward DNS requests to the local replay server. '
             'CAUTION: With this option an external mechanism must be used to '
             'forward traffic to the replay server.')
    harness_group.add_option(
        '--host', default=None,
        help='The IP address to bind all servers to. Defaults to 0.0.0.0 or '
             '127.0.0.1, depending on --server_mode and platform.')
    harness_group.add_option(
        '-o', '--port', default=80,
        type='int',
        help='Port number to listen on.')
    harness_group.add_option(
        '--ssl_port', default=443,
        type='int',
        help='SSL port number to listen on.')
    harness_group.add_option(
        '--http_to_https_port', default=None,
        type='int',
        help='Port on which WPR will listen for HTTP requests that it will '
             'send along as HTTPS requests.')
    harness_group.add_option(
        '--dns_port', default=53,
        type='int',
        help='DNS port number to listen on.')
    harness_group.add_option(
        '-c', '--https_root_ca_cert_path', default=None,
        help='Certificate file to use with SSL (gets auto-generated if '
             'needed).')
    harness_group.add_option(
        '--no-ssl', default=True,
        action='store_false',
        dest='ssl',
        help='Do not setup an SSL proxy.')
    harness_group.add_option(
        '--should_generate_certs', default=False,
        action='store_true',
        help='Use OpenSSL to generate certificate files for requested hosts.')
    harness_group.add_option(
        '--no-admin-check', default=True,
        action='store_false',
        dest='admin_check',
        help='Do not check if administrator access is needed.')
    harness_group.add_option(
        '--scramble_images', default=False,
        action='store_true',
        dest='scramble_images',
        help='Scramble image responses.')
    # Registered once every harness option has been added to the group.
    option_parser.add_option_group(harness_group)
    return option_parser
def main():
    """Parse the command line and run replay().

    A replay_file argument is required unless --server is given, in which
    case no archive is used.

    Returns:
      The exit status returned by replay().
    """
    option_parser = GetOptionParser()
    options, args = option_parser.parse_args()
    options = OptionsWrapper(options, option_parser)

    if options.server:
        replay_filename = None
    elif len(args) != 1:
        # error() prints the usage message and exits the process.
        option_parser.error('Must specify a replay_file')
    else:
        replay_filename = args[0]

    return replay(options, replay_filename)


if __name__ == '__main__':
    sys.exit(main())