2 # Copyright 2010 Google Inc. All Rights Reserved.
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
"""Replays web pages under simulated network conditions.

Must be run as administrator (sudo).

To record web pages:
  1. Start the program in record mode.
     $ sudo ./replay.py --record archive.wpr
  2. Load the web pages you want to record in a web browser. It is important to
     clear browser caches before this so that all subresources are requested
     from the network.
  3. Kill the process to stop recording.

To replay web pages:
  1. Start the program in replay mode with a previously recorded archive.
     $ sudo ./replay.py archive.wpr
  2. Load recorded pages in a web browser. A 404 will be served for any pages or
     resources not in the recorded archive.

Network simulation examples:
  # 128KByte/s uplink bandwidth, 4Mbit/s downlink bandwidth with 100ms RTT
  $ sudo ./replay.py --up 128KByte/s --down 4Mbit/s --delay_ms=100 archive.wpr

  # 1% packet loss rate
  $ sudo ./replay.py --packet_loss_rate=0.01 archive.wpr
"""
import json
import logging
import optparse
import os
import socket
import sys
import traceback

import cachemissarchive
import customhandlers
import dnsproxy
import httparchive
import httpclient
import httpproxy
import net_configs
import platformsettings
import replayspdyserver
import script_injector
import servermanager
import trafficshaper
# Compare the numeric version tuple: comparing the sys.version string
# lexicographically mis-orders versions whose components differ in digit count.
if sys.version_info < (2, 6):
  # A parenthesized single-argument print is valid both as a Python 2
  # statement and as a Python 3 function call.
  print('Need Python 2.6 or greater.')
  sys.exit(1)
def configure_logging(log_level_name, log_file_name=None):
  """Configure the root logger's level, format, and extra handlers.

  Args:
    log_level_name: 'debug', 'info', 'warning', 'error', or 'critical'.
    log_file_name: optional name of a file that also receives log records.
  """
  if logging.root.handlers:
    # logging.basicConfig() does nothing once the root logger has handlers,
    # so an earlier logging call means the settings below will not apply.
    logging.critical('A logging method (e.g. "logging.warn(...)")'
                     ' was called before logging was configured.')
  log_level = getattr(logging, log_level_name.upper())
  log_format = '%(asctime)s %(levelname)s %(message)s'
  logging.basicConfig(level=log_level, format=log_format)
  logger = logging.getLogger()
  # Only create and attach a file handler when a file name was supplied.
  if log_file_name:
    fh = logging.FileHandler(log_file_name)
    fh.setLevel(log_level)
    fh.setFormatter(logging.Formatter(log_format))
    logger.addHandler(fh)
  # The platform layer may not provide a system handler; skip it if falsy.
  system_handler = platformsettings.get_system_logging_handler()
  if system_handler:
    logger.addHandler(system_handler)
def AddDnsForward(server_manager, host):
  """Forward DNS traffic.

  Appends platformsettings.set_temporary_primary_nameserver, with |host| as
  its argument, to |server_manager|.
  """
  nameserver_setter = platformsettings.set_temporary_primary_nameserver
  server_manager.Append(nameserver_setter, host)
def AddDnsProxy(server_manager, options, host, port, real_dns_lookup,
                http_archive):
  """Register a DNS proxy server, plus optional DNS filters.

  Args:
    server_manager: manager that receives the server and mode callbacks.
    options: option values; reads dns_private_passthrough, shaping_dns, record.
    host: address handed to the DNS proxy server and to ReplayDnsLookup.
    port: port handed to the DNS proxy server.
    real_dns_lookup: lookup object handed to the private-IP filter.
    http_archive: archive handed to the private-IP filter.
  """
  dns_filters = []
  if options.dns_private_passthrough:
    private_filter = dnsproxy.PrivateIpFilter(real_dns_lookup, http_archive)
    dns_filters.append(private_filter)
    # The same initializer runs on entering either record or replay mode.
    server_manager.AppendRecordCallback(private_filter.InitializeArchiveHosts)
    server_manager.AppendReplayCallback(private_filter.InitializeArchiveHosts)
  if options.shaping_dns:
    delay_filter = dnsproxy.DelayFilter(options.record, **options.shaping_dns)
    dns_filters.append(delay_filter)
    server_manager.AppendRecordCallback(delay_filter.SetRecordMode)
    server_manager.AppendReplayCallback(delay_filter.SetReplayMode)
  server_manager.Append(dnsproxy.DnsProxyServer, host, port,
                        dns_lookup=dnsproxy.ReplayDnsLookup(host, dns_filters))
def AddWebProxy(server_manager, options, host, real_dns_lookup, http_archive,
                cache_misses):
  """Register the web proxy servers that serve (or record) the archive.

  With options.spdy a single SPDY replay server is registered. Otherwise an
  HTTP proxy is registered, plus an HTTPS proxy when options.ssl is set and an
  HTTP-to-HTTPS proxy when options.http_to_https_port is set.

  Args:
    server_manager: manager that receives the servers and mode callbacks.
    options: option values (ports, certfile, shaping_http, replay flags).
    host: address every proxy server is bound to.
    real_dns_lookup: lookup object handed to the archive fetcher.
    http_archive: archive handed to the fetcher and the custom handlers.
    cache_misses: cache-miss archive handed to the fetcher (may be None).
  """
  inject_script = script_injector.GetInjectScript(options.inject_scripts)
  custom_handlers = customhandlers.CustomHandlers(options, http_archive)
  if options.spdy:
    assert not options.record, 'spdy cannot be used with --record.'
    # NOTE(review): inject_script's position in both fetcher calls is
    # reconstructed; confirm against the httpclient signatures.
    archive_fetch = httpclient.ReplayHttpArchiveFetch(
        http_archive, real_dns_lookup,
        inject_script,
        options.diff_unknown_requests,
        cache_misses=cache_misses,
        use_closest_match=options.use_closest_match,
        scramble_images=options.scramble_images)
    server_manager.Append(
        replayspdyserver.ReplaySpdyServer, archive_fetch,
        custom_handlers, host=host, port=options.port,
        certfile=options.certfile)
  else:
    custom_handlers.add_server_manager_handler(server_manager)
    archive_fetch = httpclient.ControllableHttpArchiveFetch(
        http_archive, real_dns_lookup,
        inject_script,
        options.diff_unknown_requests, options.record,
        cache_misses=cache_misses, use_closest_match=options.use_closest_match,
        scramble_images=options.scramble_images)
    server_manager.AppendRecordCallback(archive_fetch.SetRecordMode)
    server_manager.AppendReplayCallback(archive_fetch.SetReplayMode)
    server_manager.Append(
        httpproxy.HttpProxyServer,
        archive_fetch, custom_handlers,
        host=host, port=options.port, use_delays=options.use_server_delay,
        **options.shaping_http)
    if options.ssl:
      server_manager.Append(
          httpproxy.HttpsProxyServer,
          archive_fetch, custom_handlers, options.certfile,
          host=host, port=options.ssl_port, use_delays=options.use_server_delay,
          **options.shaping_http)
    if options.http_to_https_port:
      server_manager.Append(
          httpproxy.HttpToHttpsProxyServer,
          archive_fetch, custom_handlers,
          host=host, port=options.http_to_https_port,
          use_delays=options.use_server_delay,
          **options.shaping_http)
def AddTrafficShaper(server_manager, options, host):
  """Register a traffic shaper when dummynet shaping arguments are present.

  Does nothing when options.shaping_dummynet is empty. The shaper is asked to
  use the loopback interface only when not in server mode and |host| is
  '127.0.0.1'.
  """
  if not options.shaping_dummynet:
    return
  loopback_only = not options.server_mode and host == '127.0.0.1'
  server_manager.AppendTrafficShaper(
      trafficshaper.TrafficShaper,
      host=host,
      use_loopback=loopback_only,
      **options.shaping_dummynet)
class OptionsWrapper(object):
  """Add checks, updates, and methods to option values.

  Wraps the optparse values object: attribute reads that are not found on the
  wrapper fall through to the wrapped values.

  Example:
    options, args = option_parser.parse_args()
    options = OptionsWrapper(options, option_parser)  # run checks and updates
    if options.admin_check and options.IsRootRequired():
      [...]

  Attributes set during construction:
    shaping_dns, shaping_http, shaping_dummynet: keyword-argument dicts built
        by _ShapingKeywordArgs for the corresponding shaping key.
  """
  # NOTE(review): not referenced inside this class; kept for external users.
  _TRAFFICSHAPING_OPTIONS = set(
      ['down', 'up', 'delay_ms', 'packet_loss_rate', 'init_cwnd', 'net'])
  # Pairs of (option, options that may not be combined with it).
  _CONFLICTING_OPTIONS = (
      ('record', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                  'spdy', 'use_server_delay')),
      ('append', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                  'spdy', 'use_server_delay')),  # same as --record
      ('net', ('down', 'up', 'delay_ms')),
      ('server', ('server_mode',)),
  )

  def __init__(self, options, parser):
    """Wrap |options|, then validate and derive dependent values.

    Args:
      options: the values object returned by parser.parse_args().
      parser: the option parser; supplies defaults and error reporting.
    """
    self._options = options
    self._parser = parser
    # Names of options whose parsed value differs from the parser default.
    self._nondefaults = set(
        name for name, value in parser.defaults.items()
        if getattr(options, name) != value)
    self._CheckConflicts()
    self._CheckValidIp('host')
    self._MassageValues()

  def _CheckConflicts(self):
    """Give an error if mutually exclusive options are used."""
    for option, bad_options in self._CONFLICTING_OPTIONS:
      if option not in self._nondefaults:
        continue
      for bad_option in bad_options:
        if bad_option in self._nondefaults:
          self._parser.error('Option --%s cannot be used with --%s.' %
                             (bad_option, option))

  def _CheckValidIp(self, name):
    """Give an error if option |name| is set but not a valid IPv4 address."""
    value = getattr(self._options, name)
    if value:
      try:
        socket.inet_aton(value)
      except (socket.error, TypeError):
        # socket.error: malformed address; TypeError: value is not a string.
        self._parser.error('Option --%s must be a valid IPv4 address.' % name)

  def _ShapingKeywordArgs(self, shaping_key):
    """Return the shaping keyword args for |shaping_key|.

    Only options with non-default values are included.

    Args:
      shaping_key: one of 'dummynet', 'dns', 'http'.
    Returns:
      {}  # if shaping_key does not apply, or options have default values.
      {k: v, ...}
    """
    kwargs = {}

    def AddItemIfSet(d, kw_key, opt_key=None):
      # Copy the option into |d| under |kw_key| if it has a non-default value.
      opt_key = opt_key or kw_key
      if opt_key in self._nondefaults:
        d[kw_key] = getattr(self, opt_key)

    # 'proxy' shaping applies to the dns and http servers; any other shaping
    # type applies only to the key of the same name.
    if ((self.shaping_type == 'proxy' and shaping_key in ('dns', 'http')) or
        self.shaping_type == shaping_key):
      AddItemIfSet(kwargs, 'delay_ms')
      if shaping_key in ('dummynet', 'http'):
        AddItemIfSet(kwargs, 'down_bandwidth', opt_key='down')
        AddItemIfSet(kwargs, 'up_bandwidth', opt_key='up')
        # NOTE(review): nesting of this if/elif is reconstructed; confirm.
        if shaping_key == 'dummynet':
          AddItemIfSet(kwargs, 'packet_loss_rate')
          AddItemIfSet(kwargs, 'init_cwnd')
        elif self.shaping_type != 'none':
          if 'packet_loss_rate' in self._nondefaults:
            logging.warning('Shaping type, %s, ignores --packet_loss_rate=%s',
                            self.shaping_type, self.packet_loss_rate)
          if 'init_cwnd' in self._nondefaults:
            logging.warning('Shaping type, %s, ignores --init_cwnd=%s',
                            self.shaping_type, self.init_cwnd)
    return kwargs

  def _MassageValues(self):
    """Set options that depend on the values of other options."""
    if self.append and not self.record:
      self._options.record = True
    if self.net:
      self._options.down, self._options.up, self._options.delay_ms = \
          net_configs.GetNetConfig(self.net)
      # Values taken from a named config count as explicitly set.
      self._nondefaults.update(['down', 'up', 'delay_ms'])
    if not self.ssl:
      self._options.certfile = None
    self.shaping_dns = self._ShapingKeywordArgs('dns')
    self.shaping_http = self._ShapingKeywordArgs('http')
    self.shaping_dummynet = self._ShapingKeywordArgs('dummynet')

  def __getattr__(self, name):
    """Make the original option values available."""
    # __getattr__ runs only after normal lookup fails. If _options itself is
    # missing (e.g. the instance was created without __init__), delegating
    # would recurse without bound, so fail with AttributeError instead.
    if name == '_options':
      raise AttributeError(name)
    return getattr(self._options, name)

  def __repr__(self):
    """Return a json representation of the original options dictionary."""
    return json.dumps(self._options.__dict__)

  def IsRootRequired(self):
    """Returns True iff the options require whole program root access."""
    # NOTE(review): this --server check is reconstructed; confirm that
    # changing the primary nameserver is meant to require root.
    if self.server:
      return True

    def IsPrivilegedPort(port):
      return port and port < 1024

    if IsPrivilegedPort(self.port) or (self.ssl and
                                       IsPrivilegedPort(self.ssl_port)):
      return True

    if self.dns_forwarding:
      if IsPrivilegedPort(self.dns_port):
        return True
      if not self.server_mode and self.host == '127.0.0.1':
        return True

    return False
def replay(options, replay_filename):
  """Set up the configured servers, run them, then persist any archives.

  Args:
    options: an OptionsWrapper holding the parsed command-line options.
    replay_filename: archive file to load or record; unused with --server.
  Returns:
    0 on a normal run or keyboard interrupt, 1 on a known setup/runtime
    error, 2 on any other exception.
  """
  if options.admin_check and options.IsRootRequired():
    platformsettings.rerun_as_administrator()
  configure_logging(options.log_level, options.log_file)
  server_manager = servermanager.ServerManager(options.record)
  cache_misses = None
  # Bound up front so the persist step below is safe on the --server path,
  # where no archive is ever loaded.
  http_archive = None
  if options.cache_miss_file:
    if os.path.exists(options.cache_miss_file):
      logging.warning('Cache Miss Archive file %s already exists; '
                      'replay will load and append entries to archive file',
                      options.cache_miss_file)
      cache_misses = cachemissarchive.CacheMissArchive.Load(
          options.cache_miss_file)
    else:
      cache_misses = cachemissarchive.CacheMissArchive(
          options.cache_miss_file)
  if options.server:
    # Client of a remote replay server: only the nameserver is redirected.
    AddDnsForward(server_manager, options.server)
  else:
    real_dns_lookup = dnsproxy.RealDnsLookup(
        name_servers=[platformsettings.get_original_primary_nameserver()])
    if options.record:
      httparchive.HttpArchive.AssertWritable(replay_filename)
      if options.append and os.path.exists(replay_filename):
        http_archive = httparchive.HttpArchive.Load(replay_filename)
        logging.info('Appending to %s (loaded %d existing responses)',
                     replay_filename, len(http_archive))
      else:
        http_archive = httparchive.HttpArchive()
    else:
      http_archive = httparchive.HttpArchive.Load(replay_filename)
      logging.info('Loaded %d responses from %s',
                   len(http_archive), replay_filename)
    server_manager.AppendRecordCallback(real_dns_lookup.ClearCache)
    server_manager.AppendRecordCallback(http_archive.clear)

    ipfw_dns_host = None
    if options.dns_forwarding or options.shaping_dummynet:
      # compute the ip/host used for the DNS server and traffic shaping
      ipfw_dns_host = options.host
      if not ipfw_dns_host:
        ipfw_dns_host = platformsettings.get_server_ip_address(
            options.server_mode)

    if options.dns_forwarding:
      if not options.server_mode and ipfw_dns_host == '127.0.0.1':
        AddDnsForward(server_manager, ipfw_dns_host)
      AddDnsProxy(server_manager, options, ipfw_dns_host, options.dns_port,
                  real_dns_lookup, http_archive)
    if options.ssl and options.certfile is None:
      options.certfile = os.path.join(os.path.dirname(__file__), 'wpr_cert.pem')
    http_proxy_address = options.host
    if not http_proxy_address:
      http_proxy_address = platformsettings.get_httpproxy_ip_address(
          options.server_mode)
    AddWebProxy(server_manager, options, http_proxy_address, real_dns_lookup,
                http_archive, cache_misses)
    AddTrafficShaper(server_manager, options, ipfw_dns_host)

  exit_status = 0
  try:
    server_manager.Run()
  except KeyboardInterrupt:
    logging.info('Shutting down.')
  except (dnsproxy.DnsProxyException,
          trafficshaper.TrafficShaperException,
          platformsettings.NotAdministratorError,
          platformsettings.DnsUpdateError) as e:
    logging.critical('%s: %s', e.__class__.__name__, e)
    exit_status = 1
  except:  # pylint: disable=bare-except
    # Deliberately catch everything so the archives below are still saved.
    logging.critical(traceback.format_exc())
    exit_status = 2

  if options.record and http_archive is not None:
    http_archive.Persist(replay_filename)
    logging.info('Saved %d responses to %s', len(http_archive), replay_filename)
  if cache_misses:
    cache_misses.Persist()
    logging.info('Saved %d cache misses and %d requests to %s',
                 cache_misses.get_total_cache_misses(),
                 len(cache_misses.request_counts.keys()),
                 options.cache_miss_file)
  return exit_status
def GetOptionParser():
  """Build the command-line option parser.

  Options are split into general options, a 'Network Simulation Options'
  group, and a 'Replay Harness Options' group.

  Returns:
    An optparse.OptionParser; its description is this module's docstring.
  """
  class PlainHelpFormatter(optparse.IndentedHelpFormatter):
    # Emit the description as written instead of re-wrapping it.
    def format_description(self, description):
      if description:
        return description + '\n'
      else:
        return ''

  option_parser = optparse.OptionParser(
      usage='%prog [options] replay_file',
      formatter=PlainHelpFormatter(),
      description=__doc__,
      epilog='http://code.google.com/p/web-page-replay/')

  option_parser.add_option('--spdy', default=False,
      action='store_true',
      help='Replay via SPDY. (Can be combined with --no-ssl).')
  option_parser.add_option('-r', '--record', default=False,
      action='store_true',
      help='Download real responses and record them to replay_file')
  option_parser.add_option('--append', default=False,
      action='store_true',
      help='Append responses to replay_file.')
  option_parser.add_option('-l', '--log_level', default='debug',
      action='store',
      type='choice',
      choices=('debug', 'info', 'warning', 'error', 'critical'),
      help='Minimum verbosity level to log')
  option_parser.add_option('-f', '--log_file', default=None,
      action='store',
      type='string',
      help='Log file to use in addition to writing logs to stderr.')
  option_parser.add_option('-e', '--cache_miss_file', default=None,
      action='store',
      dest='cache_miss_file',
      type='string',
      help='Archive file to record cache misses as pickled objects. '
           'Cache misses occur when a request cannot be served in replay mode.')

  network_group = optparse.OptionGroup(option_parser,
      'Network Simulation Options',
      'These options configure the network simulation in replay mode')
  network_group.add_option('-u', '--up', default='0',
      action='store',
      type='string',
      help='Upload Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
  network_group.add_option('-d', '--down', default='0',
      action='store',
      type='string',
      help='Download Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
  network_group.add_option('-m', '--delay_ms', default='0',
      action='store',
      type='string',
      help='Propagation delay (latency) in milliseconds. Zero means no delay.')
  network_group.add_option('-p', '--packet_loss_rate', default='0',
      action='store',
      type='string',
      help='Packet loss rate in range [0..1]. Zero means no loss.')
  network_group.add_option('-w', '--init_cwnd', default='0',
      action='store',
      type='string',
      help='Set initial cwnd (linux only, requires kernel patch)')
  network_group.add_option('--net', default=None,
      action='store',
      type='choice',
      choices=net_configs.NET_CONFIG_NAMES,
      help='Select a set of network options: %s.' % ', '.join(
          net_configs.NET_CONFIG_NAMES))
  network_group.add_option('--shaping_type', default='dummynet',
      action='store',
      choices=('dummynet', 'proxy'),
      help='When shaping is configured (i.e. --up, --down, etc.) decides '
           'whether to use |dummynet| (default), or |proxy| servers.')
  option_parser.add_option_group(network_group)

  harness_group = optparse.OptionGroup(option_parser,
      'Replay Harness Options',
      'These advanced options configure various aspects of the replay harness')
  harness_group.add_option('-S', '--server', default=None,
      action='store',
      type='string',
      help='IP address of host running "replay.py --server_mode". '
           'This only changes the primary DNS nameserver to use the given IP.')
  harness_group.add_option('-M', '--server_mode', default=False,
      action='store_true',
      help='Run replay DNS & http proxies, and trafficshaping on --port '
           'without changing the primary DNS nameserver. '
           'Other hosts may connect to this using "replay.py --server" '
           'or by pointing their DNS to this server.')
  harness_group.add_option('-i', '--inject_scripts', default='deterministic.js',
      action='store',
      dest='inject_scripts',
      help='A comma separated list of JavaScript sources to inject in all '
           'pages. By default a script is injected that makes sources '
           'of entropy such as Date() and Math.random() deterministic. '
           'CAUTION: Without deterministic.js, many pages will not replay.')
  harness_group.add_option('-D', '--no-diff_unknown_requests', default=True,
      action='store_false',
      dest='diff_unknown_requests',
      help='During replay, do not show a diff of unknown requests against '
           'their nearest match in the archive.')
  harness_group.add_option('-C', '--use_closest_match', default=False,
      action='store_true',
      dest='use_closest_match',
      help='During replay, if a request is not found, serve the closest match '
           'in the archive instead of giving a 404.')
  # NOTE(review): the tail of this help text is reconstructed; confirm.
  harness_group.add_option('-U', '--use_server_delay', default=False,
      action='store_true',
      dest='use_server_delay',
      help='During replay, simulate server delay by delaying response time to '
           'requests.')
  harness_group.add_option('-I', '--screenshot_dir', default=None,
      action='store',
      type='string',
      help='Save PNG images of the loaded page in the given directory.')
  harness_group.add_option('-P', '--no-dns_private_passthrough', default=True,
      action='store_false',
      dest='dns_private_passthrough',
      help='Don\'t forward DNS requests that resolve to private network '
           'addresses. CAUTION: With this option important services like '
           'Kerberos will resolve to the HTTP proxy address.')
  harness_group.add_option('-x', '--no-dns_forwarding', default=True,
      action='store_false',
      dest='dns_forwarding',
      help='Don\'t forward DNS requests to the local replay server. '
           'CAUTION: With this option an external mechanism must be used to '
           'forward traffic to the replay server.')
  harness_group.add_option('--host', default=None,
      action='store',
      type='string',
      help='The IP address to bind all servers to. Defaults to 0.0.0.0 or '
           '127.0.0.1, depending on --server_mode and platform.')
  # Port options are parsed as ints; callers compare them numerically.
  harness_group.add_option('-o', '--port', default=80,
      action='store',
      type='int',
      help='Port number to listen on.')
  harness_group.add_option('--ssl_port', default=443,
      action='store',
      type='int',
      help='SSL port number to listen on.')
  harness_group.add_option('--http_to_https_port', default=None,
      action='store',
      type='int',
      help='Port on which WPR will listen for HTTP requests that it will send '
           'along as HTTPS requests.')
  harness_group.add_option('--dns_port', default=53,
      action='store',
      type='int',
      help='DNS port number to listen on.')
  harness_group.add_option('-c', '--certfile', default=None,
      action='store',
      type='string',
      help='Certificate file to use with SSL (gets auto-generated if needed).')
  harness_group.add_option('--no-ssl', default=True,
      action='store_false',
      dest='ssl',
      help='Do not setup an SSL proxy.')
  harness_group.add_option('--no-admin-check', default=True,
      action='store_false',
      dest='admin_check',
      help='Do not check if administrator access is needed.')
  harness_group.add_option('--scramble_images', default=False,
      action='store_true',
      dest='scramble_images',
      help='Scramble image responses.')
  # Register the group only after all of its options have been added.
  option_parser.add_option_group(harness_group)
  return option_parser
def main():
  """Parse the command line and run replay().

  Returns:
    The exit status returned by replay().
  """
  option_parser = GetOptionParser()
  options, args = option_parser.parse_args()
  options = OptionsWrapper(options, option_parser)

  if options.server:
    # With --server no archive file is needed.
    replay_filename = None
  elif len(args) != 1:
    # option_parser.error() prints the message and exits the program.
    option_parser.error('Must specify a replay_file')
  else:
    replay_filename = args[0]

  return replay(options, replay_filename)


if __name__ == '__main__':
  sys.exit(main())