# Copyright 2010 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Replays web pages under simulated network conditions.

Must be run as administrator (sudo).

To record web pages:
  1. Start the program in record mode.
     $ sudo ./replay.py --record archive.wpr
  2. Load the web pages you want to record in a web browser. It is important to
     clear browser caches before this so that all subresources are requested
     from the network.
  3. Kill the process to stop recording.

To replay web pages:
  1. Start the program in replay mode with a previously recorded archive.
     $ sudo ./replay.py archive.wpr
  2. Load recorded pages in a web browser. A 404 will be served for any pages or
     resources not in the recorded archive.

Network simulation examples:
  # 128KByte/s uplink bandwidth, 4Mbit/s downlink bandwidth with 100ms RTT time
  $ sudo ./replay.py --up 128KByte/s --down 4Mbit/s --delay_ms=100 archive.wpr

  # 1% packet loss rate
  $ sudo ./replay.py --packet_loss_rate=0.01 archive.wpr
"""
50 import cachemissarchive
57 import platformsettings
58 import replayspdyserver
59 import script_injector
# Refuse to run on interpreters older than 2.6.  Compare the numeric
# version tuple: comparing the sys.version *string* is lexicographic and
# gives wrong answers for versions such as '2.10'.
if sys.version_info < (2, 6):
    # Parenthesised single-argument print works as both the Python 2
    # statement and the Python 3 function.
    print('Need Python 2.6 or greater.')
    sys.exit(1)
def configure_logging(log_level_name, log_file_name=None):
    """Configure logging level and format.

    Sets up the root logger via logging.basicConfig, optionally adds a file
    handler, and adds the platform's system logging handler when
    platformsettings provides one.

    Args:
      log_level_name: 'debug', 'info', 'warning', 'error', or 'critical'.
      log_file_name: a file name to log to in addition to the default
          handler; no file handler is added when it is None or empty.
    """
    if logging.root.handlers:
        # basicConfig() is a no-op once the root logger has handlers, so
        # the level/format below would silently not take effect.
        logging.critical('A logging method (e.g. "logging.warn(...)")'
                         ' was called before logging was configured.')
    log_level = getattr(logging, log_level_name.upper())
    log_format = '%(asctime)s %(levelname)s %(message)s'
    logging.basicConfig(level=log_level, format=log_format)
    logger = logging.getLogger()
    if log_file_name:
        fh = logging.FileHandler(log_file_name)
        fh.setLevel(log_level)
        fh.setFormatter(logging.Formatter(log_format))
        logger.addHandler(fh)
    system_handler = platformsettings.get_system_logging_handler()
    if system_handler:
        logger.addHandler(system_handler)
def AddDnsForward(server_manager, host):
    """Forward DNS traffic.

    Registers platformsettings.set_temporary_primary_nameserver with the
    server manager, passing |host| as its argument.

    Args:
      server_manager: the manager that servers are appended to.
      host: address handed to set_temporary_primary_nameserver.
    """
    server_manager.Append(
        platformsettings.set_temporary_primary_nameserver, host)
def AddDnsProxy(server_manager, options, host, port, real_dns_lookup,
                http_archive):
    """Register the DNS proxy server and its lookup filters.

    Args:
      server_manager: the manager that servers and callbacks are added to.
      options: option values; reads dns_private_passthrough, shaping_dns
          and record.
      host: address the DNS proxy is created with and that ReplayDnsLookup
          is given.
      port: port the DNS proxy is created with.
      real_dns_lookup: lookup object handed to the private-IP filter.
      http_archive: archive handed to the private-IP filter.
    """
    dns_filters = []
    if options.dns_private_passthrough:
        private_filter = dnsproxy.PrivateIpFilter(real_dns_lookup, http_archive)
        dns_filters.append(private_filter)
        # The filter is (re)initialized on entering either mode.
        server_manager.AppendRecordCallback(
            private_filter.InitializeArchiveHosts)
        server_manager.AppendReplayCallback(
            private_filter.InitializeArchiveHosts)
    if options.shaping_dns:
        delay_filter = dnsproxy.DelayFilter(options.record,
                                            **options.shaping_dns)
        dns_filters.append(delay_filter)
        server_manager.AppendRecordCallback(delay_filter.SetRecordMode)
        server_manager.AppendReplayCallback(delay_filter.SetReplayMode)
    server_manager.Append(
        dnsproxy.DnsProxyServer, host, port,
        dns_lookup=dnsproxy.ReplayDnsLookup(host, dns_filters))
def AddWebProxy(server_manager, options, host, real_dns_lookup, http_archive,
                cache_misses):
    """Register the web proxy servers (SPDY, or HTTP plus optional HTTPS).

    Args:
      server_manager: the manager that servers and callbacks are added to.
      options: option values; reads inject_scripts, spdy, record,
          diff_unknown_requests, use_closest_match, scramble_images, port,
          ssl, ssl_port, certfile and shaping_http.
      host: address the proxy servers are created with.
      real_dns_lookup: lookup object handed to the archive fetcher.
      http_archive: archive handed to the fetcher and custom handlers.
      cache_misses: cache-miss archive handed to the fetcher (may be None).
    """
    inject_script = script_injector.GetInjectScript(options.inject_scripts)
    custom_handlers = customhandlers.CustomHandlers(options, http_archive)
    if options.spdy:
        assert not options.record, 'spdy cannot be used with --record.'
        archive_fetch = httpclient.ReplayHttpArchiveFetch(
            http_archive, real_dns_lookup,
            inject_script,
            options.diff_unknown_requests,
            cache_misses=cache_misses,
            use_closest_match=options.use_closest_match,
            scramble_images=options.scramble_images)
        server_manager.Append(
            replayspdyserver.ReplaySpdyServer, archive_fetch,
            custom_handlers, host=host, port=options.port,
            certfile=options.certfile)
    else:
        custom_handlers.add_server_manager_handler(server_manager)
        archive_fetch = httpclient.ControllableHttpArchiveFetch(
            http_archive, real_dns_lookup,
            inject_script,
            options.diff_unknown_requests, options.record,
            cache_misses=cache_misses,
            use_closest_match=options.use_closest_match,
            scramble_images=options.scramble_images)
        server_manager.AppendRecordCallback(archive_fetch.SetRecordMode)
        server_manager.AppendReplayCallback(archive_fetch.SetReplayMode)
        server_manager.Append(
            httpproxy.HttpProxyServer,
            archive_fetch, custom_handlers,
            host=host, port=options.port, **options.shaping_http)
        if options.ssl:
            server_manager.Append(
                httpproxy.HttpsProxyServer,
                archive_fetch, custom_handlers, options.certfile,
                host=host, port=options.ssl_port, **options.shaping_http)
def AddTrafficShaper(server_manager, options, host):
    """Register the dummynet traffic shaper when shaping options are set.

    Does nothing when options.shaping_dummynet is empty.

    Args:
      server_manager: the manager the traffic shaper is appended to.
      options: option values; reads shaping_dummynet and server_mode.
      host: address the traffic shaper is created with.
    """
    if options.shaping_dummynet:
        server_manager.AppendTrafficShaper(
            trafficshaper.TrafficShaper, host=host,
            use_loopback=not options.server_mode and host == '127.0.0.1',
            **options.shaping_dummynet)
class OptionsWrapper(object):
    """Add checks, updates, and methods to option values.

    Wraps the values object returned by optparse.  Attribute reads that are
    not found on the wrapper fall through to the wrapped values.

    Example:
      options, args = option_parser.parse_args()
      options = OptionsWrapper(options, option_parser)  # run checks and updates
      if options.record and options.IsRootRequired():
        [...]
    """
    _TRAFFICSHAPING_OPTIONS = set(
        ['down', 'up', 'delay_ms', 'packet_loss_rate', 'init_cwnd', 'net'])
    # (option, options that may not be combined with it)
    _CONFLICTING_OPTIONS = (
        ('record', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                    'spdy', 'use_server_delay')),
        ('append', ('down', 'up', 'delay_ms', 'packet_loss_rate', 'net',
                    'spdy', 'use_server_delay')),  # same as --record
        ('net', ('down', 'up', 'delay_ms')),
        ('server', ('server_mode',)),
    )

    def __init__(self, options, parser):
        """Validate |options| and derive the dependent option values.

        Args:
          options: the values object returned by parser.parse_args().
          parser: the optparse parser; supplies defaults and error reporting.
        """
        self._options = options
        self._parser = parser
        # Names of options whose value differs from the parser default.
        self._nondefaults = set(
            name for name, value in parser.defaults.items()
            if getattr(options, name) != value)
        self._CheckConflicts()
        self._CheckValidIp('host')
        self._MassageValues()

    def _CheckConflicts(self):
        """Give an error if mutually exclusive options are used."""
        for option, bad_options in self._CONFLICTING_OPTIONS:
            if option in self._nondefaults:
                for bad_option in bad_options:
                    if bad_option in self._nondefaults:
                        self._parser.error(
                            'Option --%s cannot be used with --%s.' %
                            (bad_option, option))

    def _CheckValidIp(self, name):
        """Give an error if option |name| is not a valid IPv4 address."""
        value = getattr(self._options, name)
        if value:
            try:
                socket.inet_aton(value)
            except (socket.error, TypeError, ValueError):
                self._parser.error(
                    'Option --%s must be a valid IPv4 address.' % name)

    def _ShapingKeywordArgs(self, shaping_key):
        """Return the shaping keyword args for |shaping_key|.

        Args:
          shaping_key: one of 'dummynet', 'dns', 'http'.
        Returns:
          {}  # if shaping_key does not apply, or options have default values.
          {k: v, ...}
        """
        kwargs = {}

        def AddItemIfSet(d, kw_key, opt_key=None):
            # Copy the option into |d| only when the user changed it.
            opt_key = opt_key or kw_key
            if opt_key in self._nondefaults:
                d[kw_key] = getattr(self, opt_key)

        # NOTE(review): the nesting below was rebuilt from a listing that
        # had lost its indentation -- confirm against the upstream file.
        if ((self.shaping_type == 'proxy' and shaping_key in ('dns', 'http'))
                or self.shaping_type == shaping_key):
            AddItemIfSet(kwargs, 'delay_ms')
            if shaping_key in ('dummynet', 'http'):
                AddItemIfSet(kwargs, 'down_bandwidth', opt_key='down')
                AddItemIfSet(kwargs, 'up_bandwidth', opt_key='up')
                if shaping_key == 'dummynet':
                    AddItemIfSet(kwargs, 'packet_loss_rate')
                    AddItemIfSet(kwargs, 'init_cwnd')
                elif self.shaping_type != 'none':
                    if 'packet_loss_rate' in self._nondefaults:
                        logging.warning(
                            'Shaping type, %s, ignores --packet_loss_rate=%s',
                            self.shaping_type, self.packet_loss_rate)
                    if 'init_cwnd' in self._nondefaults:
                        logging.warning(
                            'Shaping type, %s, ignores --init_cwnd=%s',
                            self.shaping_type, self.init_cwnd)
        return kwargs

    def _MassageValues(self):
        """Set options that depend on the values of other options."""
        if self.append and not self.record:
            self._options.record = True
        if self.net:
            self._options.down, self._options.up, self._options.delay_ms = \
                net_configs.GetNetConfig(self.net)
            # Values implied by --net count as explicitly set.
            self._nondefaults.update(['down', 'up', 'delay_ms'])
        if not self.ssl:
            self._options.certfile = None
        self.shaping_dns = self._ShapingKeywordArgs('dns')
        self.shaping_http = self._ShapingKeywordArgs('http')
        self.shaping_dummynet = self._ShapingKeywordArgs('dummynet')

    def __getattr__(self, name):
        """Make the original option values available."""
        if name == '_options':
            # _options itself is missing (instance not initialized, e.g.
            # created via __new__); without this guard the lookup below
            # would re-enter __getattr__ forever.
            raise AttributeError(name)
        return getattr(self._options, name)

    # NOTE(review): the def line of this method was missing from the
    # listing; __repr__ is used because it serves both str() and repr().
    def __repr__(self):
        """Return a json representation of the original options dictionary."""
        return json.dumps(self._options.__dict__)

    def IsRootRequired(self):
        """Returns True iff the options require whole program root access."""
        if self.server:
            return True

        def IsPrivilegedPort(port):
            return port and port < 1024

        if IsPrivilegedPort(self.port) or IsPrivilegedPort(self.ssl_port):
            return True

        if self.dns_forwarding:
            if IsPrivilegedPort(self.dns_port):
                return True
            if not self.server_mode and self.host == '127.0.0.1':
                return True

        return False
def replay(options, replay_filename):
    """Set up all servers for the given options and run them.

    Args:
      options: an OptionsWrapper with the parsed command-line options.
      replay_filename: path of the archive to record to or replay from;
          unused when options.server is set.

    Returns:
      0 after a normal run or keyboard interrupt, 1 for the known
      setup/permission errors, 2 for any other exception.
    """
    # NOTE(review): several short lines (else:/try:/exit_status/return) were
    # missing from the listing and have been rebuilt -- confirm against the
    # upstream file.
    if options.admin_check and options.IsRootRequired():
        platformsettings.rerun_as_administrator()
    configure_logging(options.log_level, options.log_file)
    server_manager = servermanager.ServerManager(options.record)
    cache_misses = None
    # Stays None on the --server path, where no archive is opened.
    http_archive = None
    if options.cache_miss_file:
        if os.path.exists(options.cache_miss_file):
            logging.warning(
                'Cache Miss Archive file %s already exists; '
                'replay will load and append entries to archive file',
                options.cache_miss_file)
            cache_misses = cachemissarchive.CacheMissArchive.Load(
                options.cache_miss_file)
        else:
            cache_misses = cachemissarchive.CacheMissArchive(
                options.cache_miss_file)
    if options.server:
        # Client-only mode: just point DNS at the remote replay server.
        AddDnsForward(server_manager, options.server)
    else:
        host = options.host
        if not host:
            host = platformsettings.get_server_ip_address(options.server_mode)
        real_dns_lookup = dnsproxy.RealDnsLookup(
            name_servers=[platformsettings.get_original_primary_nameserver()])
        if options.record:
            httparchive.HttpArchive.AssertWritable(replay_filename)
            if options.append and os.path.exists(replay_filename):
                http_archive = httparchive.HttpArchive.Load(replay_filename)
                logging.info('Appending to %s (loaded %d existing responses)',
                             replay_filename, len(http_archive))
            else:
                http_archive = httparchive.HttpArchive()
        else:
            http_archive = httparchive.HttpArchive.Load(replay_filename)
            logging.info('Loaded %d responses from %s',
                         len(http_archive), replay_filename)
        server_manager.AppendRecordCallback(real_dns_lookup.ClearCache)
        server_manager.AppendRecordCallback(http_archive.clear)

        if options.dns_forwarding:
            if not options.server_mode and host == '127.0.0.1':
                AddDnsForward(server_manager, host)
            AddDnsProxy(server_manager, options, host, options.dns_port,
                        real_dns_lookup, http_archive)
        if options.ssl and options.certfile is None:
            options.certfile = os.path.join(os.path.dirname(__file__),
                                            'wpr_cert.pem')
        http_proxy_address = options.host
        if not http_proxy_address:
            http_proxy_address = platformsettings.get_httpproxy_ip_address(
                options.server_mode)
        AddWebProxy(server_manager, options, http_proxy_address,
                    real_dns_lookup, http_archive, cache_misses)
        AddTrafficShaper(server_manager, options, host)

    exit_status = 0
    try:
        server_manager.Run()
    except KeyboardInterrupt:
        logging.info('Shutting down.')
    except (dnsproxy.DnsProxyException,
            trafficshaper.TrafficShaperException,
            platformsettings.NotAdministratorError,
            platformsettings.DnsUpdateError) as e:
        logging.critical('%s: %s', e.__class__.__name__, e)
        exit_status = 1
    except:  # noqa: E722 -- deliberate catch-all so the archive below is
        # still persisted whatever stopped the servers.
        logging.critical(traceback.format_exc())
        exit_status = 2

    # http_archive is None when running with --server; nothing to save then.
    if options.record and http_archive is not None:
        http_archive.Persist(replay_filename)
        logging.info('Saved %d responses to %s',
                     len(http_archive), replay_filename)
    if cache_misses:
        cache_misses.Persist()
        logging.info('Saved %d cache misses and %d requests to %s',
                     cache_misses.get_total_cache_misses(),
                     len(cache_misses.request_counts.keys()),
                     options.cache_miss_file)
    return exit_status
def GetOptionParser():
    """Build and return the optparse parser for the replay command line.

    Returns:
      An optparse.OptionParser with the general, network-simulation and
      replay-harness options registered.
    """
    # NOTE(review): the action=/type=/dest= lines were missing from the
    # listing and have been rebuilt from how each option is used elsewhere
    # in this file -- confirm against the upstream file.

    class PlainHelpFormatter(optparse.IndentedHelpFormatter):
        """Help formatter that prints the description without re-wrapping."""

        def format_description(self, description):
            if description:
                return description + '\n'
            else:
                return ''

    option_parser = optparse.OptionParser(
        usage='%prog [options] replay_file',
        formatter=PlainHelpFormatter(),
        description=__doc__,
        epilog='http://code.google.com/p/web-page-replay/')

    option_parser.add_option(
        '--spdy', default=False,
        action='store_true',
        help='Replay via SPDY. (Can be combined with --no-ssl).')
    option_parser.add_option(
        '-r', '--record', default=False,
        action='store_true',
        help='Download real responses and record them to replay_file')
    option_parser.add_option(
        '--append', default=False,
        action='store_true',
        help='Append responses to replay_file.')
    option_parser.add_option(
        '-l', '--log_level', default='debug',
        action='store',
        type='choice',
        choices=('debug', 'info', 'warning', 'error', 'critical'),
        help='Minimum verbosity level to log')
    option_parser.add_option(
        '-f', '--log_file', default=None,
        action='store',
        type='string',
        help='Log file to use in addition to writing logs to stderr.')
    option_parser.add_option(
        '-e', '--cache_miss_file', default=None,
        action='store',
        dest='cache_miss_file',
        type='string',
        help='Archive file to record cache misses as pickled objects. '
             'Cache misses occur when a request cannot be served in replay '
             'mode.')

    network_group = optparse.OptionGroup(
        option_parser,
        'Network Simulation Options',
        'These options configure the network simulation in replay mode')
    network_group.add_option(
        '-u', '--up', default='0',
        action='store',
        type='string',
        help='Upload Bandwidth in [K|M]{bit/s|Byte/s}. Zero means unlimited.')
    network_group.add_option(
        '-d', '--down', default='0',
        action='store',
        type='string',
        help='Download Bandwidth in [K|M]{bit/s|Byte/s}. '
             'Zero means unlimited.')
    network_group.add_option(
        '-m', '--delay_ms', default='0',
        action='store',
        type='string',
        help='Propagation delay (latency) in milliseconds. '
             'Zero means no delay.')
    network_group.add_option(
        '-p', '--packet_loss_rate', default='0',
        action='store',
        type='string',
        help='Packet loss rate in range [0..1]. Zero means no loss.')
    network_group.add_option(
        '-w', '--init_cwnd', default='0',
        action='store',
        type='string',
        help='Set initial cwnd (linux only, requires kernel patch)')
    network_group.add_option(
        '--net', default=None,
        action='store',
        type='choice',
        choices=net_configs.NET_CONFIG_NAMES,
        help='Select a set of network options: %s.' % ', '.join(
            net_configs.NET_CONFIG_NAMES))
    network_group.add_option(
        '--shaping_type', default='dummynet',
        action='store',
        type='choice',
        choices=('dummynet', 'proxy'),
        help='When shaping is configured (i.e. --up, --down, etc.) decides '
             'whether to use |dummynet| (default), or |proxy| servers.')
    option_parser.add_option_group(network_group)

    harness_group = optparse.OptionGroup(
        option_parser,
        'Replay Harness Options',
        'These advanced options configure various aspects of the replay '
        'harness')
    harness_group.add_option(
        '-S', '--server', default=None,
        action='store',
        type='string',
        help='IP address of host running "replay.py --server_mode". '
             'This only changes the primary DNS nameserver to use the given '
             'IP.')
    harness_group.add_option(
        '-M', '--server_mode', default=False,
        action='store_true',
        help='Run replay DNS & http proxies, and trafficshaping on --port '
             'without changing the primary DNS nameserver. '
             'Other hosts may connect to this using "replay.py --server" '
             'or by pointing their DNS to this server.')
    harness_group.add_option(
        '-i', '--inject_scripts', default='deterministic.js',
        action='store',
        dest='inject_scripts',
        help='A comma separated list of JavaScript sources to inject in all '
             'pages. By default a script is injected that makes sources '
             'of entropy such as Date() and Math.random() deterministic. '
             'CAUTION: Without deterministic.js, many pages will not replay.')
    harness_group.add_option(
        '-D', '--no-diff_unknown_requests', default=True,
        action='store_false',
        dest='diff_unknown_requests',
        help='During replay, do not show a diff of unknown requests against '
             'their nearest match in the archive.')
    harness_group.add_option(
        '-C', '--use_closest_match', default=False,
        action='store_true',
        dest='use_closest_match',
        help='During replay, if a request is not found, serve the closest '
             'match in the archive instead of giving a 404.')
    # NOTE(review): the tail of this help text was missing from the listing;
    # 'requests.' is a reconstruction -- confirm the wording.
    harness_group.add_option(
        '-U', '--use_server_delay', default=False,
        action='store_true',
        dest='use_server_delay',
        help='During replay, simulate server delay by delaying response '
             'time to requests.')
    harness_group.add_option(
        '-I', '--screenshot_dir', default=None,
        action='store',
        type='string',
        help='Save PNG images of the loaded page in the given directory.')
    harness_group.add_option(
        '-P', '--no-dns_private_passthrough', default=True,
        action='store_false',
        dest='dns_private_passthrough',
        help='Don\'t forward DNS requests that resolve to private network '
             'addresses. CAUTION: With this option important services like '
             'Kerberos will resolve to the HTTP proxy address.')
    harness_group.add_option(
        '-x', '--no-dns_forwarding', default=True,
        action='store_false',
        dest='dns_forwarding',
        help='Don\'t forward DNS requests to the local replay server. '
             'CAUTION: With this option an external mechanism must be used to '
             'forward traffic to the replay server.')
    harness_group.add_option(
        '--host', default=None,
        action='store',
        type='string',
        help='The IP address to bind all servers to. Defaults to 0.0.0.0 or '
             '127.0.0.1, depending on --server_mode and platform.')
    # Ports must be ints: OptionsWrapper.IsRootRequired compares them
    # numerically against 1024.
    harness_group.add_option(
        '-o', '--port', default=80,
        action='store',
        type='int',
        help='Port number to listen on.')
    harness_group.add_option(
        '--ssl_port', default=443,
        action='store',
        type='int',
        help='SSL port number to listen on.')
    harness_group.add_option(
        '--dns_port', default=53,
        action='store',
        type='int',
        help='DNS port number to listen on.')
    harness_group.add_option(
        '-c', '--certfile', default=None,
        action='store',
        type='string',
        help='Certificate file to use with SSL (gets auto-generated if '
             'needed).')
    harness_group.add_option(
        '--no-ssl', default=True,
        action='store_false',
        dest='ssl',
        help='Do not setup an SSL proxy.')
    harness_group.add_option(
        '--no-admin-check', default=True,
        action='store_false',
        dest='admin_check',
        help='Do not check if administrator access is needed.')
    harness_group.add_option(
        '--scramble_images', default=False,
        action='store_true',
        dest='scramble_images',
        help='Scramble image responses.')
    # Register the group once every harness option has been added to it.
    option_parser.add_option_group(harness_group)
    return option_parser
# NOTE(review): the def line, the if/elif/else lines and the body of the
# __main__ guard were missing from the listing and have been rebuilt --
# confirm against the upstream file.
def main():
    """Parse the command line and run replay().

    Returns:
      The exit status returned by replay().
    """
    option_parser = GetOptionParser()
    options, args = option_parser.parse_args()
    options = OptionsWrapper(options, option_parser)

    if options.server:
        # --server only redirects DNS; no archive file is involved.
        replay_filename = None
    elif len(args) != 1:
        option_parser.error('Must specify a replay_file')
    else:
        replay_filename = args[0]

    return replay(options, replay_filename)


if __name__ == '__main__':
    sys.exit(main())