1 # Copyright 2013 The Swarming Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 that
3 # can be found in the LICENSE file.
5 """Various utility functions and classes not specific to any single area."""
12 import logging.handlers
21 from utils import zip_package
24 # Path to (possibly extracted from zip) cacert.pem bundle file.
25 # See get_cacerts_bundle().
27 _ca_certs_lock = threading.Lock()
30 # @cached decorators registered by report_cache_stats_at_exit.
32 _caches_lock = threading.Lock()
35 class OptionParserWithLogging(optparse.OptionParser):
36 """Adds --verbose option."""
38 # Set to True to enable --log-file options.
39 enable_log_file = True
41 def __init__(self, verbose=0, log_file=None, **kwargs):
42 kwargs.setdefault('description', sys.modules['__main__'].__doc__)
43 optparse.OptionParser.__init__(self, **kwargs)
44 self.group_logging = optparse.OptionGroup(self, 'Logging')
45 self.group_logging.add_option(
49 help='Use multiple times to increase verbosity')
50 if self.enable_log_file:
51 self.group_logging.add_option(
54 help='The name of the file to store rotating log details')
55 self.group_logging.add_option(
56 '--no-log', action='store_const', const='', dest='log_file',
57 help='Disable log file')
59 def parse_args(self, *args, **kwargs):
60 # Make sure this group is always the last one.
61 self.add_option_group(self.group_logging)
63 options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)
64 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
65 level = levels[min(len(levels) - 1, options.verbose)]
67 logging_console = logging.StreamHandler()
68 logging_console.setFormatter(logging.Formatter(
69 '%(levelname)5s %(relativeCreated)6d %(module)15s(%(lineno)3d): '
71 logging_console.setLevel(level)
72 logging.getLogger().setLevel(level)
73 logging.getLogger().addHandler(logging_console)
75 if self.enable_log_file and options.log_file:
76 # This is necessary otherwise attached handler will miss the messages.
77 logging.getLogger().setLevel(logging.DEBUG)
79 logging_rotating_file = logging.handlers.RotatingFileHandler(
81 maxBytes=10 * 1024 * 1024,
84 # log files are always at DEBUG level.
85 logging_rotating_file.setLevel(logging.DEBUG)
86 logging_rotating_file.setFormatter(logging.Formatter(
87 '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
88 logging.getLogger().addHandler(logging_rotating_file)
93 class Profiler(object):
94 """Context manager that records time spend inside its body."""
95 def __init__(self, name):
97 self.start_time = None
100 self.start_time = time.time()
103 def __exit__(self, _exc_type, _exec_value, _traceback):
104 time_taken = time.time() - self.start_time
105 logging.info('Profiling: Section %s took %3.3f seconds',
106 self.name, time_taken)
109 class ProfileCounter(object):
110 """Records total time spent in a chunk of code during lifetime of a process.
112 Recursive calls count as a single call (i.e. only the time spent in the outer
115 Autoregisters itself in a global list when instantiated. All counters will be
116 reported at the process exit time (in atexit hook). Best to be used as with
120 _instances_lock = threading.Lock()
125 print('\nProfiling report:')
128 '{:<38}{:<10}{:<16}{:<16}'.format(
129 'Name', 'Count', 'Total ms', 'Average ms'))
131 with ProfileCounter._instances_lock:
132 for i in sorted(ProfileCounter._instances, key=lambda x: -x.total_time):
134 '{:<38}{:<10}{:<16.1f}{:<16.1f}'.format(
138 i.average_time * 1000))
141 def __init__(self, name):
142 self._lock = threading.Lock()
146 self._active = threading.local()
147 with self._instances_lock:
148 self._instances.append(self)
149 if len(self._instances) == 1:
150 atexit.register(ProfileCounter.summarize_all)
157 def call_count(self):
158 return self._call_count
161 def total_time(self):
162 return self._total_time
165 def average_time(self):
168 return self._total_time / self._call_count
172 recursion = getattr(self._active, 'recursion', 0)
174 self._active.started = time.time()
175 self._active.recursion = recursion + 1
177 def __exit__(self, _exc_type, _exec_value, _traceback):
178 self._active.recursion -= 1
179 if not self._active.recursion:
180 time_inside = time.time() - self._active.started
182 self._total_time += time_inside
183 self._call_count += 1
187 """Decorator that profiles a function if SWARMING_PROFILE env var is set.
189 Will gather a number of calls to that function and total time spent inside.
190 The final report is emitted to stdout at the process exit time.
192 # No performance impact whatsoever if SWARMING_PROFILE is not set.
193 if os.environ.get('SWARMING_PROFILE') != '1':
195 timer = ProfileCounter(func.__name__)
196 @functools.wraps(func)
197 def wrapper(*args, **kwargs):
199 return func(*args, **kwargs)
203 def report_cache_stats_at_exit(func, cache):
204 """Registers a hook that reports state of the cache on the process exit."""
205 # Very dumb. Tries to account for object reuse though.
206 def get_size(obj, seen):
207 # Use id(...) to avoid triggering __hash__ and comparing by value instead.
211 size = sys.getsizeof(obj)
212 if isinstance(obj, (list, tuple)):
213 return size + sum(get_size(x, seen) for x in obj)
214 elif isinstance(obj, dict):
216 get_size(k, seen) + get_size(v, seen) for k, v in obj.iteritems())
219 def report_caches_state():
220 print('\nFunction cache report:')
222 print('{:<40}{:<16}{:<26}'.format('Name', 'Items', 'Approx size, KB'))
227 for func, cache in sorted(_caches, key=lambda x: -len(x[1])):
228 size = get_size(cache, seen_objects)
231 '{:<40}{:<16}{:<26}'.format(func.__name__, len(cache), size / 1024))
233 print('Total: %.1f MB' % (total / 1024 / 1024,))
237 _caches.append((func, cache))
238 if len(_caches) == 1:
239 atexit.register(report_caches_state)
243 """Decorator that permanently caches a result of function invocation.
245 It tries to be super fast and because of that is somewhat limited:
246 * The function being cached can accept only positional arguments.
247 * All arguments should be hashable.
248 * The function may be called multiple times with same arguments in
249 multithreaded environment.
250 * The cache is not cleared up at all.
252 If SWARMING_PROFILE env var is set, will produce a report about the state of
253 the cache at the process exit (number of items and approximate size).
258 if os.environ.get('SWARMING_PROFILE') == '1':
259 report_cache_stats_at_exit(func, cache)
261 @functools.wraps(func)
263 v = cache.get(args, empty)
class Unbuffered(object):
  """Disable buffering on a file object.

  Wraps a stream and flushes it after every write so output becomes visible
  immediately. Every other attribute access (fileno, isatty, encoding, ...)
  is delegated to the wrapped stream, so the wrapper is a drop-in
  replacement for e.g. sys.stdout.
  """

  def __init__(self, stream):
    # Keep a reference to the wrapped stream; the excerpt's __init__ never
    # stored it, which made write() and __getattr__ fail.
    self.stream = stream

  def write(self, data):
    self.stream.write(data)
    # Flush eagerly so the data is not held in the stream's buffer; this is
    # the whole point of the wrapper.
    self.stream.flush()

  def __getattr__(self, attr):
    # Only called for attributes not found on the wrapper itself; forward
    # them to the underlying stream.
    return getattr(self.stream, attr)
def disable_buffering():
  """Makes this process and child processes stdout unbuffered."""
  if os.environ.get('PYTHONUNBUFFERED'):
    # Already unbuffered (or explicitly requested); nothing to do.
    return
  # Since sys.stdout is a C++ object, it's impossible to do
  # sys.stdout.write = lambda...
  sys.stdout = Unbuffered(sys.stdout)
  os.environ['PYTHONUNBUFFERED'] = 'x'
def fix_python_path(cmd):
  """Returns the fixed command line to call the right python executable.

  Replaces a leading 'python' with sys.executable, or prepends
  sys.executable when the command is a .py script. |cmd| itself is not
  modified; a fixed copy is returned.
  """
  out = cmd[:]
  if not out:
    # Nothing to fix; avoid IndexError on an empty command line.
    return out
  if out[0] == 'python':
    out[0] = sys.executable
  elif out[0].endswith('.py'):
    out.insert(0, sys.executable)
  return out
def read_json(filepath):
  """Reads a JSON file and returns the deserialized value.

  Raises an OS-level error if the file is missing and ValueError on
  malformed JSON.
  """
  with open(filepath, 'r') as f:
    # The excerpt dropped the return; without it the function returns None.
    return json.load(f)
def write_json(filepath_or_handle, data, dense):
  """Writes data into filepath or file handle encoded as json.

  If dense is True, the json is packed. Otherwise, it is human readable.

  Arguments:
    filepath_or_handle: a file path (str) or an object with a write() method.
    data: JSON-serializable value.
    dense: bool selecting packed vs. indented output.
  """
  if dense:
    # No whitespace after separators: the most compact encoding.
    kwargs = {'sort_keys': True, 'separators': (',', ':')}
  else:
    kwargs = {'sort_keys': True, 'indent': 2}
  if hasattr(filepath_or_handle, 'write'):
    json.dump(data, filepath_or_handle, **kwargs)
  else:
    # json.dump emits text, so open in text mode; binary mode ('wb') raises
    # TypeError on Python 3 and buys nothing here.
    with open(filepath_or_handle, 'w') as f:
      json.dump(data, f, **kwargs)
def format_json(data, dense):
  """Returns a string with json encoded data.

  If dense is True, the json is packed. Otherwise, it is human readable.
  """
  # Render through write_json into an in-memory buffer so both code paths
  # share the exact same encoding options.
  out = cStringIO.StringIO()
  write_json(out, data, dense)
  return out.getvalue()
def gen_blacklist(regexes):
  """Returns a lambda to be used as a blacklist."""
  # Compile once up front; the returned callable only matches.
  patterns = [re.compile(expr) for expr in (regexes or [])]

  def is_blacklisted(f):
    return any(p.match(f) for p in patterns)

  return is_blacklisted
def get_bool_env_var(name):
  """Return True if integer environment variable |name| value is non zero.

  If environment variable is missing or is set to '0', returns False.
  """
  raw = os.environ.get(name, '0')
  # Raises ValueError if the value is not a valid integer literal.
  return int(raw) != 0
352 """True if running in non-interactive mode on some bot machine.
354 Examines os.environ for presence of SWARMING_HEADLESS var.
356 headless_env_keys = (
357 # This is Chromium specific. Set when running under buildbot slave.
359 # Set when running under swarm bot.
362 return any(get_bool_env_var(key) for key in headless_env_keys)
365 def get_cacerts_bundle():
366 """Returns path to a file with CA root certificates bundle.
368 Python's ssl module needs a real file on disk, so if code is running from
369 a zip archive, we need to extract the file first.
373 if _ca_certs is not None and os.path.exists(_ca_certs):
375 # Some rogue process clears /tmp and causes cacert.pem to disappear. Extract
376 # to current directory instead. We use our own bundled copy of cacert.pem.
377 _ca_certs = zip_package.extract_resource(utils, 'cacert.pem', temp_dir='.')