1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
14 from common import utils
15 from result import Result
# Positive infinity; FindMinLineDistance starts its minimum-distance search
# from this value.
18 INFINITY = float('inf')
# Bounds the loops that signal and that create the worker threads.
20 MAX_THREAD_NUMBER = 10
24 def SignalWorkerThreads():
# NOTE(review): the docstring and the loop body are not visible in this view.
# Judging by the function name, each of the MAX_THREAD_NUMBER iterations
# presumably signals one worker thread -- confirm against the full file.
29 for i in range(MAX_THREAD_NUMBER):
32 # Give worker threads a chance to exit.
33 # Work around the harmless Python 2.7 bug described below.
# Have the interpreter call SignalWorkerThreads at normal program exit.
37 atexit.register(SignalWorkerThreads)
# NOTE(review): the `def` line and several statements of this block are not
# visible in this view. The thread-creation code in this file passes `Worker`
# as the thread target, so this is presumably the body of Worker -- confirm.
44 task = TASK_QUEUE.get()
48 # According to http://bugs.python.org/issue14623, this is a harmless bug
49 # in Python 2.7 which won't be fixed.
50 # The exception is raised on daemon threads when python interpreter is
# A task is a 4-tuple of (function, args, kwargs, result_semaphore).
54 function, args, kwargs, result_semaphore = task
56 function(*args, **kwargs)
60 # Signal that one task is done, also in case of an exception.
61 result_semaphore.release()
65 """Runs the given tasks. Not thread-safe: no concurrent calls of this function.
67 Returns after all tasks have completed. A task is a dict as below:
69 'function': the function to call,
70 'args': the positional arguments to pass to the function,
71 'kwargs': the key-value arguments to pass to the function,
# NOTE(review): the `def` line and the statements guarding the queue setup
# are not visible in this view.
79 TASK_QUEUE = Queue.Queue()
80 for index in range(MAX_THREAD_NUMBER):
81 thread = threading.Thread(target=Worker, name='worker_%s' % index)
82 # Set as daemon, so no join is needed.
# The semaphore starts at 0; it is handed to the workers inside every
# queued task tuple.
86 result_semaphore = threading.Semaphore(0)
87 # Push each task onto the task queue for execution. 'args' and 'kwargs'
# are optional and default to an empty list / empty dict.
90 (task['function'], task.get('args', []),
91 task.get('kwargs', {}), result_semaphore))
93 # Wait until all tasks have been executed.
95 result_semaphore.acquire()
def GetRepositoryType(revision_number):
  """Returns the repository type of this revision number.

  Args:
    revision_number: A revision number or git hash.

  Returns:
    'git' or 'svn', depending on the revision_number.
  """
  # Anything utils.IsGitHash accepts is treated as git; every other value is
  # treated as an svn revision number.
  if utils.IsGitHash(revision_number):
    return 'git'
  return 'svn'
113 def ParseURLsFromConfig(file_name):
114 """Parses URLs from the config file.
116 The file should be in python config format, where an svn section is in the
117 format "svn:component_path".
118 Each svn section should contain changelog_url, revision_url,
119 diff_url and blame_url.
122 file_name: The name of the file that contains URL information.
125 A dictionary keyed by repository type. For svn, it maps
126 key 'svn' to another dictionary, which maps component path to the URLs
127 as explained above. For git, it maps to the URLs as explained above.
129 config = ConfigParser.ConfigParser()
131 # Get the absolute path of the config file, and read the file. If it fails,
# NOTE(review): the rest of this comment, the second argument of
# os.path.join and the failure handling are not visible in this view.
# The path is built relative to the directory containing this module.
133 config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
135 config.read(config_file_path)
139 # Iterate through the config file, check for sections.
141 for section in config.sections():
142 # Sections without ':' in their name need no extra dictionary layer: their
# options are stored directly under the section name.
144 if ':' not in section:
145 for option in config.options(section):
146 if section not in config_dict:
147 config_dict[section] = {}
149 url = config.get(section, option)
150 config_dict[section][option] = url
154 # Get repository type and component name from the section name, which has
# the form "repository_type:component_path".
155 repository_type_and_component = section.split(':')
156 repository_type = repository_type_and_component[0]
157 component_path = repository_type_and_component[1]
159 # Add the repository type (e.g. 'svn') as a key, if it is not already there.
160 if repository_type not in config_dict:
161 config_dict[repository_type] = {}
162 url_map_for_repository = config_dict[repository_type]
164 # Add the component path under the repository type, if it is not already
# there.
165 if component_path not in url_map_for_repository:
166 url_map_for_repository[component_path] = {}
167 type_to_url = url_map_for_repository[component_path]
169 # Add all URLs of this section to the map, keyed by option name.
170 for option in config.options(section):
171 url = config.get(section, option)
172 type_to_url[option] = url
def NormalizePath(path, parsed_deps):
  """Normalizes the path.

  Finds the component whose path occurs in the given file path and rewrites
  the file path relative to that component.

  Args:
    path: A string representing a path.
    parsed_deps: A map from component path to a dict describing the
                 component; only the 'name' entry is read here.

  Returns:
    A tuple (component_path, component_name, normalized_path) for the first
    matching component, trying longer component paths first. If no component
    matches, ('src/', 'chromium', normalized_path) is returned.
  """
  # First normalize the path and use forward slashes throughout.
  normalized_path = os.path.normpath(path).replace('\\', '/')

  # Iterate through all component paths in the parsed DEPS, in decreasing
  # order of length, so that the most specific component is tried first.
  for component_path in sorted(parsed_deps,
                               key=lambda dep_path: -len(dep_path)):
    # new_component_path is the component path with the leading 'src/'
    # removed; a bare 'src/' is kept as is.
    new_component_path = component_path
    if new_component_path.startswith('src/') and new_component_path != 'src/':
      new_component_path = new_component_path[len('src/'):]

    # The lowercased component path must be considered as well, because the
    # syzyasan build returns lowercased file paths.
    lower_component_path = new_component_path.lower()

    # Pick the spelling of the component path that occurs in the file path.
    # The lowercased spelling wins when both occur.
    if lower_component_path in normalized_path:
      current_component_path = lower_component_path
    elif new_component_path in normalized_path:
      current_component_path = new_component_path
    else:
      # This component is not part of the file path; try the next one.
      continue

    # Strip everything up to and including the first occurrence of the
    # component path.
    normalized_path = normalized_path.split(current_component_path, 1)[1]
    lower_normalized_path = normalized_path.lower()

    # Add 'src/' or 'Source/' at the front of the normalized path, depending
    # on what prefix the component path uses. For example, blink uses
    # 'Source' but chromium uses 'src/', and blink component path is
    # 'src/third_party/WebKit/Source', so add 'Source/' in front of the
    # normalized path.
    if not lower_normalized_path.startswith(('src/', 'source/')):
      if lower_component_path.endswith(('src/', 'source/')):
        # Reuse the last directory of the component path, in the spelling
        # that matched.
        normalized_path = (current_component_path.split('/')[-2] + '/' +
                           normalized_path)
      else:
        normalized_path = 'src/' + normalized_path

    component_name = parsed_deps[component_path]['name']
    return (component_path, component_name, normalized_path)

  # If the path does not match any component, default to chromium.
  return ('src/', 'chromium', normalized_path)
def SplitRange(regression):
  """Splits a range as retrieved from clusterfuzz.

  Args:
    regression: A string in format 'r1234:r5678'.

  Returns:
    A list containing two numbers represented in string, for example
    ['1234', '5678'], or None if the input is empty or does not consist of
    exactly two ':'-separated parts.
  """
  # Regression information may be absent altogether.
  if not regression:
    return None

  revisions = regression.split(':')

  # If regression information is not available, return None.
  if len(revisions) != 2:
    return None

  # Strip a single leading 'r' off the range start/end. lstrip is not used
  # because the range may be a git hash that starts with 'r'.
  return [revision[1:] if revision.startswith('r') else revision
          for revision in revisions]
def LoadJSON(json_string):
  """Loads json object from string, or None.

  Args:
    json_string: A string to get object from.

  Returns:
    JSON object if the string represents a JSON object, None otherwise.
  """
  # json.loads reports malformed input with ValueError (JSONDecodeError is a
  # subclass of it on Python 3).
  try:
    return json.loads(json_string)
  except ValueError:
    return None
def GetDataFromURL(url):
  """Retrieves raw data from URL, tries 10 times.

  Args:
    url: URL to get data from.

  Returns:
    None if the data retrieval fails, or the raw data.
  """
  # The retry count is fixed at 10; it is not a parameter of this function.
  status_code, data = utils.GetHttpClient().Get(url, retries=10)

  # Only an HTTP 200 status counts as success.
  if status_code == 200:
    return data

  # Return None if it fails to read data.
  return None
314 def FindMinLineDistance(crashed_line_list, changed_line_numbers,
# NOTE(review): the rest of the signature is not visible in this view; the
# body reads a `line_range` value.
316 """Calculates how far the changed line is from one of the crashes.
318 Finds the minimum distance between the lines that the file crashed on
319 and the lines that the file changed. For example, if the file crashed on
320 line 200 and the CL changes line 203,204 and 205, the function returns 3.
323 crashed_line_list: A list whose items are iterables of crashed line numbers.
324 changed_line_numbers: A list of lines that the file changed.
325 line_range: Number of lines to look back for.
328 A tuple (min_distance, crashed_line, changed_line). If either of the input lists is empty,
# Start from infinity so that any real distance replaces it.
332 min_distance = INFINITY
# Expand every crashed line into the window
# [crashed_line - line_range, crashed_line], so that changes shortly before a
# crashed line are compared as well.
336 crashed_line_numbers = set()
337 for crashed_line_range in crashed_line_list:
338 for crashed_line in crashed_line_range:
339 for line in range(crashed_line - line_range, crashed_line + 1):
340 crashed_line_numbers.add(line)
# Compare every line of the expanded set with every changed line.
# NOTE: despite its name, `distance` holds a changed line number.
342 for line in crashed_line_numbers:
343 for distance in changed_line_numbers:
344 # Find the current distance and update the min if current distance is
345 # less than current min.
346 current_distance = abs(line - distance)
347 if current_distance < min_distance:
348 min_distance = current_distance
350 changed_line = distance
# NOTE(review): `crashed_line` is also the loop variable of the expansion
# loop above; the statements that initialise and update it for the result are
# not visible in this view -- confirm what is returned for empty input.
352 return (min_distance, crashed_line, changed_line)
def GuessIfSameSubPath(path1, path2):
  """Guesses if two paths represent same path.

  Both paths are split on '/', and the distinct folder/file names they have
  in common are counted, regardless of position. The paths are considered
  the same when that count reaches 3, or the number of components of the
  shorter path if that is smaller.

  Args:
    path1: First path to compare.
    path2: Second path to compare.

  Returns:
    True if they are thought to be the same path, False otherwise.
  """
  components1 = path1.split('/')
  components2 = path2.split('/')

  shared_names = set(components1) & set(components2)
  required_matches = min(3, len(components1), len(components2))
  return len(shared_names) >= required_matches
def FindMinStackFrameNumber(stack_frame_indices, priorities):
  """Finds the minimum stack number, from the list of stack numbers.

  Only the entries whose priority equals the highest priority (the lowest
  priority number) are considered.

  Args:
    stack_frame_indices: A list of lists containing stack position.
    priorities: A list of priority for each file.

  Returns:
    Inf if there is no priority or no stack frame information to look at,
    minimum stack number otherwise.
  """
  # Without any priority there is nothing to select; min() would raise.
  if not priorities:
    return INFINITY

  # The highest priority is the one with the lowest priority number.
  highest_priority = min(priorities)

  # Gather the stack frame numbers of all entries with that priority.
  flattened = []
  for index, priority in enumerate(priorities):
    if priority == highest_priority:
      flattened += stack_frame_indices[index]

  # If no stack frame information is available, return inf. Else, return min.
  if not flattened:
    return INFINITY
  return min(flattened)
def AddHyperlink(text, link):
  """Returns a string with HTML link tag.

  Both arguments are HTML-escaped before being placed in the tag.

  Args:
    text: A string to add link.
    link: A link to add to the string.

  Returns:
    A string with hyperlink added.
  """
  # cgi.escape was removed in Python 3.8 in favour of html.escape. Fall back
  # to cgi.escape on interpreters that have no html module (Python 2).
  # html.escape additionally escapes single quotes when quote=True.
  try:
    from html import escape
  except ImportError:
    from cgi import escape

  # The link goes inside a double-quoted attribute, so quotes are escaped.
  sanitized_link = escape(link, quote=True)
  # The text is element content; quotes are left alone as before.
  sanitized_text = escape(str(text), quote=False)
  return '<a href="%s">%s</a>' % (sanitized_link, sanitized_text)
def PrettifyList(items):
  """Returns a string representation of a list.

  The elements are converted with str() and separated by ', '; unlike the
  repr of a list, the result has no surrounding brackets.

  Args:
    items: A list to prettify.

  Returns:
    A string representation of the list.
  """
  as_strings = [str(item) for item in items]
  return ', '.join(as_strings)
def PrettifyFrameInfo(frame_indices, functions):
  """Return a string to represent the frames with functions.

  Args:
    frame_indices: Stack frame numbers, one per function.
    functions: Function signatures; everything from the first '(' on is
               dropped, so only the function name is shown.

  Returns:
    The entries 'frame #<index>, "<function name>"' joined by '; '. Surplus
    elements of the longer input are ignored.
  """
  frames = ['frame #%s, "%s"' % (frame_index, function.split('(')[0])
            for frame_index, function in zip(frame_indices, functions)]
  return '; '.join(frames)
440 def PrettifyFiles(file_list):
441 """Returns a string representation of a list of file names.
444 file_list: A list of tuples (file_name, file_url).
446 A string representation of file names with their urls.
# NOTE(review): the initialisation of `ret` and the return statement are not
# visible in this view.
# Each file contributes one hyperlinked entry terminated by a newline.
449 for file_name, file_url in file_list:
450 ret.append(' %s\n' % AddHyperlink(file_name, file_url))
454 def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers,
455 function, line_range=3):
456 """Finds the overlap between changed lines and crashed lines.
458 Finds the intersection of the lines that caused the crash and
459 lines that the file changes. The intersection looks within line_range lines
460 (3 by default) before the line that caused the crash.
463 crashed_line_list: A list of lines that the file crashed on.
464 stack_frame_index: A list of positions in stack for each of the lines.
465 changed_line_numbers: A list of lines that the file changed.
466 function: A list of functions that the file crashed on.
467 line_range: Number of lines to look backwards from crashed lines.
470 line_number_intersection: Intersection between crashed_line_list and
471 changed_line_numbers.
472 stack_frame_index_intersection: Stack number for each of the intersections.
function_intersection: Function name for each of the intersections.
474 line_number_intersection = []
475 stack_frame_index_intersection = []
476 function_intersection = []
478 # Iterate through the crashed lines, and their occurrence in the stack.
# NOTE: the loop variable `stack_frame_index` reuses the parameter name; the
# zip() call is evaluated with the parameter before the name is rebound.
479 for (lines, stack_frame_index, function_name) in zip(
480 crashed_line_list, stack_frame_index, function):
481 # Also check previous 'line_range' lines. Create a set of all crashed lines
482 # and the lines within 'line_range' lines before each crashed line.
485 for line_in_range in range(line - line_range, line + 1):
486 line_minus_n.add(line_in_range)
488 for changed_line in changed_line_numbers:
489 # If a CL does not change a crashed line, check the next line.
490 if changed_line not in line_minus_n:
493 intersected_line = set()
494 # If the changed line is exactly the crashed line, add that line.
496 if line in changed_line_numbers:
497 intersected_line.add(line)
499 # If the changed line is within 'line_range' lines of the crashed line, add
# the changed line.
501 intersected_line.add(changed_line)
503 # Avoid adding the same line twice.
# NOTE(review): `intersected_line` is a set while the stored entries are
# lists, and a set never compares equal to a list, so this membership test
# looks like it can never filter anything -- confirm and compare like types.
504 if intersected_line not in line_number_intersection:
505 line_number_intersection.append(list(intersected_line))
506 stack_frame_index_intersection.append(stack_frame_index)
507 function_intersection.append(function_name)
510 return (line_number_intersection, stack_frame_index_intersection,
511 function_intersection)
514 def MatchListToResultList(matches):
515 """Converts a list of matches to a list of result objects.
518 matches: A list of match objects along with their stack priority and revision
521 A list of result objects.
# Each entry unpacks into three values; the first one is not used here.
526 for _, cl, match in matches:
# NOTE(review): the assignments of `suspected_cl` and `line_content` are not
# visible in this view.
528 revision_url = match.revision_url
529 component_name = match.component_name
530 author = match.author
531 reason = match.reason
532 review_url = match.review_url
533 reviewers = match.reviewers
534 # For matches, line content does not exist.
536 message = match.message
538 result = Result(suspected_cl, revision_url, component_name, author, reason,
539 review_url, reviewers, line_content, message)
540 result_list.append(result)
545 def BlameListToResultList(blame_list):
546 """Converts a blame list to a list of result objects.
549 blame_list: A list of blame objects.
552 A list of result objects.
556 for blame in blame_list:
557 suspected_cl = blame.revision
558 revision_url = blame.url
559 component_name = blame.component_name
560 author = blame.author
# The reason text names the line, the file and the stack frame of the blame.
562 'The CL last changed line %s of file %s, which is stack frame %d.' %
563 (blame.line_number, blame.file, blame.stack_frame_index))
564 # Blame objects do not have a review url or reviewers.
# NOTE(review): the assignments of `review_url` and `reviewers` are not
# visible in this view.
567 line_content = blame.line_content
568 message = blame.message
570 result = Result(suspected_cl, revision_url, component_name, author, reason,
571 review_url, reviewers, line_content, message)
572 result_list.append(result)