2 # Copyright 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text
7 at 80 chars, enforcing standard attribute ordering, and standardizing
10 This is quite a bit more complicated than just calling tree.toprettyxml();
11 we need additional customization, like special attribute ordering in tags
12 and wrapping text nodes, so we implement our own full custom XML pretty-printer.
15 from __future__ import with_statement
23 import xml.dom.minidom
28 # Desired order for tag attributes; attributes listed here will appear first,
29 # and in the same order as in these lists.
30 # { tag_name: [attribute_name, ...] }
32 'enum': ['name', 'type'],
33 'histogram': ['name', 'enum', 'units'],
34 'int': ['value', 'label'],
35 'fieldtrial': ['name', 'separator', 'ordering'],
36 'group': ['name', 'label'],
37 'affected-histogram': ['name'],
38 'with-group': ['name'],
41 # Tag names for top-level nodes whose children we don't want to indent.
42 TAGS_THAT_DONT_INDENT = [
43 'histogram-configuration',
49 # Extra vertical spacing rules for special tag names.
50 # {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)}
51 TAGS_THAT_HAVE_EXTRA_NEWLINE = {
52 'histogram-configuration': (2, 1, 1),
53 'histograms': (2, 1, 1),
54 'fieldtrials': (2, 1, 1),
56 'histogram': (1, 1, 1),
58 'fieldtrial': (1, 1, 1),
61 # Tags that we allow to be squished into a single line for brevity.
62 TAGS_THAT_ALLOW_SINGLE_LINE = [
67 # Tags whose children we want to alphabetize. The key is the parent tag name,
68 # and the value is a pair of the tag name of the children we want to sort,
69 # and a key function that maps each child node to the desired sort key.
70 ALPHABETIZATION_RULES = {
71 'histograms': ('histogram', lambda n: n.attributes['name'].value.lower()),
72 'enums': ('enum', lambda n: n.attributes['name'].value.lower()),
73 'enum': ('int', lambda n: int(n.attributes['value'].value)),
74 'fieldtrials': ('fieldtrial', lambda n: n.attributes['name'].value.lower()),
75 'fieldtrial': ('affected-histogram',
76 lambda n: n.attributes['name'].value.lower()),
80 class Error(Exception):
84 def LastLineLength(s):
85 """Returns the length of the last line in s.
88 s: A multi-line string, including newlines.
91 The length of the last line in s, in characters.
93 if s.rfind('\n') == -1: return len(s)
94 return len(s) - s.rfind('\n') - len('\n')
98 """XML-escapes the given string, replacing magic characters (&<>") with their
99 escaped equivalents."""
100 s = s.replace("&", "&").replace("<", "<")
101 s = s.replace("\"", """).replace(">", ">")
105 def PrettyPrintNode(node, indent=0):
106 """Pretty-prints the given XML node at the given indent level.
109 node: The minidom node to pretty-print.
110 indent: The current indent level.
113 The pretty-printed string (including embedded newlines).
116 Error if the XML has unknown tags or attributes.
118 # Handle the top-level document node.
119 if node.nodeType == xml.dom.minidom.Node.DOCUMENT_NODE:
120 return '\n'.join([PrettyPrintNode(n) for n in node.childNodes])
123 if node.nodeType == xml.dom.minidom.Node.TEXT_NODE:
124 # Wrap each paragraph in the text to fit in the 80 column limit.
125 wrapper = textwrap.TextWrapper()
126 wrapper.initial_indent = ' ' * indent
127 wrapper.subsequent_indent = ' ' * indent
128 wrapper.break_on_hyphens = False
129 wrapper.break_long_words = False
130 wrapper.width = WRAP_COLUMN
131 text = XmlEscape(node.data)
132 # Remove any common indent.
133 text = textwrap.dedent(text.strip('\n'))
134 lines = text.split('\n')
135 # Split the text into paragraphs at blank line boundaries.
138 if len(l.strip()) == 0 and len(paragraphs[-1]) > 0:
139 paragraphs.append([])
141 paragraphs[-1].append(l)
142 # Remove trailing empty paragraph if present.
143 if len(paragraphs) > 0 and len(paragraphs[-1]) == 0:
144 paragraphs = paragraphs[:-1]
145 # Wrap each paragraph and separate with two newlines.
146 return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])
148 # Handle element nodes.
149 if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
150 newlines_after_open, newlines_before_close, newlines_after_close = (
151 TAGS_THAT_HAVE_EXTRA_NEWLINE.get(node.tagName, (1, 1, 0)))
153 s = ' ' * indent + '<' + node.tagName
155 # Calculate how much space to allow for the '>' or '/>'.
157 if not node.childNodes:
160 # Pretty-print the attributes.
161 attributes = node.attributes.keys()
163 # Reorder the attributes.
164 if not node.tagName in ATTRIBUTE_ORDER:
165 unrecognized_attributes = attributes;
167 unrecognized_attributes = (
168 [a for a in attributes if not a in ATTRIBUTE_ORDER[node.tagName]])
170 [a for a in ATTRIBUTE_ORDER[node.tagName] if a in attributes])
172 for a in unrecognized_attributes:
174 'Unrecognized attribute "%s" in tag "%s"' % (a, node.tagName))
175 if unrecognized_attributes:
179 value = XmlEscape(node.attributes[a].value)
180 # Replace sequences of whitespace with single spaces.
181 words = value.split()
182 a_str = ' %s="%s"' % (a, ' '.join(words))
183 # Start a new line if the attribute will make this line too long.
184 if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:
185 s += '\n' + ' ' * (indent + 3)
186 # Output everything up to the first quote.
188 value_indent_level = LastLineLength(s)
189 # Output one word at a time, splitting to the next line where necessary.
190 column = value_indent_level
191 for i, word in enumerate(words):
192 # This is slightly too conservative since not every word will be
193 # followed by the closing characters...
194 if i > 0 and (column + len(word) + 1 + closing_chars > WRAP_COLUMN):
195 s = s.rstrip() # remove any trailing whitespace
196 s += '\n' + ' ' * value_indent_level
197 column = value_indent_level
199 column += len(word) + 1
200 s = s.rstrip() # remove any trailing whitespace
202 s = s.rstrip() # remove any trailing whitespace
204 # Pretty-print the child nodes.
207 # Calculate the new indent level for child nodes.
209 if node.tagName not in TAGS_THAT_DONT_INDENT:
211 child_nodes = node.childNodes
213 # Recursively pretty-print the child nodes.
214 child_nodes = [PrettyPrintNode(n, indent=new_indent) for n in child_nodes]
215 child_nodes = [c for c in child_nodes if len(c.strip()) > 0]
217 # Determine whether we can fit the entire node on a single line.
218 close_tag = '</%s>' % node.tagName
219 space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)
220 if (node.tagName in TAGS_THAT_ALLOW_SINGLE_LINE and
221 len(child_nodes) == 1 and len(child_nodes[0].strip()) <= space_left):
222 s += child_nodes[0].strip()
224 s += '\n' * newlines_after_open + '\n'.join(child_nodes)
225 s += '\n' * newlines_before_close + ' ' * indent
229 s += '\n' * newlines_after_close
232 # Handle comment nodes.
233 if node.nodeType == xml.dom.minidom.Node.COMMENT_NODE:
234 return '<!--%s-->\n' % node.data
236 # Ignore other node types. This could be a processing instruction (<? ... ?>)
237 # or cdata section (<![CDATA[...]]!>), neither of which are legal in the
238 # histograms XML at present.
239 logging.error('Ignoring unrecognized node data: %s' % node.toxml())
243 def unsafeAppendChild(parent, child):
244 """Append child to parent's list of children, ignoring the possibility that it
245 is already in another node's childNodes list. Requires that the previous
246 parent of child is discarded (to avoid non-tree DOM graphs).
247 This can provide a significant speedup as O(n^2) operations are removed (in
248 particular, each child insertion avoids the need to traverse the old parent's
249 entire list of children)."""
250 child.parentNode = None
251 parent.appendChild(child)
252 child.parentNode = parent
255 def TransformByAlphabetizing(node):
256 """Transform the given XML by alphabetizing specific node types according to
257 the rules in ALPHABETIZATION_RULES.
260 node: The minidom node to transform.
263 The minidom node, with children appropriately alphabetized. Note that the
264 transformation is done in-place, i.e. the original minidom tree is modified
267 if node.nodeType != xml.dom.minidom.Node.ELEMENT_NODE:
268 for c in node.childNodes: TransformByAlphabetizing(c)
271 # Element node with a tag name that we alphabetize the children of?
272 if node.tagName in ALPHABETIZATION_RULES:
273 # Put subnodes in a list of node,key pairs to allow for custom sorting.
274 subtag, key_function = ALPHABETIZATION_RULES[node.tagName]
277 for c in node.childNodes:
278 if (c.nodeType == xml.dom.minidom.Node.ELEMENT_NODE and
279 c.tagName == subtag):
280 last_key = key_function(c)
281 # Subnodes that we don't want to rearrange use the last node's key,
282 # so they stay in the same relative position.
283 subnodes.append( (c, last_key) )
285 # Sort the subnode list.
286 subnodes.sort(key=lambda pair: pair[1])
288 # Re-add the subnodes, transforming each recursively.
289 while node.firstChild:
290 node.removeChild(node.firstChild)
291 for (c, _) in subnodes:
292 unsafeAppendChild(node, TransformByAlphabetizing(c))
295 # Recursively handle other element nodes and other node types.
296 for c in node.childNodes: TransformByAlphabetizing(c)
300 def PrettyPrint(raw_xml):
301 """Pretty-print the given XML.
304 xml: The contents of the histograms XML file, as a string.
307 The pretty-printed version.
309 tree = xml.dom.minidom.parseString(raw_xml)
310 tree = TransformByAlphabetizing(tree)
311 return PrettyPrintNode(tree)
315 logging.basicConfig(level=logging.INFO)
317 presubmit = ('--presubmit' in sys.argv)
319 logging.info('Loading histograms.xml...')
320 with open('histograms.xml', 'rb') as f:
323 # Check there are no CR ('\r') characters in the file.
325 logging.info('DOS-style line endings (CR characters) detected - these are '
326 'not allowed. Please run dos2unix histograms.xml')
329 logging.info('Pretty-printing...')
331 pretty = PrettyPrint(xml)
333 logging.error('Aborting parsing due to fatal errors.')
337 logging.info('histograms.xml is correctly pretty-printed.')
340 logging.info('histograms.xml is not formatted correctly; run '
341 'pretty_print.py to fix.')
343 if not diffutil.PromptUserToAcceptDiff(
345 'Is the prettified version acceptable?'):
346 logging.error('Aborting')
349 logging.info('Creating backup file histograms.before.pretty-print.xml')
350 shutil.move('histograms.xml', 'histograms.before.pretty-print.xml')
352 logging.info('Writing new histograms.xml file')
353 with open('histograms.xml', 'wb') as f:
357 if __name__ == '__main__':