1 <?xml version="1.0" encoding="ascii"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
3 "DTD/xhtml1-transitional.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
6 <title>lxml.html.diff</title>
7 <link rel="stylesheet" href="epydoc.css" type="text/css" />
8 <script type="text/javascript" src="epydoc.js"></script>
11 <body bgcolor="white" text="black" link="blue" vlink="#204080"
13 <!-- ==================== NAVIGATION BAR ==================== -->
14 <table class="navbar" border="0" width="100%" cellpadding="0"
15 bgcolor="#a0c0ff" cellspacing="0">
18 <th> <a
19 href="lxml-module.html">Home</a> </th>
22 <th> <a
23 href="module-tree.html">Trees</a> </th>
26 <th> <a
27 href="identifier-index.html">Indices</a> </th>
30 <th> <a
31 href="help.html">Help</a> </th>
33 <!-- Project homepage -->
34 <th class="navbar" align="right" width="100%">
35 <table border="0" cellpadding="0" cellspacing="0">
36 <tr><th class="navbar" align="center"
37 ><a class="navbar" target="_top" href="/">lxml API</a></th>
41 <table width="100%" cellpadding="0" cellspacing="0">
44 <span class="breadcrumbs">
45 <a href="lxml-module.html">Package lxml</a> ::
46 <a href="lxml.html-module.html">Package html</a> ::
51 <table cellpadding="0" cellspacing="0">
52 <!-- hide/show private -->
53 <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
54 onclick="toggle_private();">hide private</a>]</span></td></tr>
55 <tr><td align="right"><span class="options"
56 >[<a href="frames.html" target="_top">frames</a
57 >] | <a href="lxml.html.diff-module.html"
58 target="_top">no frames</a>]</span></td></tr>
63 <!-- ==================== MODULE DESCRIPTION ==================== -->
64 <h1 class="epydoc">Module diff</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.diff-pysrc.html">source code</a></span></p>
65 <!-- ==================== CLASSES ==================== -->
66 <a name="section-Classes"></a>
67 <table class="summary" border="1" cellpadding="3"
68 cellspacing="0" width="100%" bgcolor="white">
69 <tr bgcolor="#70b0f0" class="table-header">
70 <td colspan="2" class="table-header">
71 <table border="0" cellpadding="0" cellspacing="0" width="100%">
73 <td align="left"><span class="table-header">Classes</span></td>
74 <td align="right" valign="top"
75 ><span class="options">[<a href="#section-Classes"
76 class="privatelink" onclick="toggle_private();"
77 >hide private</a>]</span></td>
83 <td width="15%" align="right" valign="top" class="summary">
84 <span class="summary-type"> </span>
85 </td><td class="summary">
86 <a href="str-class.html" class="summary-name" onclick="show_private();">basestring</a><br />
87 str(object='') -> string
91 <td width="15%" align="right" valign="top" class="summary">
92 <span class="summary-type"> </span>
93 </td><td class="summary">
94 <a href="lxml.html.diff.DEL_START-class.html" class="summary-name" onclick="show_private();">DEL_START</a>
98 <td width="15%" align="right" valign="top" class="summary">
99 <span class="summary-type"> </span>
100 </td><td class="summary">
101 <a href="lxml.html.diff.DEL_END-class.html" class="summary-name" onclick="show_private();">DEL_END</a>
105 <td width="15%" align="right" valign="top" class="summary">
106 <span class="summary-type"> </span>
107 </td><td class="summary">
108 <a href="lxml.html.diff.NoDeletes-class.html" class="summary-name" onclick="show_private();">NoDeletes</a><br />
109 Raised when the document no longer contains any pending deletes
114 <td width="15%" align="right" valign="top" class="summary">
115 <span class="summary-type"> </span>
116 </td><td class="summary">
117 <a href="lxml.html.diff.token-class.html" class="summary-name" onclick="show_private();">token</a><br />
118 Represents a diffable token, generally a word that is displayed to
119 the user. Opening tags are attached to this token when they are
120 adjacent (pre_tags), as are closing tags that follow the word
121 (post_tags). Some exceptions occur when there are empty tags
122 adjacent to a word, so there may be close tags in pre_tags, or
123 open tags in post_tags.
127 <td width="15%" align="right" valign="top" class="summary">
128 <span class="summary-type"> </span>
129 </td><td class="summary">
130 <a href="lxml.html.diff.tag_token-class.html" class="summary-name" onclick="show_private();">tag_token</a><br />
131 Represents a token that is actually a tag. Currently this is just
132 the &lt;img&gt; tag, which takes up visible space just like a word but
133 is only represented in a document by a tag.
137 <td width="15%" align="right" valign="top" class="summary">
138 <span class="summary-type"> </span>
139 </td><td class="summary">
140 <a href="lxml.html.diff.href_token-class.html" class="summary-name" onclick="show_private();">href_token</a><br />
141 Represents the href in an anchor tag. Unlike other words, we only
142 show the href when it changes.
146 <td width="15%" align="right" valign="top" class="summary">
147 <span class="summary-type"> </span>
148 </td><td class="summary">
149 <a href="lxml.html.diff.InsensitiveSequenceMatcher-class.html" class="summary-name" onclick="show_private();">InsensitiveSequenceMatcher</a><br />
150 Acts like SequenceMatcher, but tries not to find very small equal
151 blocks amidst large spans of changes
155 <!-- ==================== FUNCTIONS ==================== -->
156 <a name="section-Functions"></a>
157 <table class="summary" border="1" cellpadding="3"
158 cellspacing="0" width="100%" bgcolor="white">
159 <tr bgcolor="#70b0f0" class="table-header">
160 <td colspan="2" class="table-header">
161 <table border="0" cellpadding="0" cellspacing="0" width="100%">
163 <td align="left"><span class="table-header">Functions</span></td>
164 <td align="right" valign="top"
165 ><span class="options">[<a href="#section-Functions"
166 class="privatelink" onclick="toggle_private();"
167 >hide private</a>]</span></td>
173 <td width="15%" align="right" valign="top" class="summary">
174 <span class="summary-type"> </span>
175 </td><td class="summary">
176 <table width="100%" cellpadding="0" cellspacing="0" border="0">
178 <td><span class="summary-sig"><a name="default_markup"></a><span class="summary-sig-name">default_markup</span>(<span class="summary-sig-arg">text</span>,
179 <span class="summary-sig-arg">version</span>)</span></td>
180 <td align="right" valign="top">
181 <span class="codelink"><a href="lxml.html.diff-pysrc.html#default_markup">source code</a></span>
190 <td width="15%" align="right" valign="top" class="summary">
191 <span class="summary-type"> </span>
192 </td><td class="summary">
193 <table width="100%" cellpadding="0" cellspacing="0" border="0">
195 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#html_annotate" class="summary-sig-name">html_annotate</a>(<span class="summary-sig-arg">doclist</span>,
196 <span class="summary-sig-arg">markup</span>=<span class="summary-sig-default">default_markup</span>)</span><br />
197 doclist should be ordered from oldest to newest, like:</td>
198 <td align="right" valign="top">
199 <span class="codelink"><a href="lxml.html.diff-pysrc.html#html_annotate">source code</a></span>
208 <td width="15%" align="right" valign="top" class="summary">
209 <span class="summary-type"> </span>
210 </td><td class="summary">
211 <table width="100%" cellpadding="0" cellspacing="0" border="0">
213 <td><span class="summary-sig"><a name="tokenize_annotated"></a><span class="summary-sig-name">tokenize_annotated</span>(<span class="summary-sig-arg">doc</span>,
214 <span class="summary-sig-arg">annotation</span>)</span><br />
215 Tokenize a document and add an annotation attribute to each token</td>
216 <td align="right" valign="top">
217 <span class="codelink"><a href="lxml.html.diff-pysrc.html#tokenize_annotated">source code</a></span>
226 <td width="15%" align="right" valign="top" class="summary">
227 <span class="summary-type"> </span>
228 </td><td class="summary">
229 <table width="100%" cellpadding="0" cellspacing="0" border="0">
231 <td><span class="summary-sig"><a name="html_annotate_merge_annotations"></a><span class="summary-sig-name">html_annotate_merge_annotations</span>(<span class="summary-sig-arg">tokens_old</span>,
232 <span class="summary-sig-arg">tokens_new</span>)</span><br />
233 Merge the annotations from tokens_old into tokens_new, when the
234 tokens in the new document already existed in the old document.</td>
235 <td align="right" valign="top">
236 <span class="codelink"><a href="lxml.html.diff-pysrc.html#html_annotate_merge_annotations">source code</a></span>
245 <td width="15%" align="right" valign="top" class="summary">
246 <span class="summary-type"> </span>
247 </td><td class="summary">
248 <table width="100%" cellpadding="0" cellspacing="0" border="0">
250 <td><span class="summary-sig"><a name="copy_annotations"></a><span class="summary-sig-name">copy_annotations</span>(<span class="summary-sig-arg">src</span>,
251 <span class="summary-sig-arg">dest</span>)</span><br />
252 Copy annotations from the tokens listed in src to the tokens in dest</td>
253 <td align="right" valign="top">
254 <span class="codelink"><a href="lxml.html.diff-pysrc.html#copy_annotations">source code</a></span>
263 <td width="15%" align="right" valign="top" class="summary">
264 <span class="summary-type"> </span>
265 </td><td class="summary">
266 <table width="100%" cellpadding="0" cellspacing="0" border="0">
268 <td><span class="summary-sig"><a name="compress_tokens"></a><span class="summary-sig-name">compress_tokens</span>(<span class="summary-sig-arg">tokens</span>)</span><br />
269 Combine adjacent tokens when there is no HTML between the tokens,
270 and they share an annotation</td>
271 <td align="right" valign="top">
272 <span class="codelink"><a href="lxml.html.diff-pysrc.html#compress_tokens">source code</a></span>
281 <td width="15%" align="right" valign="top" class="summary">
282 <span class="summary-type"> </span>
283 </td><td class="summary">
284 <table width="100%" cellpadding="0" cellspacing="0" border="0">
286 <td><span class="summary-sig"><a name="compress_merge_back"></a><span class="summary-sig-name">compress_merge_back</span>(<span class="summary-sig-arg">tokens</span>,
287 <span class="summary-sig-arg">tok</span>)</span><br />
288 Merge tok into the last element of tokens (modifying the list of
289 tokens in-place).</td>
290 <td align="right" valign="top">
291 <span class="codelink"><a href="lxml.html.diff-pysrc.html#compress_merge_back">source code</a></span>
300 <td width="15%" align="right" valign="top" class="summary">
301 <span class="summary-type"> </span>
302 </td><td class="summary">
303 <table width="100%" cellpadding="0" cellspacing="0" border="0">
305 <td><span class="summary-sig"><a name="markup_serialize_tokens"></a><span class="summary-sig-name">markup_serialize_tokens</span>(<span class="summary-sig-arg">tokens</span>,
306 <span class="summary-sig-arg">markup_func</span>)</span><br />
307 Serialize the list of tokens into a list of text chunks, calling
308 markup_func around text to add annotations.</td>
309 <td align="right" valign="top">
310 <span class="codelink"><a href="lxml.html.diff-pysrc.html#markup_serialize_tokens">source code</a></span>
319 <td width="15%" align="right" valign="top" class="summary">
320 <span class="summary-type"> </span>
321 </td><td class="summary">
322 <table width="100%" cellpadding="0" cellspacing="0" border="0">
324 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#htmldiff" class="summary-sig-name">htmldiff</a>(<span class="summary-sig-arg">old_html</span>,
325 <span class="summary-sig-arg">new_html</span>)</span><br />
326 Do a diff of the old and new document. The documents are HTML
327 <em>fragments</em> (str/UTF8 or unicode); they are not complete documents
328 (i.e., no &lt;html&gt; tag).</td>
329 <td align="right" valign="top">
330 <span class="codelink"><a href="lxml.html.diff-pysrc.html#htmldiff">source code</a></span>
339 <td width="15%" align="right" valign="top" class="summary">
340 <span class="summary-type"> </span>
341 </td><td class="summary">
342 <table width="100%" cellpadding="0" cellspacing="0" border="0">
344 <td><span class="summary-sig"><a name="htmldiff_tokens"></a><span class="summary-sig-name">htmldiff_tokens</span>(<span class="summary-sig-arg">html1_tokens</span>,
345 <span class="summary-sig-arg">html2_tokens</span>)</span><br />
346 Does a diff on the tokens themselves, returning a list of text
347 chunks (not tokens).</td>
348 <td align="right" valign="top">
349 <span class="codelink"><a href="lxml.html.diff-pysrc.html#htmldiff_tokens">source code</a></span>
358 <td width="15%" align="right" valign="top" class="summary">
359 <span class="summary-type"> </span>
360 </td><td class="summary">
361 <table width="100%" cellpadding="0" cellspacing="0" border="0">
363 <td><span class="summary-sig"><a name="expand_tokens"></a><span class="summary-sig-name">expand_tokens</span>(<span class="summary-sig-arg">tokens</span>,
364 <span class="summary-sig-arg">equal</span>=<span class="summary-sig-default">False</span>)</span><br />
365 Given a list of tokens, return a generator of the chunks of
366 text for the data in the tokens.</td>
367 <td align="right" valign="top">
368 <span class="codelink"><a href="lxml.html.diff-pysrc.html#expand_tokens">source code</a></span>
377 <td width="15%" align="right" valign="top" class="summary">
378 <span class="summary-type"> </span>
379 </td><td class="summary">
380 <table width="100%" cellpadding="0" cellspacing="0" border="0">
382 <td><span class="summary-sig"><a name="merge_insert"></a><span class="summary-sig-name">merge_insert</span>(<span class="summary-sig-arg">ins_chunks</span>,
383 <span class="summary-sig-arg">doc</span>)</span><br />
384 doc is the already-handled document (as a list of text chunks);
385 here we add &lt;ins&gt;ins_chunks&lt;/ins&gt; to the end of that.</td>
386 <td align="right" valign="top">
387 <span class="codelink"><a href="lxml.html.diff-pysrc.html#merge_insert">source code</a></span>
396 <td width="15%" align="right" valign="top" class="summary">
397 <span class="summary-type"> </span>
398 </td><td class="summary">
399 <table width="100%" cellpadding="0" cellspacing="0" border="0">
401 <td><span class="summary-sig"><a name="merge_delete"></a><span class="summary-sig-name">merge_delete</span>(<span class="summary-sig-arg">del_chunks</span>,
402 <span class="summary-sig-arg">doc</span>)</span><br />
403 Adds the text chunks in del_chunks to the document doc (another
404 list of text chunks) with markers to show it is a delete.
405 cleanup_delete later resolves these markers into &lt;del&gt; tags.</td>
406 <td align="right" valign="top">
407 <span class="codelink"><a href="lxml.html.diff-pysrc.html#merge_delete">source code</a></span>
416 <td width="15%" align="right" valign="top" class="summary">
417 <span class="summary-type"> </span>
418 </td><td class="summary">
419 <table width="100%" cellpadding="0" cellspacing="0" border="0">
421 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#cleanup_delete" class="summary-sig-name" onclick="show_private();">cleanup_delete</a>(<span class="summary-sig-arg">chunks</span>)</span><br />
422 Cleans up any DEL_START/DEL_END markers in the document, replacing
423 them with &lt;del&gt;&lt;/del&gt;. To do this while keeping the document
424 valid, it may need to drop some tags (either start or end tags).</td>
425 <td align="right" valign="top">
426 <span class="codelink"><a href="lxml.html.diff-pysrc.html#cleanup_delete">source code</a></span>
435 <td width="15%" align="right" valign="top" class="summary">
436 <span class="summary-type"> </span>
437 </td><td class="summary">
438 <table width="100%" cellpadding="0" cellspacing="0" border="0">
440 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#split_unbalanced" class="summary-sig-name" onclick="show_private();">split_unbalanced</a>(<span class="summary-sig-arg">chunks</span>)</span><br />
441 Return (unbalanced_start, balanced, unbalanced_end), where each is
442 a list of text and tag chunks.</td>
443 <td align="right" valign="top">
444 <span class="codelink"><a href="lxml.html.diff-pysrc.html#split_unbalanced">source code</a></span>
453 <td width="15%" align="right" valign="top" class="summary">
454 <span class="summary-type"> </span>
455 </td><td class="summary">
456 <table width="100%" cellpadding="0" cellspacing="0" border="0">
458 <td><span class="summary-sig"><a name="split_delete"></a><span class="summary-sig-name">split_delete</span>(<span class="summary-sig-arg">chunks</span>)</span><br />
459 Returns (stuff_before_DEL_START, stuff_inside_DEL_START_END,
460 stuff_after_DEL_END). Returns the first case found (there may be
461 more DEL_STARTs in stuff_after_DEL_END). Raises NoDeletes if
462 there's no DEL_START found.</td>
463 <td align="right" valign="top">
464 <span class="codelink"><a href="lxml.html.diff-pysrc.html#split_delete">source code</a></span>
473 <td width="15%" align="right" valign="top" class="summary">
474 <span class="summary-type"> </span>
475 </td><td class="summary">
476 <table width="100%" cellpadding="0" cellspacing="0" border="0">
478 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#locate_unbalanced_start" class="summary-sig-name" onclick="show_private();">locate_unbalanced_start</a>(<span class="summary-sig-arg">unbalanced_start</span>,
479 <span class="summary-sig-arg">pre_delete</span>,
480 <span class="summary-sig-arg">post_delete</span>)</span><br />
481 pre_delete and post_delete implicitly point to a place in the
482 document (where the two were split). This moves that point (by
483 popping items from one and pushing them onto the other). It moves
484 the point to try to find a place where unbalanced_start applies.</td>
485 <td align="right" valign="top">
486 <span class="codelink"><a href="lxml.html.diff-pysrc.html#locate_unbalanced_start">source code</a></span>
495 <td width="15%" align="right" valign="top" class="summary">
496 <span class="summary-type"> </span>
497 </td><td class="summary">
498 <table width="100%" cellpadding="0" cellspacing="0" border="0">
500 <td><span class="summary-sig"><a name="locate_unbalanced_end"></a><span class="summary-sig-name">locate_unbalanced_end</span>(<span class="summary-sig-arg">unbalanced_end</span>,
501 <span class="summary-sig-arg">pre_delete</span>,
502 <span class="summary-sig-arg">post_delete</span>)</span><br />
503 like locate_unbalanced_start, except handling end tags and
504 possibly moving the point earlier in the document.</td>
505 <td align="right" valign="top">
506 <span class="codelink"><a href="lxml.html.diff-pysrc.html#locate_unbalanced_end">source code</a></span>
515 <td width="15%" align="right" valign="top" class="summary">
516 <span class="summary-type"> </span>
517 </td><td class="summary">
518 <table width="100%" cellpadding="0" cellspacing="0" border="0">
520 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#tokenize" class="summary-sig-name" onclick="show_private();">tokenize</a>(<span class="summary-sig-arg">html</span>,
521 <span class="summary-sig-arg">include_hrefs</span>=<span class="summary-sig-default">True</span>)</span><br />
522 Parses the given HTML and returns token objects (words with attached tags).</td>
523 <td align="right" valign="top">
524 <span class="codelink"><a href="lxml.html.diff-pysrc.html#tokenize">source code</a></span>
533 <td width="15%" align="right" valign="top" class="summary">
534 <span class="summary-type"> </span>
535 </td><td class="summary">
536 <table width="100%" cellpadding="0" cellspacing="0" border="0">
538 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#parse_html" class="summary-sig-name" onclick="show_private();">parse_html</a>(<span class="summary-sig-arg">html</span>,
539 <span class="summary-sig-arg">cleanup</span>=<span class="summary-sig-default">True</span>)</span><br />
540 Parses an HTML fragment, returning an lxml element. Note that the HTML will be
541 wrapped in a &lt;div&gt; tag that was not in the original document.</td>
542 <td align="right" valign="top">
543 <span class="codelink"><a href="lxml.html.diff-pysrc.html#parse_html">source code</a></span>
552 <td width="15%" align="right" valign="top" class="summary">
553 <span class="summary-type"> </span>
554 </td><td class="summary">
555 <table width="100%" cellpadding="0" cellspacing="0" border="0">
557 <td><span class="summary-sig"><a name="cleanup_html"></a><span class="summary-sig-name">cleanup_html</span>(<span class="summary-sig-arg">html</span>)</span><br />
558 This 'cleans' the HTML, meaning that any page structure is removed
559 (only the contents of &lt;body&gt; are used, if there is any &lt;body&gt;).
560 Also &lt;ins&gt; and &lt;del&gt; tags are removed.</td>
561 <td align="right" valign="top">
562 <span class="codelink"><a href="lxml.html.diff-pysrc.html#cleanup_html">source code</a></span>
571 <td width="15%" align="right" valign="top" class="summary">
572 <span class="summary-type"> </span>
573 </td><td class="summary">
574 <table width="100%" cellpadding="0" cellspacing="0" border="0">
576 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#split_trailing_whitespace" class="summary-sig-name" onclick="show_private();">split_trailing_whitespace</a>(<span class="summary-sig-arg">word</span>)</span><br />
577 This function takes a word, such as 'test ', and separates it from its trailing whitespace.</td>
578 <td align="right" valign="top">
579 <span class="codelink"><a href="lxml.html.diff-pysrc.html#split_trailing_whitespace">source code</a></span>
588 <td width="15%" align="right" valign="top" class="summary">
589 <span class="summary-type"> </span>
590 </td><td class="summary">
591 <table width="100%" cellpadding="0" cellspacing="0" border="0">
593 <td><span class="summary-sig"><a name="fixup_chunks"></a><span class="summary-sig-name">fixup_chunks</span>(<span class="summary-sig-arg">chunks</span>)</span><br />
594 This function takes a list of chunks and produces a list of tokens.</td>
595 <td align="right" valign="top">
596 <span class="codelink"><a href="lxml.html.diff-pysrc.html#fixup_chunks">source code</a></span>
605 <td width="15%" align="right" valign="top" class="summary">
606 <span class="summary-type"> </span>
607 </td><td class="summary">
608 <table width="100%" cellpadding="0" cellspacing="0" border="0">
610 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#flatten_el" class="summary-sig-name" onclick="show_private();">flatten_el</a>(<span class="summary-sig-arg">el</span>,
611 <span class="summary-sig-arg">include_hrefs</span>,
612 <span class="summary-sig-arg">skip_tag</span>=<span class="summary-sig-default">False</span>)</span><br />
613 Takes an lxml element el, and generates all the text chunks for
614 that tag. Each start tag is a chunk, each word is a chunk, and each
615 end tag is a chunk.</td>
616 <td align="right" valign="top">
617 <span class="codelink"><a href="lxml.html.diff-pysrc.html#flatten_el">source code</a></span>
626 <td width="15%" align="right" valign="top" class="summary">
627 <span class="summary-type"> </span>
628 </td><td class="summary">
629 <table width="100%" cellpadding="0" cellspacing="0" border="0">
631 <td><span class="summary-sig"><a name="split_words"></a><span class="summary-sig-name">split_words</span>(<span class="summary-sig-arg">text</span>)</span><br />
632 Splits some text into words. Includes trailing whitespace
633 on each word when appropriate.</td>
634 <td align="right" valign="top">
635 <span class="codelink"><a href="lxml.html.diff-pysrc.html#split_words">source code</a></span>
644 <td width="15%" align="right" valign="top" class="summary">
645 <span class="summary-type"> </span>
646 </td><td class="summary">
647 <table width="100%" cellpadding="0" cellspacing="0" border="0">
649 <td><span class="summary-sig"><a name="start_tag"></a><span class="summary-sig-name">start_tag</span>(<span class="summary-sig-arg">el</span>)</span><br />
650 The text representation of the start tag for a tag.</td>
651 <td align="right" valign="top">
652 <span class="codelink"><a href="lxml.html.diff-pysrc.html#start_tag">source code</a></span>
661 <td width="15%" align="right" valign="top" class="summary">
662 <span class="summary-type"> </span>
663 </td><td class="summary">
664 <table width="100%" cellpadding="0" cellspacing="0" border="0">
666 <td><span class="summary-sig"><a name="end_tag"></a><span class="summary-sig-name">end_tag</span>(<span class="summary-sig-arg">el</span>)</span><br />
667 The text representation of an end tag for a tag. Includes
668 trailing whitespace when appropriate.</td>
669 <td align="right" valign="top">
670 <span class="codelink"><a href="lxml.html.diff-pysrc.html#end_tag">source code</a></span>
679 <td width="15%" align="right" valign="top" class="summary">
680 <span class="summary-type"> </span>
681 </td><td class="summary">
682 <table width="100%" cellpadding="0" cellspacing="0" border="0">
684 <td><span class="summary-sig"><a name="is_word"></a><span class="summary-sig-name">is_word</span>(<span class="summary-sig-arg">tok</span>)</span></td>
685 <td align="right" valign="top">
686 <span class="codelink"><a href="lxml.html.diff-pysrc.html#is_word">source code</a></span>
695 <td width="15%" align="right" valign="top" class="summary">
696 <span class="summary-type"> </span>
697 </td><td class="summary">
698 <table width="100%" cellpadding="0" cellspacing="0" border="0">
700 <td><span class="summary-sig"><a name="is_end_tag"></a><span class="summary-sig-name">is_end_tag</span>(<span class="summary-sig-arg">tok</span>)</span></td>
701 <td align="right" valign="top">
702 <span class="codelink"><a href="lxml.html.diff-pysrc.html#is_end_tag">source code</a></span>
711 <td width="15%" align="right" valign="top" class="summary">
712 <span class="summary-type"> </span>
713 </td><td class="summary">
714 <table width="100%" cellpadding="0" cellspacing="0" border="0">
716 <td><span class="summary-sig"><a name="is_start_tag"></a><span class="summary-sig-name">is_start_tag</span>(<span class="summary-sig-arg">tok</span>)</span></td>
717 <td align="right" valign="top">
718 <span class="codelink"><a href="lxml.html.diff-pysrc.html#is_start_tag">source code</a></span>
727 <td width="15%" align="right" valign="top" class="summary">
728 <span class="summary-type"> </span>
729 </td><td class="summary">
730 <table width="100%" cellpadding="0" cellspacing="0" border="0">
732 <td><span class="summary-sig"><a name="fixup_ins_del_tags"></a><span class="summary-sig-name">fixup_ins_del_tags</span>(<span class="summary-sig-arg">html</span>)</span><br />
733 Given an html string, move any &lt;ins&gt; or &lt;del&gt; tags inside of any
734 block-level elements, e.g. transform &lt;ins&gt;&lt;p&gt;word&lt;/p&gt;&lt;/ins&gt; to
735 &lt;p&gt;&lt;ins&gt;word&lt;/ins&gt;&lt;/p&gt;</td>
736 <td align="right" valign="top">
737 <span class="codelink"><a href="lxml.html.diff-pysrc.html#fixup_ins_del_tags">source code</a></span>
746 <td width="15%" align="right" valign="top" class="summary">
747 <span class="summary-type"> </span>
748 </td><td class="summary">
749 <table width="100%" cellpadding="0" cellspacing="0" border="0">
751 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#serialize_html_fragment" class="summary-sig-name" onclick="show_private();">serialize_html_fragment</a>(<span class="summary-sig-arg">el</span>,
752 <span class="summary-sig-arg">skip_outer</span>=<span class="summary-sig-default">False</span>)</span><br />
753 Serialize a single lxml element as HTML. The serialized form
754 includes the element's tail.</td>
755 <td align="right" valign="top">
756 <span class="codelink"><a href="lxml.html.diff-pysrc.html#serialize_html_fragment">source code</a></span>
765 <td width="15%" align="right" valign="top" class="summary">
766 <span class="summary-type"> </span>
767 </td><td class="summary">
768 <table width="100%" cellpadding="0" cellspacing="0" border="0">
770 <td><span class="summary-sig"><a name="_fixup_ins_del_tags"></a><span class="summary-sig-name">_fixup_ins_del_tags</span>(<span class="summary-sig-arg">doc</span>)</span><br />
771 fixup_ins_del_tags that works on an lxml document in-place</td>
772 <td align="right" valign="top">
773 <span class="codelink"><a href="lxml.html.diff-pysrc.html#_fixup_ins_del_tags">source code</a></span>
782 <td width="15%" align="right" valign="top" class="summary">
783 <span class="summary-type"> </span>
784 </td><td class="summary">
785 <table width="100%" cellpadding="0" cellspacing="0" border="0">
787 <td><span class="summary-sig"><a name="_contains_block_level_tag"></a><span class="summary-sig-name">_contains_block_level_tag</span>(<span class="summary-sig-arg">el</span>)</span><br />
788 True if the element contains any block-level elements, like &lt;p&gt;, &lt;td&gt;, etc.</td>
789 <td align="right" valign="top">
790 <span class="codelink"><a href="lxml.html.diff-pysrc.html#_contains_block_level_tag">source code</a></span>
799 <td width="15%" align="right" valign="top" class="summary">
800 <span class="summary-type"> </span>
801 </td><td class="summary">
802 <table width="100%" cellpadding="0" cellspacing="0" border="0">
804 <td><span class="summary-sig"><a name="_move_el_inside_block"></a><span class="summary-sig-name">_move_el_inside_block</span>(<span class="summary-sig-arg">el</span>,
805 <span class="summary-sig-arg">tag</span>)</span><br />
806 helper for _fixup_ins_del_tags; actually takes the &lt;ins&gt; etc. tags
807 and moves them inside any block-level tags.</td>
808 <td align="right" valign="top">
809 <span class="codelink"><a href="lxml.html.diff-pysrc.html#_move_el_inside_block">source code</a></span>
818 <td width="15%" align="right" valign="top" class="summary">
819 <span class="summary-type"> </span>
820 </td><td class="summary">
821 <table width="100%" cellpadding="0" cellspacing="0" border="0">
823 <td><span class="summary-sig"><a name="_merge_element_contents"></a><span class="summary-sig-name">_merge_element_contents</span>(<span class="summary-sig-arg">el</span>)</span><br />
824 Removes an element, but merges its contents into its place, e.g.,
825 given &lt;p&gt;Hi &lt;i&gt;there!&lt;/i&gt;&lt;/p&gt;, if you remove the &lt;i&gt; element you get
826 &lt;p&gt;Hi there!&lt;/p&gt;</td>
827 <td align="right" valign="top">
828 <span class="codelink"><a href="lxml.html.diff-pysrc.html#_merge_element_contents">source code</a></span>
837 <!-- ==================== VARIABLES ==================== -->
838 <a name="section-Variables"></a>
839 <table class="summary" border="1" cellpadding="3"
840 cellspacing="0" width="100%" bgcolor="white">
841 <tr bgcolor="#70b0f0" class="table-header">
842 <td colspan="2" class="table-header">
843 <table border="0" cellpadding="0" cellspacing="0" width="100%">
845 <td align="left"><span class="table-header">Variables</span></td>
846 <td align="right" valign="top"
847 ><span class="options">[<a href="#section-Variables"
848 class="privatelink" onclick="toggle_private();"
849 >hide private</a>]</span></td>
855 <td width="15%" align="right" valign="top" class="summary">
856 <span class="summary-type"> </span>
857 </td><td class="summary">
858 <a name="_body_re"></a><span class="summary-name">_body_re</span> = <code title="re.compile(r'(?is)&lt;body.*?&gt;')">re.compile(r'<code class="re-flags">(?is)</code>&lt;body.<code class="re-op">*?</code>&gt;')</code>
862 <td width="15%" align="right" valign="top" class="summary">
863 <span class="summary-type"> </span>
864 </td><td class="summary">
865 <a name="_end_body_re"></a><span class="summary-name">_end_body_re</span> = <code title="re.compile(r'(?is)&lt;/body.*?&gt;')">re.compile(r'<code class="re-flags">(?is)</code>&lt;/body.<code class="re-op">*?</code>&gt;')</code>
869 <td width="15%" align="right" valign="top" class="summary">
870 <span class="summary-type"> </span>
871 </td><td class="summary">
872 <a name="_ins_del_re"></a><span class="summary-name">_ins_del_re</span> = <code title="re.compile(r'(?is)&lt;/?(ins|del).*?&gt;')">re.compile(r'<code class="re-flags">(?is)</code>&lt;/<code class="re-op">?</code><code class="re-group">(</code>ins<code class="re-op">|</code>del<code class="re-group">)</code>.<code class="re-op">*?</code>&gt;')</code>
876 <td width="15%" align="right" valign="top" class="summary">
877 <span class="summary-type"> </span>
878 </td><td class="summary">
879 <a name="end_whitespace_re"></a><span class="summary-name">end_whitespace_re</span> = <code title="re.compile(r'[ \t\n\r]$')">re.compile(r'<code class="re-group">[</code> \t\n\r<code class="re-group">]</code>$')</code>
883 <td width="15%" align="right" valign="top" class="summary">
884 <span class="summary-type"> </span>
885 </td><td class="summary">
886 <a href="lxml.html.diff-module.html#empty_tags" class="summary-name" onclick="show_private();">empty_tags</a> = <code title="(u'param',
894 ..."><code class="variable-group">(</code><code class="variable-quote">u'</code><code class="variable-string">param</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">u'</code><code class="variable-string">img</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">u'</code><code class="variable-string">area</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">u'</code><code class="variable-string">br</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">u'</code><code class="variable-string">basefont</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">u</code><code class="variable-ellipsis">...</code></code>
898 <td width="15%" align="right" valign="top" class="summary">
899 <span class="summary-type"> </span>
900 </td><td class="summary">
901 <a href="lxml.html.diff-module.html#block_level_tags" class="summary-name" onclick="show_private();">block_level_tags</a> = <code title="(u'address',
909 ..."><code class="variable-group">(</code><code class="variable-quote">u'</code><code class="variable-string">address</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">u'</code><code class="variable-string">blockquote</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">u'</code><code class="variable-string">center</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">u'</code><code class="variable-string">di</code><code class="variable-ellipsis">...</code></code>
913 <td width="15%" align="right" valign="top" class="summary">
914 <span class="summary-type"> </span>
915 </td><td class="summary">
916 <a href="lxml.html.diff-module.html#block_level_container_tags" class="summary-name" onclick="show_private();">block_level_container_tags</a> = <code title="(u'dd',
924 ..."><code class="variable-group">(</code><code class="variable-quote">u'</code><code class="variable-string">dd</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">u'</code><code class="variable-string">dt</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">u'</code><code class="variable-string">frameset</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">u'</code><code class="variable-string">li</code><code class="variable-quote">'</code><code class="variable-ellipsis">...</code></code>
928 <td width="15%" align="right" valign="top" class="summary">
929 <span class="summary-type"> </span>
930 </td><td class="summary">
931 <a name="split_words_re"></a><span class="summary-name">split_words_re</span> = <code title="re.compile(r'(?u)\S+(?:\s+|$)')">re.compile(r'<code class="re-flags">(?u)</code>\S<code class="re-op">+</code><code class="re-group">(?:</code>\s<code class="re-op">+</code><code class="re-op">|</code>$<code class="re-group">)</code>')</code>
935 <td width="15%" align="right" valign="top" class="summary">
936 <span class="summary-type"> </span>
937 </td><td class="summary">
938 <a name="start_whitespace_re"></a><span class="summary-name">start_whitespace_re</span> = <code title="re.compile(r'^[ \t\n\r]')">re.compile(r'^<code class="re-group">[</code> \t\n\r<code class="re-group">]</code>')</code>
942 <td width="15%" align="right" valign="top" class="summary">
943 <span class="summary-type"> </span>
944 </td><td class="summary">
945 <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="None">None</code><br />
950 <td width="15%" align="right" valign="top" class="summary">
951 <span class="summary-type"> </span>
952 </td><td class="summary">
953 <a href="lxml.html.diff-module.html#__test__" class="summary-name" onclick="show_private();">__test__</a> = <code title="{u'html_annotate (line 35)': u'''
954 doclist should be ordered from oldest to newest, like::
956 >>> version1 = 'Hello World'
957 >>> version2 = 'Goodbye World'
958 >>> print(html_annotate([(version1, 'version 1'),
959 ... (version2, 'version 2')]))
960 &lt;span title=&quot;version 2&quot;&gt;Goodbye&lt;/span&gt; &lt;span title=&quot;version 1&quot;\
961 ..."><code class="variable-group">{</code><code class="variable-quote">u'</code><code class="variable-string">html_annotate (line 35)</code><code class="variable-quote">'</code><code class="variable-op">:</code><code class="variable-ellipsis">...</code></code>
965 <!-- ==================== FUNCTION DETAILS ==================== -->
966 <a name="section-FunctionDetails"></a>
967 <table class="details" border="1" cellpadding="3"
968 cellspacing="0" width="100%" bgcolor="white">
969 <tr bgcolor="#70b0f0" class="table-header">
970 <td colspan="2" class="table-header">
971 <table border="0" cellpadding="0" cellspacing="0" width="100%">
973 <td align="left"><span class="table-header">Function Details</span></td>
974 <td align="right" valign="top"
975 ><span class="options">[<a href="#section-FunctionDetails"
976 class="privatelink" onclick="toggle_private();"
977 >hide private</a>]</span></td>
983 <a name="html_annotate"></a>
985 <table class="details" border="1" cellpadding="3"
986 cellspacing="0" width="100%" bgcolor="white">
988 <table width="100%" cellpadding="0" cellspacing="0" border="0">
989 <tr valign="top"><td>
990 <h3 class="epydoc"><span class="sig"><span class="sig-name">html_annotate</span>(<span class="sig-arg">doclist</span>,
991 <span class="sig-arg">markup</span>=<span class="sig-default">default_markup</span>)</span>
993 </td><td align="right" valign="top"
994 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#html_annotate">source code</a></span>
998 <p>doclist should be ordered from oldest to newest, like:</p>
999 <pre class="rst-literal-block">
1000 >>> version1 = 'Hello World'
1001 >>> version2 = 'Goodbye World'
1002 >>> print(html_annotate([(version1, 'version 1'),
1003 ... (version2, 'version 2')]))
1004 &lt;span title="version 2"&gt;Goodbye&lt;/span&gt; &lt;span title="version 1"&gt;World&lt;/span&gt;
1006 <p>The documents must be <em>fragments</em> (str/UTF8 or unicode), not
1007 complete documents</p>
1008 <p>The markup argument is a function to markup the spans of words.
1009 This function is called like markup('Hello', 'version 2'), and
1010 returns HTML. The first argument is text and never includes any
1011 markup. The default uses a span with a title:</p>
1013 <pre class="py-doctest">
1014 <span class="py-prompt">>>> </span><span class="py-keyword">print</span>(default_markup(<span class="py-string">'Some Text'</span>, <span class="py-string">'by Joe'</span>))
1015 <span class="py-output">&lt;span title="by Joe"&gt;Some Text&lt;/span&gt;</span></pre>
1021 <a name="htmldiff"></a>
1023 <table class="details" border="1" cellpadding="3"
1024 cellspacing="0" width="100%" bgcolor="white">
1026 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1027 <tr valign="top"><td>
1028 <h3 class="epydoc"><span class="sig"><span class="sig-name">htmldiff</span>(<span class="sig-arg">old_html</span>,
1029 <span class="sig-arg">new_html</span>)</span>
1031 </td><td align="right" valign="top"
1032 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#htmldiff">source code</a></span>
1036 <p>Do a diff of the old and new document. The documents are HTML
1037 <em>fragments</em> (str/UTF8 or unicode); they are not complete documents
1038 (i.e., no &lt;html&gt; tag).</p>
1039 <p>Returns HTML with &lt;ins&gt; and &lt;del&gt; tags added around the
1040 appropriate text.</p>
1041 <p>Markup is generally ignored, with the markup from new_html
1042 preserved, and possibly some markup from old_html (though it is
1043 considered acceptable to lose some of the old markup). Only the
1044 words in the HTML are diffed. The exceptions are &lt;img&gt; tags, which
1045 are treated like words, and the href attribute of &lt;a&gt; tags, which
1046 is noted inside the tag itself when there are changes.</p>
1051 <a name="cleanup_delete"></a>
1052 <div class="private">
1053 <table class="details" border="1" cellpadding="3"
1054 cellspacing="0" width="100%" bgcolor="white">
1056 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1057 <tr valign="top"><td>
1058 <h3 class="epydoc"><span class="sig"><span class="sig-name">cleanup_delete</span>(<span class="sig-arg">chunks</span>)</span>
1060 </td><td align="right" valign="top"
1061 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#cleanup_delete">source code</a></span>
1065 <p>Cleans up any DEL_START/DEL_END markers in the document, replacing
1066 them with &lt;del&gt;&lt;/del&gt;. To do this while keeping the document
1067 valid, it may need to drop some tags (either start or end tags).</p>
1068 <p>It may also move the del into adjacent tags to try to move it to a
1069 similar location where it was originally located (e.g., moving a
1070 delete into the preceding &lt;div&gt; tag, if the del looks like (DEL_START,
1071 'Text&lt;/div&gt;', DEL_END)).</p>
1076 <a name="split_unbalanced"></a>
1077 <div class="private">
1078 <table class="details" border="1" cellpadding="3"
1079 cellspacing="0" width="100%" bgcolor="white">
1081 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1082 <tr valign="top"><td>
1083 <h3 class="epydoc"><span class="sig"><span class="sig-name">split_unbalanced</span>(<span class="sig-arg">chunks</span>)</span>
1085 </td><td align="right" valign="top"
1086 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#split_unbalanced">source code</a></span>
1090 <p>Return (unbalanced_start, balanced, unbalanced_end), where each is
1091 a list of text and tag chunks.</p>
1092 <p>unbalanced_start is a list of all the tags that are opened, but
1093 not closed in this span. Similarly, unbalanced_end is a list of
1094 tags that are closed but were not opened. Extracting these might
1095 mean some reordering of the chunks.</p>
1100 <a name="locate_unbalanced_start"></a>
1101 <div class="private">
1102 <table class="details" border="1" cellpadding="3"
1103 cellspacing="0" width="100%" bgcolor="white">
1105 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1106 <tr valign="top"><td>
1107 <h3 class="epydoc"><span class="sig"><span class="sig-name">locate_unbalanced_start</span>(<span class="sig-arg">unbalanced_start</span>,
1108 <span class="sig-arg">pre_delete</span>,
1109 <span class="sig-arg">post_delete</span>)</span>
1111 </td><td align="right" valign="top"
1112 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#locate_unbalanced_start">source code</a></span>
1116 <p>pre_delete and post_delete implicitly point to a place in the
1117 document (where the two were split). This moves that point (by
1118 popping items from one and pushing them onto the other). It moves
1119 the point to try to find a place where unbalanced_start applies.</p>
1120 <p>As an example:</p>
1121 <pre class="rst-literal-block">
1122 >>> unbalanced_start = ['&lt;div&gt;']
1123 >>> doc = ['&lt;p&gt;', 'Text', '&lt;/p&gt;', '&lt;div&gt;', 'More Text', '&lt;/div&gt;']
1124 >>> pre, post = doc[:3], doc[3:]
1125 >>> pre, post
1126 (['&lt;p&gt;', 'Text', '&lt;/p&gt;'], ['&lt;div&gt;', 'More Text', '&lt;/div&gt;'])
1127 >>> locate_unbalanced_start(unbalanced_start, pre, post)
1128 >>> pre, post
1129 (['&lt;p&gt;', 'Text', '&lt;/p&gt;', '&lt;div&gt;'], ['More Text', '&lt;/div&gt;'])
1131 <p>As you can see, we moved the point so that the dangling &lt;div&gt; that
1132 we found will be effectively replaced by the div in the original
1133 document. If this doesn't work out, we just throw away
1134 unbalanced_start without doing anything.</p>
1139 <a name="tokenize"></a>
1140 <div class="private">
1141 <table class="details" border="1" cellpadding="3"
1142 cellspacing="0" width="100%" bgcolor="white">
1144 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1145 <tr valign="top"><td>
1146 <h3 class="epydoc"><span class="sig"><span class="sig-name">tokenize</span>(<span class="sig-arg">html</span>,
1147 <span class="sig-arg">include_hrefs</span>=<span class="sig-default">True</span>)</span>
1149 </td><td align="right" valign="top"
1150 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#tokenize">source code</a></span>
1154 <p>Parses the given HTML and returns token objects (words with attached tags).</p>
1155 <p>This parses only the content of a page; anything in the head is
1156 ignored, and the &lt;head&gt; and &lt;body&gt; elements are themselves
1157 optional. The content is then parsed by lxml, which ensures the
1158 validity of the resulting parsed document (though lxml may make
1159 incorrect guesses when the markup is particularly bad).</p>
1160 <p>&lt;ins&gt; and &lt;del&gt; tags are also eliminated from the document, as
1161 that gets confusing.</p>
1162 <p>If include_hrefs is true, then the href attribute of &lt;a&gt; tags is
1163 included as a special kind of diffable token.</p>
1168 <a name="parse_html"></a>
1169 <div class="private">
1170 <table class="details" border="1" cellpadding="3"
1171 cellspacing="0" width="100%" bgcolor="white">
1173 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1174 <tr valign="top"><td>
1175 <h3 class="epydoc"><span class="sig"><span class="sig-name">parse_html</span>(<span class="sig-arg">html</span>,
1176 <span class="sig-arg">cleanup</span>=<span class="sig-default">True</span>)</span>
1178 </td><td align="right" valign="top"
1179 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#parse_html">source code</a></span>
1183 <p>Parses an HTML fragment, returning an lxml element. Note that the HTML will be
1184 wrapped in a &lt;div&gt; tag that was not in the original document.</p>
1185 <p>If cleanup is true, make sure there's no &lt;head&gt; or &lt;body&gt;, and get
1186 rid of any &lt;ins&gt; and &lt;del&gt; tags.</p>
1191 <a name="split_trailing_whitespace"></a>
1192 <div class="private">
1193 <table class="details" border="1" cellpadding="3"
1194 cellspacing="0" width="100%" bgcolor="white">
1196 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1197 <tr valign="top"><td>
1198 <h3 class="epydoc"><span class="sig"><span class="sig-name">split_trailing_whitespace</span>(<span class="sig-arg">word</span>)</span>
1200 </td><td align="right" valign="top"
1201 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#split_trailing_whitespace">source code</a></span>
1206 This function takes a word, such as 'test</blockquote>
1207 <p>' and returns ('test','</p>
1213 <a name="flatten_el"></a>
1214 <div class="private">
1215 <table class="details" border="1" cellpadding="3"
1216 cellspacing="0" width="100%" bgcolor="white">
1218 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1219 <tr valign="top"><td>
1220 <h3 class="epydoc"><span class="sig"><span class="sig-name">flatten_el</span>(<span class="sig-arg">el</span>,
1221 <span class="sig-arg">include_hrefs</span>,
1222 <span class="sig-arg">skip_tag</span>=<span class="sig-default">False</span>)</span>
1224 </td><td align="right" valign="top"
1225 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#flatten_el">source code</a></span>
1229 <p>Takes an lxml element el, and generates all the text chunks for
1230 that tag. Each start tag is a chunk, each word is a chunk, and each
1231 end tag is a chunk.</p>
1232 <p>If skip_tag is true, then the outermost container tag is
1233 not returned (just its contents).</p>
1238 <a name="serialize_html_fragment"></a>
1239 <div class="private">
1240 <table class="details" border="1" cellpadding="3"
1241 cellspacing="0" width="100%" bgcolor="white">
1243 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1244 <tr valign="top"><td>
1245 <h3 class="epydoc"><span class="sig"><span class="sig-name">serialize_html_fragment</span>(<span class="sig-arg">el</span>,
1246 <span class="sig-arg">skip_outer</span>=<span class="sig-default">False</span>)</span>
1248 </td><td align="right" valign="top"
1249 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#serialize_html_fragment">source code</a></span>
1253 <p>Serialize a single lxml element as HTML. The serialized form
1254 includes the element's tail.</p>
1255 <p>If skip_outer is true, then don't serialize the outermost tag</p>
1261 <!-- ==================== VARIABLES DETAILS ==================== -->
1262 <a name="section-VariablesDetails"></a>
1263 <table class="details" border="1" cellpadding="3"
1264 cellspacing="0" width="100%" bgcolor="white">
1265 <tr bgcolor="#70b0f0" class="table-header">
1266 <td colspan="2" class="table-header">
1267 <table border="0" cellpadding="0" cellspacing="0" width="100%">
1269 <td align="left"><span class="table-header">Variables Details</span></td>
1270 <td align="right" valign="top"
1271 ><span class="options">[<a href="#section-VariablesDetails"
1272 class="privatelink" onclick="toggle_private();"
1273 >hide private</a>]</span></td>
1279 <a name="empty_tags"></a>
1280 <div class="private">
1281 <table class="details" border="1" cellpadding="3"
1282 cellspacing="0" width="100%" bgcolor="white">
1284 <h3 class="epydoc">empty_tags</h3>
1290 <dd><table><tr><td><pre class="variable">
1291 <code class="variable-group">(</code><code class="variable-quote">u'</code><code class="variable-string">param</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1292 <code class="variable-quote">u'</code><code class="variable-string">img</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1293 <code class="variable-quote">u'</code><code class="variable-string">area</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1294 <code class="variable-quote">u'</code><code class="variable-string">br</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1295 <code class="variable-quote">u'</code><code class="variable-string">basefont</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1296 <code class="variable-quote">u'</code><code class="variable-string">input</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1297 <code class="variable-quote">u'</code><code class="variable-string">base</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1298 <code class="variable-quote">u'</code><code class="variable-string">meta</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1299 <code class="variable-ellipsis">...</code>
1300 </pre></td></tr></table>
1305 <a name="block_level_tags"></a>
1306 <div class="private">
1307 <table class="details" border="1" cellpadding="3"
1308 cellspacing="0" width="100%" bgcolor="white">
1310 <h3 class="epydoc">block_level_tags</h3>
1316 <dd><table><tr><td><pre class="variable">
1317 <code class="variable-group">(</code><code class="variable-quote">u'</code><code class="variable-string">address</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1318 <code class="variable-quote">u'</code><code class="variable-string">blockquote</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1319 <code class="variable-quote">u'</code><code class="variable-string">center</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1320 <code class="variable-quote">u'</code><code class="variable-string">dir</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1321 <code class="variable-quote">u'</code><code class="variable-string">div</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1322 <code class="variable-quote">u'</code><code class="variable-string">dl</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1323 <code class="variable-quote">u'</code><code class="variable-string">fieldset</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1324 <code class="variable-quote">u'</code><code class="variable-string">form</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1325 <code class="variable-ellipsis">...</code>
1326 </pre></td></tr></table>
1331 <a name="block_level_container_tags"></a>
1332 <div class="private">
1333 <table class="details" border="1" cellpadding="3"
1334 cellspacing="0" width="100%" bgcolor="white">
1336 <h3 class="epydoc">block_level_container_tags</h3>
1342 <dd><table><tr><td><pre class="variable">
1343 <code class="variable-group">(</code><code class="variable-quote">u'</code><code class="variable-string">dd</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1344 <code class="variable-quote">u'</code><code class="variable-string">dt</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1345 <code class="variable-quote">u'</code><code class="variable-string">frameset</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1346 <code class="variable-quote">u'</code><code class="variable-string">li</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1347 <code class="variable-quote">u'</code><code class="variable-string">tbody</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1348 <code class="variable-quote">u'</code><code class="variable-string">td</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1349 <code class="variable-quote">u'</code><code class="variable-string">tfoot</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1350 <code class="variable-quote">u'</code><code class="variable-string">th</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1351 <code class="variable-ellipsis">...</code>
1352 </pre></td></tr></table>
1357 <a name="__test__"></a>
1358 <div class="private">
1359 <table class="details" border="1" cellpadding="3"
1360 cellspacing="0" width="100%" bgcolor="white">
1362 <h3 class="epydoc">__test__</h3>
1368 <dd><table><tr><td><pre class="variable">
1369 <code class="variable-group">{</code><code class="variable-quote">u'</code><code class="variable-string">html_annotate (line 35)</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">u'''</code><code class="variable-string"></code>
1370 <code class="variable-string"> doclist should be ordered from oldest to newest, like::</code>
1371 <code class="variable-string"></code>
1372 <code class="variable-string"> >>> version1 = 'Hello World'</code>
1373 <code class="variable-string"> >>> version2 = 'Goodbye World'</code>
1374 <code class="variable-string"> >>> print(html_annotate([(version1, 'version 1'),</code>
1375 <code class="variable-string"> ... (version2, 'version 2')]))</code>
1376 <code class="variable-string"> &lt;span title="version 2"&gt;Goodbye&lt;/span&gt; &lt;span title="version 1"</code><span class="variable-linewrap"><img src="crarr.png" alt="\" /></span>
1377 <code class="variable-ellipsis">...</code>
1378 </pre></td></tr></table>
1384 <!-- ==================== NAVIGATION BAR ==================== -->
1385 <table class="navbar" border="0" width="100%" cellpadding="0"
1386 bgcolor="#a0c0ff" cellspacing="0">
1387 <tr valign="middle">
1389 <th> <a
1390 href="lxml-module.html">Home</a> </th>
1393 <th> <a
1394 href="module-tree.html">Trees</a> </th>
1397 <th> <a
1398 href="identifier-index.html">Indices</a> </th>
1401 <th> <a
1402 href="help.html">Help</a> </th>
1404 <!-- Project homepage -->
1405 <th class="navbar" align="right" width="100%">
1406 <table border="0" cellpadding="0" cellspacing="0">
1407 <tr><th class="navbar" align="center"
1408 ><a class="navbar" target="_top" href="/">lxml API</a></th>
1412 <table border="0" cellpadding="0" cellspacing="0" width="100%">
1414 <td align="left" class="footer">
1415 Generated by Epydoc 3.0.1
1416 on Thu Jul 9 18:29:53 2020
1418 <td align="right" class="footer">
1419 <a target="mainFrame" href="http://epydoc.sourceforge.net"
1420 >http://epydoc.sourceforge.net</a>
1425 <script type="text/javascript">
1427 // Private objects are initially displayed (because if
1428 // javascript is turned off then we want them to be
1429 // visible); but by default, we want to hide them. So hide
1430 // them unless we have a cookie that says to show them.