1 <?xml version="1.0" encoding="ascii"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
3 "DTD/xhtml1-transitional.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
6 <title>lxml.html.diff</title>
7 <link rel="stylesheet" href="epydoc.css" type="text/css" />
8 <script type="text/javascript" src="epydoc.js"></script>
11 <body bgcolor="white" text="black" link="blue" vlink="#204080"
13 <!-- ==================== NAVIGATION BAR ==================== -->
14 <table class="navbar" border="0" width="100%" cellpadding="0"
15 bgcolor="#a0c0ff" cellspacing="0">
18 <th> <a
19 href="lxml-module.html">Home</a> </th>
22 <th> <a
23 href="module-tree.html">Trees</a> </th>
26 <th> <a
27 href="identifier-index.html">Indices</a> </th>
30 <th> <a
31 href="help.html">Help</a> </th>
33 <!-- Project homepage -->
34 <th class="navbar" align="right" width="100%">
35 <table border="0" cellpadding="0" cellspacing="0">
36 <tr><th class="navbar" align="center"
37 ><a class="navbar" target="_top" href="http://codespeak.net/lxml/">lxml API</a></th>
41 <table width="100%" cellpadding="0" cellspacing="0">
44 <span class="breadcrumbs">
45 <a href="lxml-module.html">Package lxml</a> ::
46 <a href="lxml.html-module.html">Package html</a> ::
51 <table cellpadding="0" cellspacing="0">
52 <!-- hide/show private -->
53 <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
54 onclick="toggle_private();">hide private</a>]</span></td></tr>
55 <tr><td align="right"><span class="options"
56 >[<a href="frames.html" target="_top">frames</a
57 >] | <a href="lxml.html.diff-module.html"
58 target="_top">no frames</a>]</span></td></tr>
63 <!-- ==================== MODULE DESCRIPTION ==================== -->
64 <h1 class="epydoc">Module diff</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.diff-pysrc.html">source code</a></span></p>
65 <!-- ==================== CLASSES ==================== -->
66 <a name="section-Classes"></a>
67 <table class="summary" border="1" cellpadding="3"
68 cellspacing="0" width="100%" bgcolor="white">
69 <tr bgcolor="#70b0f0" class="table-header">
70 <td colspan="2" class="table-header">
71 <table border="0" cellpadding="0" cellspacing="0" width="100%">
73 <td align="left"><span class="table-header">Classes</span></td>
74 <td align="right" valign="top"
75 ><span class="options">[<a href="#section-Classes"
76 class="privatelink" onclick="toggle_private();"
77 >hide private</a>]</span></td>
83 <td width="15%" align="right" valign="top" class="summary">
84 <span class="summary-type"> </span>
85 </td><td class="summary">
86 <a href="lxml.html.diff.DEL_START-class.html" class="summary-name" onclick="show_private();">DEL_START</a>
90 <td width="15%" align="right" valign="top" class="summary">
91 <span class="summary-type"> </span>
92 </td><td class="summary">
93 <a href="lxml.html.diff.DEL_END-class.html" class="summary-name" onclick="show_private();">DEL_END</a>
97 <td width="15%" align="right" valign="top" class="summary">
98 <span class="summary-type"> </span>
99 </td><td class="summary">
100 <a href="lxml.html.diff.NoDeletes-class.html" class="summary-name" onclick="show_private();">NoDeletes</a><br />
101 Raised when the document no longer contains any pending deletes
106 <td width="15%" align="right" valign="top" class="summary">
107 <span class="summary-type"> </span>
108 </td><td class="summary">
109 <a href="lxml.html.diff.token-class.html" class="summary-name" onclick="show_private();">token</a><br />
110 Represents a diffable token, generally a word that is displayed to
115 <td width="15%" align="right" valign="top" class="summary">
116 <span class="summary-type"> </span>
117 </td><td class="summary">
118 <a href="lxml.html.diff.tag_token-class.html" class="summary-name" onclick="show_private();">tag_token</a><br />
119 Represents a token that is actually a tag.
123 <td width="15%" align="right" valign="top" class="summary">
124 <span class="summary-type"> </span>
125 </td><td class="summary">
126 <a href="lxml.html.diff.href_token-class.html" class="summary-name" onclick="show_private();">href_token</a><br />
127 Represents the href in an anchor tag.
131 <td width="15%" align="right" valign="top" class="summary">
132 <span class="summary-type"> </span>
133 </td><td class="summary">
134 <a href="lxml.html.diff.InsensitiveSequenceMatcher-class.html" class="summary-name" onclick="show_private();">InsensitiveSequenceMatcher</a><br />
135 Acts like SequenceMatcher, but tries not to find very small equal
136 blocks amidst large spans of changes
140 <!-- ==================== FUNCTIONS ==================== -->
141 <a name="section-Functions"></a>
142 <table class="summary" border="1" cellpadding="3"
143 cellspacing="0" width="100%" bgcolor="white">
144 <tr bgcolor="#70b0f0" class="table-header">
145 <td colspan="2" class="table-header">
146 <table border="0" cellpadding="0" cellspacing="0" width="100%">
148 <td align="left"><span class="table-header">Functions</span></td>
149 <td align="right" valign="top"
150 ><span class="options">[<a href="#section-Functions"
151 class="privatelink" onclick="toggle_private();"
152 >hide private</a>]</span></td>
158 <td width="15%" align="right" valign="top" class="summary">
159 <span class="summary-type"> </span>
160 </td><td class="summary">
161 <table width="100%" cellpadding="0" cellspacing="0" border="0">
163 <td><span class="summary-sig"><a name="default_markup"></a><span class="summary-sig-name">default_markup</span>(<span class="summary-sig-arg">text</span>,
164 <span class="summary-sig-arg">version</span>)</span></td>
165 <td align="right" valign="top">
166 <span class="codelink"><a href="lxml.html.diff-pysrc.html#default_markup">source code</a></span>
175 <td width="15%" align="right" valign="top" class="summary">
176 <span class="summary-type"> </span>
177 </td><td class="summary">
178 <table width="100%" cellpadding="0" cellspacing="0" border="0">
180 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#html_annotate" class="summary-sig-name">html_annotate</a>(<span class="summary-sig-arg">doclist</span>,
181 <span class="summary-sig-arg">markup</span>=<span class="summary-sig-default">&lt;function default_markup at 0x9800d4c&gt;</span>)</span><br />
182 doclist should be ordered from oldest to newest, like:</td>
183 <td align="right" valign="top">
184 <span class="codelink"><a href="lxml.html.diff-pysrc.html#html_annotate">source code</a></span>
193 <td width="15%" align="right" valign="top" class="summary">
194 <span class="summary-type"> </span>
195 </td><td class="summary">
196 <table width="100%" cellpadding="0" cellspacing="0" border="0">
198 <td><span class="summary-sig"><a name="tokenize_annotated"></a><span class="summary-sig-name">tokenize_annotated</span>(<span class="summary-sig-arg">doc</span>,
199 <span class="summary-sig-arg">annotation</span>)</span><br />
200 Tokenize a document and add an annotation attribute to each token</td>
201 <td align="right" valign="top">
202 <span class="codelink"><a href="lxml.html.diff-pysrc.html#tokenize_annotated">source code</a></span>
211 <td width="15%" align="right" valign="top" class="summary">
212 <span class="summary-type"> </span>
213 </td><td class="summary">
214 <table width="100%" cellpadding="0" cellspacing="0" border="0">
216 <td><span class="summary-sig"><a name="html_annotate_merge_annotations"></a><span class="summary-sig-name">html_annotate_merge_annotations</span>(<span class="summary-sig-arg">tokens_old</span>,
217 <span class="summary-sig-arg">tokens_new</span>)</span><br />
218 Merge the annotations from tokens_old into tokens_new, when the
219 tokens in the new document already existed in the old document.</td>
220 <td align="right" valign="top">
221 <span class="codelink"><a href="lxml.html.diff-pysrc.html#html_annotate_merge_annotations">source code</a></span>
230 <td width="15%" align="right" valign="top" class="summary">
231 <span class="summary-type"> </span>
232 </td><td class="summary">
233 <table width="100%" cellpadding="0" cellspacing="0" border="0">
235 <td><span class="summary-sig"><a name="copy_annotations"></a><span class="summary-sig-name">copy_annotations</span>(<span class="summary-sig-arg">src</span>,
236 <span class="summary-sig-arg">dest</span>)</span><br />
237 Copy annotations from the tokens listed in src to the tokens in dest</td>
238 <td align="right" valign="top">
239 <span class="codelink"><a href="lxml.html.diff-pysrc.html#copy_annotations">source code</a></span>
248 <td width="15%" align="right" valign="top" class="summary">
249 <span class="summary-type"> </span>
250 </td><td class="summary">
251 <table width="100%" cellpadding="0" cellspacing="0" border="0">
253 <td><span class="summary-sig"><a name="compress_tokens"></a><span class="summary-sig-name">compress_tokens</span>(<span class="summary-sig-arg">tokens</span>)</span><br />
254 Combine adjacent tokens when there is no HTML between the tokens,
255 and they share an annotation</td>
256 <td align="right" valign="top">
257 <span class="codelink"><a href="lxml.html.diff-pysrc.html#compress_tokens">source code</a></span>
266 <td width="15%" align="right" valign="top" class="summary">
267 <span class="summary-type"> </span>
268 </td><td class="summary">
269 <table width="100%" cellpadding="0" cellspacing="0" border="0">
271 <td><span class="summary-sig"><a name="compress_merge_back"></a><span class="summary-sig-name">compress_merge_back</span>(<span class="summary-sig-arg">tokens</span>,
272 <span class="summary-sig-arg">tok</span>)</span><br />
273 Merge tok into the last element of tokens (modifying the list of
274 tokens in-place).</td>
275 <td align="right" valign="top">
276 <span class="codelink"><a href="lxml.html.diff-pysrc.html#compress_merge_back">source code</a></span>
285 <td width="15%" align="right" valign="top" class="summary">
286 <span class="summary-type"> </span>
287 </td><td class="summary">
288 <table width="100%" cellpadding="0" cellspacing="0" border="0">
290 <td><span class="summary-sig"><a name="markup_serialize_tokens"></a><span class="summary-sig-name">markup_serialize_tokens</span>(<span class="summary-sig-arg">tokens</span>,
291 <span class="summary-sig-arg">markup_func</span>)</span><br />
292 Serialize the list of tokens into a list of text chunks, calling
293 markup_func around text to add annotations.</td>
294 <td align="right" valign="top">
295 <span class="codelink"><a href="lxml.html.diff-pysrc.html#markup_serialize_tokens">source code</a></span>
304 <td width="15%" align="right" valign="top" class="summary">
305 <span class="summary-type"> </span>
306 </td><td class="summary">
307 <table width="100%" cellpadding="0" cellspacing="0" border="0">
309 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#htmldiff" class="summary-sig-name">htmldiff</a>(<span class="summary-sig-arg">old_html</span>,
310 <span class="summary-sig-arg">new_html</span>)</span><br />
311 Do a diff of the old and new document.</td>
312 <td align="right" valign="top">
313 <span class="codelink"><a href="lxml.html.diff-pysrc.html#htmldiff">source code</a></span>
322 <td width="15%" align="right" valign="top" class="summary">
323 <span class="summary-type"> </span>
324 </td><td class="summary">
325 <table width="100%" cellpadding="0" cellspacing="0" border="0">
327 <td><span class="summary-sig"><a name="htmldiff_tokens"></a><span class="summary-sig-name">htmldiff_tokens</span>(<span class="summary-sig-arg">html1_tokens</span>,
328 <span class="summary-sig-arg">html2_tokens</span>)</span><br />
329 Does a diff on the tokens themselves, returning a list of text
330 chunks (not tokens).</td>
331 <td align="right" valign="top">
332 <span class="codelink"><a href="lxml.html.diff-pysrc.html#htmldiff_tokens">source code</a></span>
341 <td width="15%" align="right" valign="top" class="summary">
342 <span class="summary-type"> </span>
343 </td><td class="summary">
344 <table width="100%" cellpadding="0" cellspacing="0" border="0">
346 <td><span class="summary-sig"><a name="expand_tokens"></a><span class="summary-sig-name">expand_tokens</span>(<span class="summary-sig-arg">tokens</span>,
347 <span class="summary-sig-arg">equal</span>=<span class="summary-sig-default">False</span>)</span><br />
348 Given a list of tokens, return a generator of the chunks of
349 text for the data in the tokens.</td>
350 <td align="right" valign="top">
351 <span class="codelink"><a href="lxml.html.diff-pysrc.html#expand_tokens">source code</a></span>
360 <td width="15%" align="right" valign="top" class="summary">
361 <span class="summary-type"> </span>
362 </td><td class="summary">
363 <table width="100%" cellpadding="0" cellspacing="0" border="0">
365 <td><span class="summary-sig"><a name="merge_insert"></a><span class="summary-sig-name">merge_insert</span>(<span class="summary-sig-arg">ins_chunks</span>,
366 <span class="summary-sig-arg">doc</span>)</span><br />
367 doc is the already-handled document (as a list of text chunks);
368 here we add &lt;ins&gt;ins_chunks&lt;/ins&gt; to the end of that.</td>
369 <td align="right" valign="top">
370 <span class="codelink"><a href="lxml.html.diff-pysrc.html#merge_insert">source code</a></span>
379 <td width="15%" align="right" valign="top" class="summary">
380 <span class="summary-type"> </span>
381 </td><td class="summary">
382 <table width="100%" cellpadding="0" cellspacing="0" border="0">
384 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#merge_delete" class="summary-sig-name" onclick="show_private();">merge_delete</a>(<span class="summary-sig-arg">del_chunks</span>,
385 <span class="summary-sig-arg">doc</span>)</span><br />
386 Adds the text chunks in del_chunks to the document doc (another
387 list of text chunks) with marker to show it is a delete.</td>
388 <td align="right" valign="top">
389 <span class="codelink"><a href="lxml.html.diff-pysrc.html#merge_delete">source code</a></span>
398 <td width="15%" align="right" valign="top" class="summary">
399 <span class="summary-type"> </span>
400 </td><td class="summary">
401 <table width="100%" cellpadding="0" cellspacing="0" border="0">
403 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#cleanup_delete" class="summary-sig-name" onclick="show_private();">cleanup_delete</a>(<span class="summary-sig-arg">chunks</span>)</span><br />
404 Cleans up any DEL_START/DEL_END markers in the document, replacing
405 them with &lt;del&gt;&lt;/del&gt;.</td>
406 <td align="right" valign="top">
407 <span class="codelink"><a href="lxml.html.diff-pysrc.html#cleanup_delete">source code</a></span>
416 <td width="15%" align="right" valign="top" class="summary">
417 <span class="summary-type"> </span>
418 </td><td class="summary">
419 <table width="100%" cellpadding="0" cellspacing="0" border="0">
421 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#split_unbalanced" class="summary-sig-name" onclick="show_private();">split_unbalanced</a>(<span class="summary-sig-arg">chunks</span>)</span><br />
422 Return (unbalanced_start, balanced, unbalanced_end), where each is
423 a list of text and tag chunks.</td>
424 <td align="right" valign="top">
425 <span class="codelink"><a href="lxml.html.diff-pysrc.html#split_unbalanced">source code</a></span>
434 <td width="15%" align="right" valign="top" class="summary">
435 <span class="summary-type"> </span>
436 </td><td class="summary">
437 <table width="100%" cellpadding="0" cellspacing="0" border="0">
439 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#split_delete" class="summary-sig-name" onclick="show_private();">split_delete</a>(<span class="summary-sig-arg">chunks</span>)</span><br />
440 Returns (stuff_before_DEL_START, stuff_inside_DEL_START_END,
441 stuff_after_DEL_END).</td>
442 <td align="right" valign="top">
443 <span class="codelink"><a href="lxml.html.diff-pysrc.html#split_delete">source code</a></span>
452 <td width="15%" align="right" valign="top" class="summary">
453 <span class="summary-type"> </span>
454 </td><td class="summary">
455 <table width="100%" cellpadding="0" cellspacing="0" border="0">
457 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#locate_unbalanced_start" class="summary-sig-name" onclick="show_private();">locate_unbalanced_start</a>(<span class="summary-sig-arg">unbalanced_start</span>,
458 <span class="summary-sig-arg">pre_delete</span>,
459 <span class="summary-sig-arg">post_delete</span>)</span><br />
460 pre_delete and post_delete implicitly point to a place in the
461 document (where the two were split).</td>
462 <td align="right" valign="top">
463 <span class="codelink"><a href="lxml.html.diff-pysrc.html#locate_unbalanced_start">source code</a></span>
472 <td width="15%" align="right" valign="top" class="summary">
473 <span class="summary-type"> </span>
474 </td><td class="summary">
475 <table width="100%" cellpadding="0" cellspacing="0" border="0">
477 <td><span class="summary-sig"><a name="locate_unbalanced_end"></a><span class="summary-sig-name">locate_unbalanced_end</span>(<span class="summary-sig-arg">unbalanced_end</span>,
478 <span class="summary-sig-arg">pre_delete</span>,
479 <span class="summary-sig-arg">post_delete</span>)</span><br />
480 like locate_unbalanced_start, except handling end tags and
481 possibly moving the point earlier in the document.</td>
482 <td align="right" valign="top">
483 <span class="codelink"><a href="lxml.html.diff-pysrc.html#locate_unbalanced_end">source code</a></span>
492 <td width="15%" align="right" valign="top" class="summary">
493 <span class="summary-type"> </span>
494 </td><td class="summary">
495 <table width="100%" cellpadding="0" cellspacing="0" border="0">
497 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#tokenize" class="summary-sig-name" onclick="show_private();">tokenize</a>(<span class="summary-sig-arg">html</span>,
498 <span class="summary-sig-arg">include_hrefs</span>=<span class="summary-sig-default">True</span>)</span><br />
499 Parses the given HTML and returns token objects (words with attached tags).</td>
500 <td align="right" valign="top">
501 <span class="codelink"><a href="lxml.html.diff-pysrc.html#tokenize">source code</a></span>
510 <td width="15%" align="right" valign="top" class="summary">
511 <span class="summary-type"> </span>
512 </td><td class="summary">
513 <table width="100%" cellpadding="0" cellspacing="0" border="0">
515 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#parse_html" class="summary-sig-name" onclick="show_private();">parse_html</a>(<span class="summary-sig-arg">html</span>,
516 <span class="summary-sig-arg">cleanup</span>=<span class="summary-sig-default">True</span>)</span><br />
517 Parses an HTML fragment, returning an lxml element.</td>
518 <td align="right" valign="top">
519 <span class="codelink"><a href="lxml.html.diff-pysrc.html#parse_html">source code</a></span>
528 <td width="15%" align="right" valign="top" class="summary">
529 <span class="summary-type"> </span>
530 </td><td class="summary">
531 <table width="100%" cellpadding="0" cellspacing="0" border="0">
533 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#cleanup_html" class="summary-sig-name" onclick="show_private();">cleanup_html</a>(<span class="summary-sig-arg">html</span>)</span><br />
534 This 'cleans' the HTML, meaning that any page structure is removed
535 (only the contents of &lt;body&gt; are used, if there is any &lt;body&gt;).</td>
536 <td align="right" valign="top">
537 <span class="codelink"><a href="lxml.html.diff-pysrc.html#cleanup_html">source code</a></span>
546 <td width="15%" align="right" valign="top" class="summary">
547 <span class="summary-type"> </span>
548 </td><td class="summary">
549 <table width="100%" cellpadding="0" cellspacing="0" border="0">
551 <td><span class="summary-sig"><a name="fixup_chunks"></a><span class="summary-sig-name">fixup_chunks</span>(<span class="summary-sig-arg">chunks</span>)</span><br />
552 This function takes a list of chunks and produces a list of tokens.</td>
553 <td align="right" valign="top">
554 <span class="codelink"><a href="lxml.html.diff-pysrc.html#fixup_chunks">source code</a></span>
563 <td width="15%" align="right" valign="top" class="summary">
564 <span class="summary-type"> </span>
565 </td><td class="summary">
566 <table width="100%" cellpadding="0" cellspacing="0" border="0">
568 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#flatten_el" class="summary-sig-name" onclick="show_private();">flatten_el</a>(<span class="summary-sig-arg">el</span>,
569 <span class="summary-sig-arg">include_hrefs</span>,
570 <span class="summary-sig-arg">skip_tag</span>=<span class="summary-sig-default">False</span>)</span><br />
571 Takes an lxml element el, and generates all the text chunks for
573 <td align="right" valign="top">
574 <span class="codelink"><a href="lxml.html.diff-pysrc.html#flatten_el">source code</a></span>
583 <td width="15%" align="right" valign="top" class="summary">
584 <span class="summary-type"> </span>
585 </td><td class="summary">
586 <table width="100%" cellpadding="0" cellspacing="0" border="0">
588 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#split_words" class="summary-sig-name" onclick="show_private();">split_words</a>(<span class="summary-sig-arg">text</span>)</span><br />
589 Splits some text into words.</td>
590 <td align="right" valign="top">
591 <span class="codelink"><a href="lxml.html.diff-pysrc.html#split_words">source code</a></span>
600 <td width="15%" align="right" valign="top" class="summary">
601 <span class="summary-type"> </span>
602 </td><td class="summary">
603 <table width="100%" cellpadding="0" cellspacing="0" border="0">
605 <td><span class="summary-sig"><a name="start_tag"></a><span class="summary-sig-name">start_tag</span>(<span class="summary-sig-arg">el</span>)</span><br />
606 The text representation of the start tag for a tag.</td>
607 <td align="right" valign="top">
608 <span class="codelink"><a href="lxml.html.diff-pysrc.html#start_tag">source code</a></span>
617 <td width="15%" align="right" valign="top" class="summary">
618 <span class="summary-type"> </span>
619 </td><td class="summary">
620 <table width="100%" cellpadding="0" cellspacing="0" border="0">
622 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#end_tag" class="summary-sig-name" onclick="show_private();">end_tag</a>(<span class="summary-sig-arg">el</span>)</span><br />
623 The text representation of an end tag for a tag.</td>
624 <td align="right" valign="top">
625 <span class="codelink"><a href="lxml.html.diff-pysrc.html#end_tag">source code</a></span>
634 <td width="15%" align="right" valign="top" class="summary">
635 <span class="summary-type"> </span>
636 </td><td class="summary">
637 <table width="100%" cellpadding="0" cellspacing="0" border="0">
639 <td><span class="summary-sig"><a name="is_word"></a><span class="summary-sig-name">is_word</span>(<span class="summary-sig-arg">tok</span>)</span></td>
640 <td align="right" valign="top">
641 <span class="codelink"><a href="lxml.html.diff-pysrc.html#is_word">source code</a></span>
650 <td width="15%" align="right" valign="top" class="summary">
651 <span class="summary-type"> </span>
652 </td><td class="summary">
653 <table width="100%" cellpadding="0" cellspacing="0" border="0">
655 <td><span class="summary-sig"><a name="is_end_tag"></a><span class="summary-sig-name">is_end_tag</span>(<span class="summary-sig-arg">tok</span>)</span></td>
656 <td align="right" valign="top">
657 <span class="codelink"><a href="lxml.html.diff-pysrc.html#is_end_tag">source code</a></span>
666 <td width="15%" align="right" valign="top" class="summary">
667 <span class="summary-type"> </span>
668 </td><td class="summary">
669 <table width="100%" cellpadding="0" cellspacing="0" border="0">
671 <td><span class="summary-sig"><a name="is_start_tag"></a><span class="summary-sig-name">is_start_tag</span>(<span class="summary-sig-arg">tok</span>)</span></td>
672 <td align="right" valign="top">
673 <span class="codelink"><a href="lxml.html.diff-pysrc.html#is_start_tag">source code</a></span>
682 <td width="15%" align="right" valign="top" class="summary">
683 <span class="summary-type"> </span>
684 </td><td class="summary">
685 <table width="100%" cellpadding="0" cellspacing="0" border="0">
687 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#fixup_ins_del_tags" class="summary-sig-name" onclick="show_private();">fixup_ins_del_tags</a>(<span class="summary-sig-arg">html</span>)</span><br />
688 Given an html string, move any &lt;ins&gt; or &lt;del&gt; tags inside of any
689 block-level elements, e.g.</td>
690 <td align="right" valign="top">
691 <span class="codelink"><a href="lxml.html.diff-pysrc.html#fixup_ins_del_tags">source code</a></span>
700 <td width="15%" align="right" valign="top" class="summary">
701 <span class="summary-type"> </span>
702 </td><td class="summary">
703 <table width="100%" cellpadding="0" cellspacing="0" border="0">
705 <td><span class="summary-sig"><a href="lxml.html.diff-module.html#serialize_html_fragment" class="summary-sig-name" onclick="show_private();">serialize_html_fragment</a>(<span class="summary-sig-arg">el</span>,
706 <span class="summary-sig-arg">skip_outer</span>=<span class="summary-sig-default">False</span>)</span><br />
707 Serialize a single lxml element as HTML.</td>
708 <td align="right" valign="top">
709 <span class="codelink"><a href="lxml.html.diff-pysrc.html#serialize_html_fragment">source code</a></span>
718 <td width="15%" align="right" valign="top" class="summary">
719 <span class="summary-type"> </span>
720 </td><td class="summary">
721 <table width="100%" cellpadding="0" cellspacing="0" border="0">
723 <td><span class="summary-sig"><a name="_fixup_ins_del_tags"></a><span class="summary-sig-name">_fixup_ins_del_tags</span>(<span class="summary-sig-arg">doc</span>)</span><br />
724 fixup_ins_del_tags that works on an lxml document in-place</td>
725 <td align="right" valign="top">
726 <span class="codelink"><a href="lxml.html.diff-pysrc.html#_fixup_ins_del_tags">source code</a></span>
735 <td width="15%" align="right" valign="top" class="summary">
736 <span class="summary-type"> </span>
737 </td><td class="summary">
738 <table width="100%" cellpadding="0" cellspacing="0" border="0">
740 <td><span class="summary-sig"><a name="_contains_block_level_tag"></a><span class="summary-sig-name">_contains_block_level_tag</span>(<span class="summary-sig-arg">el</span>)</span><br />
741 True if the element contains any block-level elements, like &lt;p&gt;, &lt;td&gt;, etc.</td>
742 <td align="right" valign="top">
743 <span class="codelink"><a href="lxml.html.diff-pysrc.html#_contains_block_level_tag">source code</a></span>
752 <td width="15%" align="right" valign="top" class="summary">
753 <span class="summary-type"> </span>
754 </td><td class="summary">
755 <table width="100%" cellpadding="0" cellspacing="0" border="0">
757 <td><span class="summary-sig"><a name="_move_el_inside_block"></a><span class="summary-sig-name">_move_el_inside_block</span>(<span class="summary-sig-arg">el</span>,
758 <span class="summary-sig-arg">tag</span>)</span><br />
759 helper for _fixup_ins_del_tags; actually takes the &lt;ins&gt; etc tags
760 and moves them inside any block-level tags.</td>
761 <td align="right" valign="top">
762 <span class="codelink"><a href="lxml.html.diff-pysrc.html#_move_el_inside_block">source code</a></span>
771 <td width="15%" align="right" valign="top" class="summary">
772 <span class="summary-type"> </span>
773 </td><td class="summary">
774 <table width="100%" cellpadding="0" cellspacing="0" border="0">
776 <td><span class="summary-sig"><a name="_merge_element_contents"></a><span class="summary-sig-name">_merge_element_contents</span>(<span class="summary-sig-arg">el</span>)</span><br />
777 Removes an element, but merges its contents into its place, e.g.,
778 given &lt;p&gt;Hi &lt;i&gt;there!&lt;/i&gt;&lt;/p&gt;, if you remove the &lt;i&gt; element you get
779 &lt;p&gt;Hi there!&lt;/p&gt;</td>
780 <td align="right" valign="top">
781 <span class="codelink"><a href="lxml.html.diff-pysrc.html#_merge_element_contents">source code</a></span>
790 <!-- ==================== VARIABLES ==================== -->
791 <a name="section-Variables"></a>
792 <table class="summary" border="1" cellpadding="3"
793 cellspacing="0" width="100%" bgcolor="white">
794 <tr bgcolor="#70b0f0" class="table-header">
795 <td colspan="2" class="table-header">
796 <table border="0" cellpadding="0" cellspacing="0" width="100%">
798 <td align="left"><span class="table-header">Variables</span></td>
799 <td align="right" valign="top"
800 ><span class="options">[<a href="#section-Variables"
801 class="privatelink" onclick="toggle_private();"
802 >hide private</a>]</span></td>
808 <td width="15%" align="right" valign="top" class="summary">
809 <span class="summary-type"> </span>
810 </td><td class="summary">
811 <a name="_body_re"></a><span class="summary-name">_body_re</span> = <code title="re.compile(r'(?is)&lt;body.*?&gt;')">re.compile(r'<code class="re-flags">(?is)</code>&lt;body.<code class="re-op">*?</code>&gt;')</code>
815 <td width="15%" align="right" valign="top" class="summary">
816 <span class="summary-type"> </span>
817 </td><td class="summary">
818 <a name="_end_body_re"></a><span class="summary-name">_end_body_re</span> = <code title="re.compile(r'(?is)&lt;/body.*?&gt;')">re.compile(r'<code class="re-flags">(?is)</code>&lt;/body.<code class="re-op">*?</code>&gt;')</code>
822 <td width="15%" align="right" valign="top" class="summary">
823 <span class="summary-type"> </span>
824 </td><td class="summary">
825 <a name="_ins_del_re"></a><span class="summary-name">_ins_del_re</span> = <code title="re.compile(r'(?is)&lt;/?(ins|del).*?&gt;')">re.compile(r'<code class="re-flags">(?is)</code>&lt;/<code class="re-op">?</code><code class="re-group">(</code>ins<code class="re-op">|</code>del<code class="re-group">)</code>.<code class="re-op">*?</code>&gt;')</code>
829 <td width="15%" align="right" valign="top" class="summary">
830 <span class="summary-type"> </span>
831 </td><td class="summary">
832 <a name="end_whitespace_re"></a><span class="summary-name">end_whitespace_re</span> = <code title="re.compile(r'[ \t\n\r]$')">re.compile(r'<code class="re-group">[</code> \t\n\r<code class="re-group">]</code>$')</code>
836 <td width="15%" align="right" valign="top" class="summary">
837 <span class="summary-type"> </span>
838 </td><td class="summary">
839 <a href="lxml.html.diff-module.html#empty_tags" class="summary-name" onclick="show_private();">empty_tags</a> = <code title="('param',
847 ..."><code class="variable-group">(</code><code class="variable-quote">'</code><code class="variable-string">param</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">img</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">area</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">br</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">basefont</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">input</code><code class="variable-ellipsis">...</code></code>
851 <td width="15%" align="right" valign="top" class="summary">
852 <span class="summary-type"> </span>
853 </td><td class="summary">
854 <a href="lxml.html.diff-module.html#block_level_tags" class="summary-name" onclick="show_private();">block_level_tags</a> = <code title="('address',
862 ..."><code class="variable-group">(</code><code class="variable-quote">'</code><code class="variable-string">address</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">blockquote</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">center</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">dir</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-ellipsis">...</code></code>
866 <td width="15%" align="right" valign="top" class="summary">
867 <span class="summary-type"> </span>
868 </td><td class="summary">
869 <a href="lxml.html.diff-module.html#block_level_container_tags" class="summary-name" onclick="show_private();">block_level_container_tags</a> = <code title="('dd',
877 ..."><code class="variable-group">(</code><code class="variable-quote">'</code><code class="variable-string">dd</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">dt</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">frameset</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">li</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">t</code><code class="variable-ellipsis">...</code></code>
881 <td width="15%" align="right" valign="top" class="summary">
882 <span class="summary-type"> </span>
883 </td><td class="summary">
884 <a name="start_whitespace_re"></a><span class="summary-name">start_whitespace_re</span> = <code title="re.compile(r'^[ \t\n\r]')">re.compile(r'^<code class="re-group">[</code> \t\n\r<code class="re-group">]</code>')</code>
888 <!-- ==================== FUNCTION DETAILS ==================== -->
889 <a name="section-FunctionDetails"></a>
890 <table class="details" border="1" cellpadding="3"
891 cellspacing="0" width="100%" bgcolor="white">
892 <tr bgcolor="#70b0f0" class="table-header">
893 <td colspan="2" class="table-header">
894 <table border="0" cellpadding="0" cellspacing="0" width="100%">
896 <td align="left"><span class="table-header">Function Details</span></td>
897 <td align="right" valign="top"
898 ><span class="options">[<a href="#section-FunctionDetails"
899 class="privatelink" onclick="toggle_private();"
900 >hide private</a>]</span></td>
906 <a name="html_annotate"></a>
908 <table class="details" border="1" cellpadding="3"
909 cellspacing="0" width="100%" bgcolor="white">
911 <table width="100%" cellpadding="0" cellspacing="0" border="0">
912 <tr valign="top"><td>
913 <h3 class="epydoc"><span class="sig"><span class="sig-name">html_annotate</span>(<span class="sig-arg">doclist</span>,
914 <span class="sig-arg">markup</span>=<span class="sig-default">&lt;function default_markup at 0x9800d4c&gt;</span>)</span>
916 </td><td align="right" valign="top"
917 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#html_annotate">source code</a></span>
921 <p>doclist should be ordered from oldest to newest, like:</p>
922 <pre class="rst-literal-block">
923 &gt;&gt;&gt; version1 = 'Hello World'
924 &gt;&gt;&gt; version2 = 'Goodbye World'
925 &gt;&gt;&gt; print(html_annotate([(version1, 'version 1'),
926 ... (version2, 'version 2')]))
927 &lt;span title="version 2"&gt;Goodbye&lt;/span&gt; &lt;span title="version 1"&gt;World&lt;/span&gt;
929 <p>The documents must be <em>fragments</em> (str/UTF8 or unicode), not
930 complete documents</p>
931 <p>The markup argument is a function to markup the spans of words.
932 This function is called like markup('Hello', 'version 2'), and
933 returns HTML. The first argument is text and never includes any
934 markup. The default uses a span with a title:</p>
936 <pre class="py-doctest">
937 <span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">print</span>(default_markup(<span class="py-string">'Some Text'</span>, <span class="py-string">'by Joe'</span>))
938 <span class="py-output">&lt;span title="by Joe"&gt;Some Text&lt;/span&gt;</span></pre>
944 <a name="htmldiff"></a>
946 <table class="details" border="1" cellpadding="3"
947 cellspacing="0" width="100%" bgcolor="white">
949 <table width="100%" cellpadding="0" cellspacing="0" border="0">
950 <tr valign="top"><td>
951 <h3 class="epydoc"><span class="sig"><span class="sig-name">htmldiff</span>(<span class="sig-arg">old_html</span>,
952 <span class="sig-arg">new_html</span>)</span>
954 </td><td align="right" valign="top"
955 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#htmldiff">source code</a></span>
959 <p>Do a diff of the old and new document. The documents are HTML
960 <em>fragments</em> (str/UTF8 or unicode); they are not complete documents
961 (i.e., no &lt;html&gt; tag).</p>
962 <p>Returns HTML with &lt;ins&gt; and &lt;del&gt; tags added around the
963 appropriate text.</p>
964 <p>Markup is generally ignored, with the markup from new_html
965 preserved, and possibly some markup from old_html (though it is
966 considered acceptable to lose some of the old markup). Only the
967 words in the HTML are diffed. The exceptions are &lt;img&gt; tags, which
968 are treated like words, and the href attribute of &lt;a&gt; tags, which
969 is noted inside the tag itself when there are changes.</p>
974 <a name="merge_delete"></a>
975 <div class="private">
976 <table class="details" border="1" cellpadding="3"
977 cellspacing="0" width="100%" bgcolor="white">
979 <table width="100%" cellpadding="0" cellspacing="0" border="0">
980 <tr valign="top"><td>
981 <h3 class="epydoc"><span class="sig"><span class="sig-name">merge_delete</span>(<span class="sig-arg">del_chunks</span>,
982 <span class="sig-arg">doc</span>)</span>
984 </td><td align="right" valign="top"
985 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#merge_delete">source code</a></span>
989 Adds the text chunks in del_chunks to the document doc (another
990 list of text chunks) with markers to show it is a delete.
991 cleanup_delete later resolves these markers into &lt;del&gt; tags.
996 <a name="cleanup_delete"></a>
997 <div class="private">
998 <table class="details" border="1" cellpadding="3"
999 cellspacing="0" width="100%" bgcolor="white">
1001 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1002 <tr valign="top"><td>
1003 <h3 class="epydoc"><span class="sig"><span class="sig-name">cleanup_delete</span>(<span class="sig-arg">chunks</span>)</span>
1005 </td><td align="right" valign="top"
1006 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#cleanup_delete">source code</a></span>
1010 <p>Cleans up any DEL_START/DEL_END markers in the document, replacing
1011 them with &lt;del&gt;&lt;/del&gt;. To do this while keeping the document
1012 valid, it may need to drop some tags (either start or end tags).</p>
1013 <p>It may also move the del into adjacent tags to try to move it to a
1014 location similar to where it was originally located (e.g., moving a
1015 delete into a preceding &lt;div&gt; tag, if the del looks like (DEL_START,
1016 'Text&lt;/div&gt;', DEL_END)).</p>
1021 <a name="split_unbalanced"></a>
1022 <div class="private">
1023 <table class="details" border="1" cellpadding="3"
1024 cellspacing="0" width="100%" bgcolor="white">
1026 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1027 <tr valign="top"><td>
1028 <h3 class="epydoc"><span class="sig"><span class="sig-name">split_unbalanced</span>(<span class="sig-arg">chunks</span>)</span>
1030 </td><td align="right" valign="top"
1031 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#split_unbalanced">source code</a></span>
1035 <p>Return (unbalanced_start, balanced, unbalanced_end), where each is
1036 a list of text and tag chunks.</p>
1037 <p>unbalanced_start is a list of all the tags that are opened, but
1038 not closed in this span. Similarly, unbalanced_end is a list of
1039 tags that are closed but were not opened. Extracting these might
1040 mean some reordering of the chunks.</p>
1045 <a name="split_delete"></a>
1046 <div class="private">
1047 <table class="details" border="1" cellpadding="3"
1048 cellspacing="0" width="100%" bgcolor="white">
1050 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1051 <tr valign="top"><td>
1052 <h3 class="epydoc"><span class="sig"><span class="sig-name">split_delete</span>(<span class="sig-arg">chunks</span>)</span>
1054 </td><td align="right" valign="top"
1055 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#split_delete">source code</a></span>
1059 Returns (stuff_before_DEL_START, stuff_inside_DEL_START_END,
1060 stuff_after_DEL_END). Returns the first case found (there may be
1061 more DEL_STARTs in stuff_after_DEL_END). Raises NoDeletes if
1062 there's no DEL_START found.
1067 <a name="locate_unbalanced_start"></a>
1068 <div class="private">
1069 <table class="details" border="1" cellpadding="3"
1070 cellspacing="0" width="100%" bgcolor="white">
1072 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1073 <tr valign="top"><td>
1074 <h3 class="epydoc"><span class="sig"><span class="sig-name">locate_unbalanced_start</span>(<span class="sig-arg">unbalanced_start</span>,
1075 <span class="sig-arg">pre_delete</span>,
1076 <span class="sig-arg">post_delete</span>)</span>
1078 </td><td align="right" valign="top"
1079 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#locate_unbalanced_start">source code</a></span>
1083 <p>pre_delete and post_delete implicitly point to a place in the
1084 document (where the two were split). This moves that point (by
1085 popping items from one and pushing them onto the other). It moves
1086 the point to try to find a place where unbalanced_start applies.</p>
1087 <p>As an example:</p>
1088 <pre class="rst-literal-block">
1089 &gt;&gt;&gt; unbalanced_start = ['&lt;div&gt;']
1090 &gt;&gt;&gt; doc = ['&lt;p&gt;', 'Text', '&lt;/p&gt;', '&lt;div&gt;', 'More Text', '&lt;/div&gt;']
1091 &gt;&gt;&gt; pre, post = doc[:3], doc[3:]
1092 &gt;&gt;&gt; pre, post
1093 (['&lt;p&gt;', 'Text', '&lt;/p&gt;'], ['&lt;div&gt;', 'More Text', '&lt;/div&gt;'])
1094 &gt;&gt;&gt; locate_unbalanced_start(unbalanced_start, pre, post)
1095 &gt;&gt;&gt; pre, post
1096 (['&lt;p&gt;', 'Text', '&lt;/p&gt;', '&lt;div&gt;'], ['More Text', '&lt;/div&gt;'])
1098 <p>As you can see, we moved the point so that the dangling &lt;div&gt; that
1099 we found will be effectively replaced by the div in the original
1100 document. If this doesn't work out, we just throw away
1101 unbalanced_start without doing anything.</p>
1106 <a name="tokenize"></a>
1107 <div class="private">
1108 <table class="details" border="1" cellpadding="3"
1109 cellspacing="0" width="100%" bgcolor="white">
1111 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1112 <tr valign="top"><td>
1113 <h3 class="epydoc"><span class="sig"><span class="sig-name">tokenize</span>(<span class="sig-arg">html</span>,
1114 <span class="sig-arg">include_hrefs</span>=<span class="sig-default">True</span>)</span>
1116 </td><td align="right" valign="top"
1117 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#tokenize">source code</a></span>
1121 <p>Parses the given HTML and returns token objects (words with attached tags).</p>
1122 <p>This parses only the content of a page; anything in the head is
1123 ignored, and the &lt;head&gt; and &lt;body&gt; elements are themselves
1124 optional. The content is then parsed by lxml, which ensures the
1125 validity of the resulting parsed document (though lxml may make
1126 incorrect guesses when the markup is particularly bad).</p>
1127 <p>&lt;ins&gt; and &lt;del&gt; tags are also eliminated from the document, as
1128 that gets confusing.</p>
1129 <p>If include_hrefs is true, then the href attribute of &lt;a&gt; tags is
1130 included as a special kind of diffable token.</p>
1135 <a name="parse_html"></a>
1136 <div class="private">
1137 <table class="details" border="1" cellpadding="3"
1138 cellspacing="0" width="100%" bgcolor="white">
1140 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1141 <tr valign="top"><td>
1142 <h3 class="epydoc"><span class="sig"><span class="sig-name">parse_html</span>(<span class="sig-arg">html</span>,
1143 <span class="sig-arg">cleanup</span>=<span class="sig-default">True</span>)</span>
1145 </td><td align="right" valign="top"
1146 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#parse_html">source code</a></span>
1150 <p>Parses an HTML fragment, returning an lxml element. Note that the HTML will be
1151 wrapped in a &lt;div&gt; tag that was not in the original document.</p>
1152 <p>If cleanup is true, make sure there's no &lt;head&gt; or &lt;body&gt;, and get
1153 rid of any &lt;ins&gt; and &lt;del&gt; tags.</p>
1158 <a name="cleanup_html"></a>
1159 <div class="private">
1160 <table class="details" border="1" cellpadding="3"
1161 cellspacing="0" width="100%" bgcolor="white">
1163 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1164 <tr valign="top"><td>
1165 <h3 class="epydoc"><span class="sig"><span class="sig-name">cleanup_html</span>(<span class="sig-arg">html</span>)</span>
1167 </td><td align="right" valign="top"
1168 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#cleanup_html">source code</a></span>
1172 This 'cleans' the HTML, meaning that any page structure is removed
1173 (only the contents of &lt;body&gt; are used, if there is any &lt;body&gt;).
1174 Also &lt;ins&gt; and &lt;del&gt; tags are removed.
1179 <a name="flatten_el"></a>
1180 <div class="private">
1181 <table class="details" border="1" cellpadding="3"
1182 cellspacing="0" width="100%" bgcolor="white">
1184 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1185 <tr valign="top"><td>
1186 <h3 class="epydoc"><span class="sig"><span class="sig-name">flatten_el</span>(<span class="sig-arg">el</span>,
1187 <span class="sig-arg">include_hrefs</span>,
1188 <span class="sig-arg">skip_tag</span>=<span class="sig-default">False</span>)</span>
1190 </td><td align="right" valign="top"
1191 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#flatten_el">source code</a></span>
1195 <p>Takes an lxml element el, and generates all the text chunks for
1196 that tag. Each start tag is a chunk, each word is a chunk, and each
1197 end tag is a chunk.</p>
1198 <p>If skip_tag is true, then the outermost container tag is
1199 not returned (just its contents).</p>
1204 <a name="split_words"></a>
1205 <div class="private">
1206 <table class="details" border="1" cellpadding="3"
1207 cellspacing="0" width="100%" bgcolor="white">
1209 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1210 <tr valign="top"><td>
1211 <h3 class="epydoc"><span class="sig"><span class="sig-name">split_words</span>(<span class="sig-arg">text</span>)</span>
1213 </td><td align="right" valign="top"
1214 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#split_words">source code</a></span>
1218 Splits some text into words. Includes trailing whitespace (one
1219 space) on each word when appropriate.
1224 <a name="end_tag"></a>
1225 <div class="private">
1226 <table class="details" border="1" cellpadding="3"
1227 cellspacing="0" width="100%" bgcolor="white">
1229 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1230 <tr valign="top"><td>
1231 <h3 class="epydoc"><span class="sig"><span class="sig-name">end_tag</span>(<span class="sig-arg">el</span>)</span>
1233 </td><td align="right" valign="top"
1234 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#end_tag">source code</a></span>
1238 The text representation of an end tag for a tag. Includes
1239 trailing whitespace when appropriate.
1244 <a name="fixup_ins_del_tags"></a>
1245 <div class="private">
1246 <table class="details" border="1" cellpadding="3"
1247 cellspacing="0" width="100%" bgcolor="white">
1249 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1250 <tr valign="top"><td>
1251 <h3 class="epydoc"><span class="sig"><span class="sig-name">fixup_ins_del_tags</span>(<span class="sig-arg">html</span>)</span>
1253 </td><td align="right" valign="top"
1254 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#fixup_ins_del_tags">source code</a></span>
1258 Given an html string, move any &lt;ins&gt; or &lt;del&gt; tags inside of any
1259 block-level elements, e.g. transform &lt;ins&gt;&lt;p&gt;word&lt;/p&gt;&lt;/ins&gt; to
1260 &lt;p&gt;&lt;ins&gt;word&lt;/ins&gt;&lt;/p&gt;
1265 <a name="serialize_html_fragment"></a>
1266 <div class="private">
1267 <table class="details" border="1" cellpadding="3"
1268 cellspacing="0" width="100%" bgcolor="white">
1270 <table width="100%" cellpadding="0" cellspacing="0" border="0">
1271 <tr valign="top"><td>
1272 <h3 class="epydoc"><span class="sig"><span class="sig-name">serialize_html_fragment</span>(<span class="sig-arg">el</span>,
1273 <span class="sig-arg">skip_outer</span>=<span class="sig-default">False</span>)</span>
1275 </td><td align="right" valign="top"
1276 ><span class="codelink"><a href="lxml.html.diff-pysrc.html#serialize_html_fragment">source code</a></span>
1280 <p>Serialize a single lxml element as HTML. The serialized form
1281 includes the element's tail.</p>
1282 <p>If skip_outer is true, then don't serialize the outermost tag.</p>
1288 <!-- ==================== VARIABLES DETAILS ==================== -->
1289 <a name="section-VariablesDetails"></a>
1290 <table class="details" border="1" cellpadding="3"
1291 cellspacing="0" width="100%" bgcolor="white">
1292 <tr bgcolor="#70b0f0" class="table-header">
1293 <td colspan="2" class="table-header">
1294 <table border="0" cellpadding="0" cellspacing="0" width="100%">
1296 <td align="left"><span class="table-header">Variables Details</span></td>
1297 <td align="right" valign="top"
1298 ><span class="options">[<a href="#section-VariablesDetails"
1299 class="privatelink" onclick="toggle_private();"
1300 >hide private</a>]</span></td>
1306 <a name="empty_tags"></a>
1307 <div class="private">
1308 <table class="details" border="1" cellpadding="3"
1309 cellspacing="0" width="100%" bgcolor="white">
1311 <h3 class="epydoc">empty_tags</h3>
1317 <dd><table><tr><td><pre class="variable">
1318 <code class="variable-group">(</code><code class="variable-quote">'</code><code class="variable-string">param</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1319 <code class="variable-quote">'</code><code class="variable-string">img</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1320 <code class="variable-quote">'</code><code class="variable-string">area</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1321 <code class="variable-quote">'</code><code class="variable-string">br</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1322 <code class="variable-quote">'</code><code class="variable-string">basefont</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1323 <code class="variable-quote">'</code><code class="variable-string">input</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1324 <code class="variable-quote">'</code><code class="variable-string">base</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1325 <code class="variable-quote">'</code><code class="variable-string">meta</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1326 <code class="variable-ellipsis">...</code>
1327 </pre></td></tr></table>
1332 <a name="block_level_tags"></a>
1333 <div class="private">
1334 <table class="details" border="1" cellpadding="3"
1335 cellspacing="0" width="100%" bgcolor="white">
1337 <h3 class="epydoc">block_level_tags</h3>
1343 <dd><table><tr><td><pre class="variable">
1344 <code class="variable-group">(</code><code class="variable-quote">'</code><code class="variable-string">address</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1345 <code class="variable-quote">'</code><code class="variable-string">blockquote</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1346 <code class="variable-quote">'</code><code class="variable-string">center</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1347 <code class="variable-quote">'</code><code class="variable-string">dir</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1348 <code class="variable-quote">'</code><code class="variable-string">div</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1349 <code class="variable-quote">'</code><code class="variable-string">dl</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1350 <code class="variable-quote">'</code><code class="variable-string">fieldset</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1351 <code class="variable-quote">'</code><code class="variable-string">form</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1352 <code class="variable-ellipsis">...</code>
1353 </pre></td></tr></table>
1358 <a name="block_level_container_tags"></a>
1359 <div class="private">
1360 <table class="details" border="1" cellpadding="3"
1361 cellspacing="0" width="100%" bgcolor="white">
1363 <h3 class="epydoc">block_level_container_tags</h3>
1369 <dd><table><tr><td><pre class="variable">
1370 <code class="variable-group">(</code><code class="variable-quote">'</code><code class="variable-string">dd</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1371 <code class="variable-quote">'</code><code class="variable-string">dt</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1372 <code class="variable-quote">'</code><code class="variable-string">frameset</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1373 <code class="variable-quote">'</code><code class="variable-string">li</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1374 <code class="variable-quote">'</code><code class="variable-string">tbody</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1375 <code class="variable-quote">'</code><code class="variable-string">td</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1376 <code class="variable-quote">'</code><code class="variable-string">tfoot</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1377 <code class="variable-quote">'</code><code class="variable-string">th</code><code class="variable-quote">'</code><code class="variable-op">,</code>
1378 <code class="variable-ellipsis">...</code>
1379 </pre></td></tr></table>
1385 <!-- ==================== NAVIGATION BAR ==================== -->
1386 <table class="navbar" border="0" width="100%" cellpadding="0"
1387 bgcolor="#a0c0ff" cellspacing="0">
1388 <tr valign="middle">
1390 <th> <a
1391 href="lxml-module.html">Home</a> </th>
1394 <th> <a
1395 href="module-tree.html">Trees</a> </th>
1398 <th> <a
1399 href="identifier-index.html">Indices</a> </th>
1402 <th> <a
1403 href="help.html">Help</a> </th>
1405 <!-- Project homepage -->
1406 <th class="navbar" align="right" width="100%">
1407 <table border="0" cellpadding="0" cellspacing="0">
1408 <tr><th class="navbar" align="center"
1409 ><a class="navbar" target="_top" href="http://codespeak.net/lxml/">lxml API</a></th>
1413 <table border="0" cellpadding="0" cellspacing="0" width="100%">
1415 <td align="left" class="footer">
1416 Generated by Epydoc 3.0 on Fri Oct 30 14:51:44 2009
1418 <td align="right" class="footer">
1419 <a target="mainFrame" href="http://epydoc.sourceforge.net"
1420 >http://epydoc.sourceforge.net</a>
1425 <script type="text/javascript">
1427 // Private objects are initially displayed (because if
1428 // javascript is turned off then we want them to be
1429 // visible); but by default, we want to hide them. So hide
1430 // them unless we have a cookie that says to show them.