1 # XML serialization and output functions
# Exception type raised by the serialisation helpers in this file (see the
# ``raise SerialisationError`` statements in _textToString() and
# _raiseSerialisationError()).
3 class SerialisationError(LxmlError):
4 u"""A libxml2 error that occurred during serialisation.
# C-level enum for the supported output methods.
# NOTE(review): the enum members are not visible in this listing; the
# OUTPUT_METHOD_XML / OUTPUT_METHOD_HTML / OUTPUT_METHOD_TEXT names returned
# by _findOutputMethod() are presumably declared here -- confirm.
7 cdef enum _OutputMethods:
# Translate a user-supplied output method name into one of the
# OUTPUT_METHOD_* codes.
#
# Raises ValueError for a name that is not recognised.  At the C level an
# exception is signalled by the return value -1 (``except -1``).
# NOTE(review): the conditions guarding the individual ``return`` statements
# are not visible in this listing.
12 cdef int _findOutputMethod(method) except -1:
14 return OUTPUT_METHOD_XML
# lower-case the name before it is checked any further
15 method = method.lower()
17 return OUTPUT_METHOD_XML
19 return OUTPUT_METHOD_HTML
21 return OUTPUT_METHOD_TEXT
# reached when none of the returns above was taken: report the bad name
22 raise ValueError, u"unknown output method %r" % method
# Build the text content of a node as a Python string.
#
# The content of ``c_node`` is collected in a temporary libxml2 buffer,
# followed by the content of the text nodes that _textNodeOrSkip() finds
# after it, and is then converted according to ``encoding``.
# Raises SerialisationError if collecting the content fails.
# NOTE(review): several lines of this function (among them the guard around
# the tail loop, parts of the encoding checks and the final return) are not
# visible in this listing; the comments only describe the visible statements.
24 cdef _textToString(xmlNode* c_node, encoding, bint with_tail):
25 cdef bint needs_conversion
27 cdef xmlNode* c_text_node
28 cdef tree.xmlBuffer* c_buffer
# temporary buffer that collects the text content
31 c_buffer = tree.xmlBufferCreate()
# NOTE(review): presumably guarded by a NULL check on c_buffer -- confirm
33 return python.PyErr_NoMemory()
# dump the node content into the buffer; a negative result is treated as
# an error further down
36 error_result = tree.xmlNodeBufGetContent(c_buffer, c_node)
# append the content of the text nodes following c_node, as selected by
# _textNodeOrSkip() (presumably only when with_tail is set -- the guard is
# not visible here)
38 c_text_node = _textNodeOrSkip(c_node.next)
39 while c_text_node is not NULL:
40 tree.xmlBufferWriteChar(c_buffer, c_text_node.content)
41 c_text_node = _textNodeOrSkip(c_text_node.next)
# pointer to the collected data inside c_buffer
42 c_text = tree.xmlBufferContent(c_buffer)
# on failure, release the buffer before raising
44 if error_result < 0 or c_text is NULL:
45 tree.xmlBufferFree(c_buffer)
46 raise SerialisationError, u"Error during serialisation (out of memory?)"
# inspect the requested encoding; the ``needs_conversion`` flag declared
# above is presumably set in these branches (assignments not visible)
50 if encoding is _unicode:
52 elif encoding is not None:
# upper-case the name so the comparisons below ignore case
53 encoding = encoding.upper()
54 if encoding != u'UTF-8':
55 if encoding == u'ASCII':
57 # will raise a decode error below
# decode the buffer content as UTF-8 into a unicode string
63 text = python.PyUnicode_DecodeUTF8(
64 c_text, tree.xmlBufferLength(c_buffer), 'strict')
# any target other than the ``_unicode`` marker gets a byte string in the
# requested encoding
65 if encoding is not _unicode:
66 encoding = _utf8(encoding)
67 text = python.PyUnicode_AsEncodedString(
68 text, encoding, 'strict')
# release the libxml2 buffer
72 tree.xmlBufferFree(c_buffer)
# Serialise ``element`` into an in-memory libxml2 output buffer and build a
# Python string from the buffer content.
#
# ``method`` is resolved through _findOutputMethod(); text output is
# delegated to _textToString().  An encoding that libxml2 does not know
# raises LookupError, a failed serialisation goes through
# _raiseSerialisationError().
# NOTE(review): some branch bodies, guards and the final return are not
# visible in this listing.
76 cdef _tostring(_Element element, encoding, method,
77 bint write_xml_declaration, bint write_complete_document,
78 bint pretty_print, bint with_tail, int standalone):
79 u"""Serialize an element to an encoded string representation of its XML
82 cdef tree.xmlOutputBuffer* c_buffer
83 cdef tree.xmlBuffer* c_result_buffer
84 cdef tree.xmlCharEncodingHandler* enchandler
91 c_method = _findOutputMethod(method)
# plain text output does not need an output buffer
92 if c_method == OUTPUT_METHOD_TEXT:
93 return _textToString(element._c_node, encoding, with_tail)
# NOTE(review): the bodies of this check are not visible; further down
# c_enc is compared against NULL, so one branch presumably leaves it NULL
94 if encoding is None or encoding is _unicode:
97 encoding = _utf8(encoding)
98 c_enc = _cstr(encoding)
# i.e. the handler goes into the output buffer (xmlAllocOutputBuffer) and
# the encoding name is passed to _writeNodeToBuffer() as well
99 # it is necessary to *and* find the encoding handler *and* use
100 # encoding during output
101 enchandler = tree.xmlFindCharEncodingHandler(c_enc)
# a named encoding without a handler is unknown to libxml2
102 if enchandler is NULL and c_enc is not NULL:
103 if encoding is not None:
# turn the UTF-8 encoded name back into unicode for the error message
104 encoding = encoding.decode(u'UTF-8')
105 raise LookupError, u"unknown encoding: '%s'" % encoding
106 c_buffer = tree.xmlAllocOutputBuffer(enchandler)
# NOTE(review): presumably the failure path of the allocation above (guard
# not visible): close the handler and signal out-of-memory
108 tree.xmlCharEncCloseFunc(enchandler)
109 return python.PyErr_NoMemory()
112 _writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_method,
113 write_xml_declaration, write_complete_document,
114 pretty_print, with_tail, standalone)
115 tree.xmlOutputBufferFlush(c_buffer)
# read the result from the conversion buffer when one exists, otherwise
# from the plain buffer
116 if c_buffer.conv is not NULL:
117 c_result_buffer = c_buffer.conv
119 c_result_buffer = c_buffer.buffer
# the writer reports problems through the buffer's error field
121 error_result = c_buffer.error
122 if error_result != xmlerror.XML_ERR_OK:
# release the buffer before raising
123 tree.xmlOutputBufferClose(c_buffer)
124 _raiseSerialisationError(error_result)
# copy the buffer content into a Python object: a unicode string decoded
# from UTF-8 when ``_unicode`` was requested, a byte string otherwise
127 if encoding is _unicode:
128 result = python.PyUnicode_DecodeUTF8(
129 tree.xmlBufferContent(c_result_buffer),
130 tree.xmlBufferLength(c_result_buffer),
133 result = python.PyString_FromStringAndSize(
134 tree.xmlBufferContent(c_result_buffer),
135 tree.xmlBufferLength(c_result_buffer))
# closing the buffer yields a result code of its own
# NOTE(review): the condition guarding the raise below is not visible
137 error_result = tree.xmlOutputBufferClose(c_buffer)
139 _raiseSerialisationError(error_result)
# Raise the Python exception that corresponds to a libxml2 error code:
# a memory error for XML_ERR_NO_MEMORY, SerialisationError otherwise.
# NOTE(review): two lines of this function are not visible in this listing
# (presumably an ``else:`` and a check that the name lookup failed).
142 cdef _raiseSerialisationError(int error_result):
143 if error_result == xmlerror.XML_ERR_NO_MEMORY:
144 return python.PyErr_NoMemory()
# look up a name for the error code to use as the exception message
146 message = ErrorTypes._getName(error_result)
# fallback message that only carries the numeric code
148 message = u"unknown error %d" % error_result
149 raise SerialisationError, message
151 ############################################################
152 # low-level serialisation functions
# Write a node to a libxml2 output buffer, optionally preceded by the XML
# declaration, the DTD and preceding siblings and followed by the tail text
# and following siblings.
#
# Declared ``nogil`` and ``void``: failures are not raised here.  An
# out-of-memory condition is recorded in ``c_buffer.error``, which the
# callers in this file read after the call.
# NOTE(review): the declaration and assignment of ``c_doc`` and several
# guards are not visible in this listing.
154 cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
155 xmlNode* c_node, char* encoding, int c_method,
156 bint write_xml_declaration,
157 bint write_complete_document,
158 bint pretty_print, bint with_tail,
159 int standalone) nogil:
161 cdef xmlNode* c_nsdecl_node
# the XML declaration is only written for the XML output method
163 if write_xml_declaration and c_method == OUTPUT_METHOD_XML:
164 _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding, standalone)
166 # write internal DTD subset, preceding PIs/comments, etc.
167 if write_complete_document:
168 _writeDtdToBuffer(c_buffer, c_doc, c_node.name, encoding)
169 _writePrevSiblings(c_buffer, c_node, encoding, pretty_print)
# by default the node itself is dumped
171 c_nsdecl_node = c_node
# a node that does not sit directly below the document node is replaced
# by a temporary copy that carries the namespaces of its parents
172 if c_node.parent is NULL or c_node.parent.type != tree.XML_DOCUMENT_NODE:
173 # copy the node and add namespaces from parents
174 # this is required to make libxml write them
# the second argument (2) requests a copy with properties and namespaces
# but without the children (see the libxml2 xmlCopyNode documentation)
175 c_nsdecl_node = tree.xmlCopyNode(c_node, 2)
176 if c_nsdecl_node is NULL:
# report the failed copy through the buffer's error field
177 c_buffer.error = xmlerror.XML_ERR_NO_MEMORY
179 _copyParentNamespaces(c_node, c_nsdecl_node)
# let the copy borrow the tree links of the original node so that the
# original children get serialised
181 c_nsdecl_node.parent = c_node.parent
182 c_nsdecl_node.children = c_node.children
183 c_nsdecl_node.last = c_node.last
# XML serialiser for the XML method, HTML serialiser otherwise
# (NOTE(review): the ``else:`` line is not visible here)
186 if c_method == OUTPUT_METHOD_XML:
187 tree.xmlNodeDumpOutput(
188 c_buffer, c_doc, c_nsdecl_node, 0, pretty_print, encoding)
190 tree.htmlNodeDumpFormatOutput(
191 c_buffer, c_doc, c_nsdecl_node, encoding, pretty_print)
193 if c_nsdecl_node is not c_node:
# detach the borrowed children before freeing the temporary copy, so
# that freeing it does not touch the original subtree
195 c_nsdecl_node.children = c_nsdecl_node.last = NULL
196 tree.xmlFreeNode(c_nsdecl_node)
198 # write tail, trailing comments, etc.
# NOTE(review): the guards of the tail write and of the final newline are
# not visible (the former presumably tests with_tail)
200 _writeTail(c_buffer, c_node, encoding, pretty_print)
201 if write_complete_document:
202 _writeNextSiblings(c_buffer, c_node, encoding, pretty_print)
204 tree.xmlOutputBufferWrite(c_buffer, 1, "\n")
# Write the XML declaration (``<?xml version='...' encoding='...' ...?>``)
# followed by a newline to the output buffer.
#
# In the xmlOutputBufferWrite() calls the integer argument is the byte
# length of the string literal that follows it.
# NOTE(review): the guards before the first write and around the
# ``standalone='no'`` and plain branches are not visible in this listing.
206 cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
207 char* version, char* encoding,
208 int standalone) nogil:
211 tree.xmlOutputBufferWrite(c_buffer, 15, "<?xml version='")
212 tree.xmlOutputBufferWriteString(c_buffer, version)
213 tree.xmlOutputBufferWrite(c_buffer, 12, "' encoding='")
214 tree.xmlOutputBufferWriteString(c_buffer, encoding)
# three possible endings: standalone 'no', standalone 'yes' (value 1), or
# no standalone attribute at all
216 tree.xmlOutputBufferWrite(c_buffer, 20, "' standalone='no'?>\n")
217 elif standalone == 1:
218 tree.xmlOutputBufferWrite(c_buffer, 21, "' standalone='yes'?>\n")
220 tree.xmlOutputBufferWrite(c_buffer, 4, "'?>\n")
# Write the DOCTYPE declaration for the internal DTD subset of ``c_doc`` to
# the output buffer, including the subset's content if it has any.
#
# In the xmlOutputBufferWrite() calls the integer argument is the byte
# length of the string literal that follows it.
# NOTE(review): the bodies of the first two checks (presumably early
# returns), the ``else:`` lines, the declaration of ``c_node`` and the loop
# advance are not visible in this listing.
222 cdef void _writeDtdToBuffer(tree.xmlOutputBuffer* c_buffer,
223 xmlDoc* c_doc, char* c_root_name,
224 char* encoding) nogil:
225 cdef tree.xmlDtd* c_dtd
227 c_dtd = c_doc.intSubset
# no internal subset, or one without a name
228 if c_dtd is NULL or c_dtd.name is NULL:
# the DTD name is compared with the root element name
230 if cstd.strcmp(c_root_name, c_dtd.name) != 0:
232 tree.xmlOutputBufferWrite(c_buffer, 10, "<!DOCTYPE ")
233 tree.xmlOutputBufferWriteString(c_buffer, c_dtd.name)
# external identifier: needs a non-empty system ID; PUBLIC form when a
# non-empty public (external) ID exists as well, SYSTEM form otherwise
234 if c_dtd.SystemID != NULL and c_dtd.SystemID[0] != c'\0':
235 if c_dtd.ExternalID != NULL and c_dtd.ExternalID[0] != c'\0':
236 tree.xmlOutputBufferWrite(c_buffer, 9, ' PUBLIC "')
237 tree.xmlOutputBufferWriteString(c_buffer, c_dtd.ExternalID)
238 tree.xmlOutputBufferWrite(c_buffer, 3, '" "')
240 tree.xmlOutputBufferWrite(c_buffer, 9, ' SYSTEM "')
241 tree.xmlOutputBufferWriteString(c_buffer, c_dtd.SystemID)
242 tree.xmlOutputBufferWrite(c_buffer, 1, '"')
# a subset without any declarations is closed right away
243 if c_dtd.entities == NULL and c_dtd.elements == NULL and \
244 c_dtd.attributes == NULL and c_dtd.notations == NULL and \
245 c_dtd.pentities == NULL:
246 tree.xmlOutputBufferWrite(c_buffer, 2, '>\n')
# otherwise write the declarations between ' [' and ']>'
248 tree.xmlOutputBufferWrite(c_buffer, 3, ' [\n')
249 if c_dtd.notations != NULL:
# NOTE(review): this dumps straight into c_buffer.buffer instead of going
# through the xmlOutputBuffer write functions used everywhere else
250 tree.xmlDumpNotationTable(c_buffer.buffer,
251 <tree.xmlNotationTable*>c_dtd.notations)
# dump the child nodes of the DTD one by one
252 c_node = c_dtd.children
253 while c_node is not NULL:
254 tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, 0, encoding)
256 tree.xmlOutputBufferWrite(c_buffer, 3, "]>\n")
# Dump a run of consecutive text nodes to the output buffer.
# NOTE(review): the statement that positions ``c_node`` before the loop and
# the one that advances it inside the loop are not visible in this listing.
258 cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
259 char* encoding, bint pretty_print) nogil:
260 u"Write the element tail."
# stops at the first node that is not a text node
262 while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE:
263 tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0,
264 pretty_print, encoding)
# Write the processing instructions and comments that directly precede
# ``c_node`` to the output buffer, in document order.
# NOTE(review): the body of the first check (presumably an early return),
# the initialisation of ``c_sibling`` and the guard of the newline write are
# not visible in this listing.
267 cdef void _writePrevSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
268 char* encoding, bint pretty_print) nogil:
269 cdef xmlNode* c_sibling
# a node whose parent is an element is not a root node
270 if c_node.parent is not NULL and _isElement(c_node.parent):
272 # we are at a root node, so add PI and comment siblings
# walk backwards to the first node of the PI/comment run before c_node
274 while c_sibling.prev != NULL and \
275 (c_sibling.prev.type == tree.XML_PI_NODE or \
276 c_sibling.prev.type == tree.XML_COMMENT_NODE):
277 c_sibling = c_sibling.prev
# then dump forwards until c_node itself is reached
278 while c_sibling != c_node:
279 tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0,
280 pretty_print, encoding)
282 tree.xmlOutputBufferWriteString(c_buffer, "\n")
283 c_sibling = c_sibling.next
# Write the processing instructions and comments that directly follow
# ``c_node`` to the output buffer.
# NOTE(review): the body of the first check (presumably an early return)
# and the guard of the newline write are not visible in this listing.
285 cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
286 char* encoding, bint pretty_print) nogil:
287 cdef xmlNode* c_sibling
# a node whose parent is an element is not a root node
288 if c_node.parent is not NULL and _isElement(c_node.parent):
290 # we are at a root node, so add PI and comment siblings
291 c_sibling = c_node.next
# stops at the first sibling that is neither a PI nor a comment
292 while c_sibling != NULL and \
293 (c_sibling.type == tree.XML_PI_NODE or \
294 c_sibling.type == tree.XML_COMMENT_NODE):
296 tree.xmlOutputBufferWriteString(c_buffer, "\n")
297 tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0,
298 pretty_print, encoding)
299 c_sibling = c_sibling.next
301 ############################################################
302 # output to file-like objects
# Adapter that lets libxml2 write serialised output to a Python file-like
# object: it is registered as the I/O context of an xmlOutputBuffer and
# receives the data through the _writeFilelikeWriter() and
# _closeFilelikeWriter() callbacks defined after this class.
#
# Exceptions are stored in ``_exc_context`` (``_store_raised()``) so that
# the Python-level caller can re-raise them after serialisation.
# NOTE(review): the try/except framing and the return statements of
# write() and close() are not visible in this listing.
304 cdef class _FilelikeWriter:
# target object whose write() method receives the data
305 cdef object _filelike
# callable that close() invokes, if one was set
306 cdef object _close_filelike
# keeps exceptions raised inside the callbacks
307 cdef _ExceptionContext _exc_context
# connected by _tofilelikeC14N() around the C14N call
308 cdef _ErrorLog error_log
309 def __init__(self, filelike, exc_context=None, compression=None):
# with a positive compression level the target is wrapped in a GzipFile
310 if compression is not None and compression > 0:
311 filelike = gzip.GzipFile(
312 fileobj=filelike, mode=u'wb', compresslevel=compression)
# NOTE(review): indentation is lost in this listing; this assignment
# presumably belongs to the compression branch, so that only the GzipFile
# created here gets closed -- confirm
313 self._close_filelike = filelike.close
314 self._filelike = filelike
# use a private exception context unless the caller supplied one
315 if exc_context is None:
316 self._exc_context = _ExceptionContext()
318 self._exc_context = exc_context
319 self.error_log = _ErrorLog()

# Create an xmlOutputBuffer that writes through this object; raises IOError
# if libxml2 cannot create it (``except NULL`` makes NULL the C-level
# error return value).
321 cdef tree.xmlOutputBuffer* _createOutputBuffer(
322 self, tree.xmlCharEncodingHandler* enchandler) except NULL:
323 cdef tree.xmlOutputBuffer* c_buffer
# ``self`` is handed to libxml2 as the context pointer of the callbacks
324 c_buffer = tree.xmlOutputBufferCreateIO(
325 _writeFilelikeWriter, _closeFilelikeWriter,
326 <python.PyObject*>self, enchandler)
328 raise IOError, u"Could not create I/O writer context."

# Write ``size`` bytes starting at ``c_buffer`` to the file-like object.
331 cdef int write(self, char* c_buffer, int size):
# close() resets _filelike to None
333 if self._filelike is None:
334 raise IOError, u"File is already closed"
# copy the C data into a Python byte string for the write() call
335 py_buffer = python.PyString_FromStringAndSize(c_buffer, size)
336 self._filelike.write(py_buffer)
339 self._exc_context._store_raised()

# Call the stored close callable, if any, and drop the reference to the
# file-like object.
342 cdef int close(self):
344 if self._close_filelike is not None:
345 self._close_filelike()
346 # we should not close the file here as we didn't open it
347 self._filelike = None
350 self._exc_context._store_raised()
# Write callback passed to xmlOutputBufferCreateIO(): ``ctxt`` is the
# _FilelikeWriter registered as I/O context; the call is forwarded to its
# write() method.
353 cdef int _writeFilelikeWriter(void* ctxt, char* c_buffer, int len):
354 return (<_FilelikeWriter>ctxt).write(c_buffer, len)
# Close callback passed to xmlOutputBufferCreateIO(): ``ctxt`` is the
# _FilelikeWriter registered as I/O context; the call is forwarded to its
# close() method.
356 cdef int _closeFilelikeWriter(void* ctxt):
357 return (<_FilelikeWriter>ctxt).close()
# Serialise ``element`` to ``f``, which is handled either as a file name or
# as an object with a ``write`` attribute.
#
# Raises LookupError for an encoding unknown to libxml2; serialisation
# failures go through _raiseSerialisationError().
# NOTE(review): the end of the signature (it has to provide the
# ``compression`` name used below) and many guards are not visible in this
# listing; comments marked "presumably" describe what the missing guards
# most likely select.
359 cdef _tofilelike(f, _Element element, encoding, method,
360 bint write_xml_declaration, bint write_doctype,
361 bint pretty_print, bint with_tail, int standalone,
363 cdef python.PyThreadState* state = NULL
364 cdef _FilelikeWriter writer
365 cdef tree.xmlOutputBuffer* c_buffer
366 cdef tree.xmlCharEncodingHandler* enchandler
368 cdef int error_result
# encoding name as C string for the libxml2 calls
372 encoding = _utf8(encoding)
373 c_enc = _cstr(encoding)
374 c_method = _findOutputMethod(method)
# text output: build the complete string first
375 if c_method == OUTPUT_METHOD_TEXT:
376 data = _textToString(element._c_node, encoding, with_tail)
# gzip-compress the data in memory (presumably only when compression was
# requested)
378 bytes_out = BytesIO()
379 gzip_file = gzip.GzipFile(
380 fileobj=bytes_out, mode=u'wb', compresslevel=compression)
382 gzip_file.write(data)
# presumably the file name case: open the target file in binary mode
387 filename8 = _encodeFilename(f)
388 f = open(filename8, u'wb')
# a missing handler means that libxml2 does not know the encoding
396 enchandler = tree.xmlFindCharEncodingHandler(c_enc)
397 if enchandler is NULL:
398 if encoding is not None:
# turn the UTF-8 encoded name back into unicode for the error message
399 encoding = encoding.decode(u'UTF-8')
400 raise LookupError, u"unknown encoding: '%s'" % encoding
# presumably the file name case: libxml2 opens the file itself and applies
# the compression level
403 filename8 = _encodeFilename(f)
404 c_buffer = tree.xmlOutputBufferCreateFilename(
405 _cstr(filename8), enchandler, compression)
# presumably taken when the buffer could not be created: IOError from errno
407 return python.PyErr_SetFromErrno(IOError)
# release the GIL while libxml2 writes to the file; it is re-acquired by
# PyEval_RestoreThread() below
408 state = python.PyEval_SaveThread()
# file-like object: the data is routed through a _FilelikeWriter
409 elif hasattr(f, u'write'):
410 writer = _FilelikeWriter(f, compression=compression)
411 c_buffer = writer._createOutputBuffer(enchandler)
# neither kind of target: release the handler before reporting the type
# (the raise statement that uses the message below is not visible)
413 tree.xmlCharEncCloseFunc(enchandler)
415 u"File or filename expected, got '%s'" % funicode(python._fqtypename(f))
417 _writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_method,
418 write_xml_declaration, write_doctype,
419 pretty_print, with_tail, standalone)
# closing the buffer yields a result code, which is only taken over as the
# error code when the write itself reported no error
420 error_result = c_buffer.error
421 if error_result == xmlerror.XML_ERR_OK:
422 error_result = tree.xmlOutputBufferClose(c_buffer)
# NOTE(review): the condition that resets the code to OK is not visible
424 error_result = xmlerror.XML_ERR_OK
# the buffer is closed on the error path as well
426 tree.xmlOutputBufferClose(c_buffer)
# re-acquire the GIL released above (presumably only if ``state`` was set)
428 python.PyEval_RestoreThread(state)
# re-raise an exception stored by the file-like writer (presumably only
# if a writer was created)
430 writer._exc_context._raise_if_stored()
431 if error_result != xmlerror.XML_ERR_OK:
432 _raiseSerialisationError(error_result)
# Write the C14N (canonical XML) serialisation of ``element`` to ``f``,
# which is handled either as a file name or as an object with a ``write``
# attribute.  Raises C14NError when canonicalisation fails.
# NOTE(review): the end of the signature (it has to provide the
# ``compression`` name used below) and many guards are not visible in this
# listing; comments marked "presumably" describe what the missing guards
# most likely select.
434 cdef _tofilelikeC14N(f, _Element element, bint exclusive, bint with_comments,
436 cdef _FilelikeWriter writer
437 cdef tree.xmlOutputBuffer* c_buffer
438 cdef char* c_filename
439 cdef xmlDoc* c_base_doc
# serialise from a document obtained from _fakeRootDoc() for the element;
# it is released again through _destroyFakeDoc() below
443 c_base_doc = element._c_node.doc
444 c_doc = _fakeRootDoc(c_base_doc, element._c_node)
# presumably the file name case: libxml2 writes the file itself
447 filename8 = _encodeFilename(f)
448 c_filename = _cstr(filename8)
450 bytes = c14n.xmlC14NDocSave(c_doc, NULL, exclusive, NULL,
451 with_comments, c_filename, compression)
# file-like object: the data is routed through a _FilelikeWriter
452 elif hasattr(f, u'write'):
453 writer = _FilelikeWriter(f, compression=compression)
454 c_buffer = writer._createOutputBuffer(NULL)
# the writer's error log is connected for the duration of the C14N call
455 writer.error_log.connect()
456 bytes = c14n.xmlC14NDocSaveTo(c_doc, NULL, exclusive, NULL,
457 with_comments, c_buffer)
458 writer.error_log.disconnect()
# the buffer is closed in both cases; the result code of the close replaces
# ``bytes`` in one of them (guards not visible)
460 bytes = tree.xmlOutputBufferClose(c_buffer)
462 tree.xmlOutputBufferClose(c_buffer)
# neither kind of target (the raise statement that uses the message below
# is not visible)
465 u"File or filename expected, got '%s'" % funicode(python._fqtypename(f))
467 _destroyFakeDoc(c_base_doc, c_doc)
# re-raise an exception stored by the file-like writer
469 if writer is not None:
470 writer._exc_context._raise_if_stored()
# failure path (guard not visible): take the message from the writer's
# error log if one is available, the generic text otherwise
473 message = u"C14N failed"
474 if writer is not None:
475 errors = writer.error_log
477 message = errors[0].message
478 raise C14NError, message
480 # dump node to file (mainly for debug)
# Dump a node to the C FILE* that backs the Python file object ``f``.
# Raises ValueError if ``f`` is not usable as a file.
# NOTE(review): the guards of the raise, of the tail write and of the
# newline write are not visible in this listing; the output buffer is not
# closed in the visible lines.
482 cdef _dumpToFile(f, xmlNode* c_node, bint pretty_print, bint with_tail):
483 cdef tree.xmlOutputBuffer* c_buffer
484 cdef cstd.FILE* c_file
# C-level FILE* of the Python file object
485 c_file = python.PyFile_AsFile(f)
487 raise ValueError, u"not a file"
# no encoding handler and no encoding name are passed here
488 c_buffer = tree.xmlOutputBufferCreateFile(c_file, NULL)
489 tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, pretty_print, NULL)
491 _writeTail(c_buffer, c_node, NULL, 0)
494 tree.xmlOutputBufferWriteString(c_buffer, '\n')
# push buffered output through to the file
495 tree.xmlOutputBufferFlush(c_buffer)