Upload Tizen:Base source
[toolchains/python-lxml.git] / src / lxml / serializer.pxi
1 # XML serialization and output functions
2
class SerialisationError(LxmlError):
    u"""A libxml2 error that occurred during serialisation.

    Raised by the serialisation functions in this file when libxml2 fails
    to produce output or reports an error code on the output buffer.
    """
6
# Output method selectors used by the serialisation functions in this file.
# _findOutputMethod() maps a user-supplied method name onto one of them.
cdef enum _OutputMethods:
    # XML output; also selected when no method name is given
    OUTPUT_METHOD_XML
    # HTML output
    OUTPUT_METHOD_HTML
    # plain text content of the node, handled by _textToString()
    OUTPUT_METHOD_TEXT
11
cdef int _findOutputMethod(method) except -1:
    u"""Translate an output method name into an _OutputMethods value.

    None selects XML output.  The name is compared case-insensitively
    against "xml", "html" and "text"; any other name raises ValueError.
    """
    cdef int c_selected
    if method is None:
        return OUTPUT_METHOD_XML
    name = method.lower()
    if name == u"xml":
        c_selected = OUTPUT_METHOD_XML
    elif name == u"html":
        c_selected = OUTPUT_METHOD_HTML
    elif name == u"text":
        c_selected = OUTPUT_METHOD_TEXT
    else:
        raise ValueError, u"unknown output method %r" % name
    return c_selected
23
cdef _textToString(xmlNode* c_node, encoding, bint with_tail):
    u"""Return the text content of a node as a string.

    The content of ``c_node`` is collected into a temporary libxml2 buffer
    with ``xmlNodeBufGetContent()``.  If ``with_tail`` is set, the content
    of the following nodes returned by ``_textNodeOrSkip()`` is appended.

    ``encoding`` selects the result type:

    * ``_unicode``: the UTF-8 buffer content is decoded to a unicode string
    * None or UTF-8: the buffer content is returned without conversion
    * any other name: the content is decoded from UTF-8 and re-encoded
      (for ASCII only if ``isutf8()`` reports a true value for the buffer)

    Raises SerialisationError if libxml2 reports an error or provides no
    buffer content, and a memory error if the buffer cannot be allocated.
    """
    cdef bint needs_conversion
    cdef char* c_text
    cdef xmlNode* c_text_node
    cdef tree.xmlBuffer* c_buffer
    cdef int error_result

    c_buffer = tree.xmlBufferCreate()
    if c_buffer is NULL:
        return python.PyErr_NoMemory()

    # only C-level calls in here, so the GIL can be released
    with nogil:
        error_result = tree.xmlNodeBufGetContent(c_buffer, c_node)
        if with_tail:
            # append the tail text that follows the node
            c_text_node = _textNodeOrSkip(c_node.next)
            while c_text_node is not NULL:
                tree.xmlBufferWriteChar(c_buffer, c_text_node.content)
                c_text_node = _textNodeOrSkip(c_text_node.next)
        c_text = tree.xmlBufferContent(c_buffer)

    if error_result < 0 or c_text is NULL:
        # the buffer is not covered by the try-finally below yet
        tree.xmlBufferFree(c_buffer)
        raise SerialisationError, u"Error during serialisation (out of memory?)"

    try:
        # decide whether the UTF-8 buffer content must be decoded/re-encoded
        needs_conversion = 0
        if encoding is _unicode:
            needs_conversion = 1
        elif encoding is not None:
            encoding = encoding.upper()
            if encoding != u'UTF-8':
                if encoding == u'ASCII':
                    if isutf8(c_text):
                        # will raise a decode error below
                        needs_conversion = 1
                else:
                    needs_conversion = 1

        if needs_conversion:
            text = python.PyUnicode_DecodeUTF8(
                c_text, tree.xmlBufferLength(c_buffer), 'strict')
            if encoding is not _unicode:
                encoding = _utf8(encoding)
                text = python.PyUnicode_AsEncodedString(
                    text, encoding, 'strict')
        else:
            # copies the C string into a Python string object, so the
            # buffer can safely be freed afterwards
            text = c_text
    finally:
        tree.xmlBufferFree(c_buffer)
    return text
74
75
cdef _tostring(_Element element, encoding, method,
               bint write_xml_declaration, bint write_complete_document,
               bint pretty_print, bint with_tail, int standalone):
    u"""Serialize an element to an encoded string representation of its XML
    tree.

    Returns None if ``element`` is None.  For the "text" output method, the
    work is delegated to ``_textToString()``.  Otherwise, the tree is
    written into an in-memory libxml2 output buffer by
    ``_writeNodeToBuffer()`` and the buffer content is returned as a
    unicode string (``encoding is _unicode``) or as a byte string.

    Raises LookupError for an encoding name that libxml2 does not know,
    ValueError for an unknown output method (from ``_findOutputMethod()``)
    and SerialisationError (or a memory error) if libxml2 reports a
    failure while writing.
    """
    cdef tree.xmlOutputBuffer* c_buffer
    cdef tree.xmlBuffer* c_result_buffer
    cdef tree.xmlCharEncodingHandler* enchandler
    cdef char* c_enc
    cdef char* c_version
    cdef int c_method
    cdef int error_result
    if element is None:
        return None
    c_method = _findOutputMethod(method)
    if c_method == OUTPUT_METHOD_TEXT:
        return _textToString(element._c_node, encoding, with_tail)
    if encoding is None or encoding is _unicode:
        c_enc = NULL
    else:
        # 'encoding' keeps the byte string alive that c_enc points into
        encoding = _utf8(encoding)
        c_enc = _cstr(encoding)
    # it is necessary to *and* find the encoding handler *and* use
    # encoding during output
    enchandler = tree.xmlFindCharEncodingHandler(c_enc)
    if enchandler is NULL and c_enc is not NULL:
        if encoding is not None:
            encoding = encoding.decode(u'UTF-8')
        raise LookupError, u"unknown encoding: '%s'" % encoding
    c_buffer = tree.xmlAllocOutputBuffer(enchandler)
    if c_buffer is NULL:
        # the handler was not handed over to a buffer, so release it here
        tree.xmlCharEncCloseFunc(enchandler)
        return python.PyErr_NoMemory()

    with nogil:
        _writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_method,
                           write_xml_declaration, write_complete_document,
                           pretty_print, with_tail, standalone)
        tree.xmlOutputBufferFlush(c_buffer)
        # read the result from the conversion buffer if there is one,
        # otherwise from the plain output buffer
        if c_buffer.conv is not NULL:
            c_result_buffer = c_buffer.conv
        else:
            c_result_buffer = c_buffer.buffer

    error_result = c_buffer.error
    if error_result != xmlerror.XML_ERR_OK:
        tree.xmlOutputBufferClose(c_buffer)
        _raiseSerialisationError(error_result)

    try:
        if encoding is _unicode:
            result = python.PyUnicode_DecodeUTF8(
                tree.xmlBufferContent(c_result_buffer),
                tree.xmlBufferLength(c_result_buffer),
                'strict')
        else:
            result = python.PyString_FromStringAndSize(
                tree.xmlBufferContent(c_result_buffer),
                tree.xmlBufferLength(c_result_buffer))
    finally:
        # always release the output buffer, even if the copy above failed
        error_result = tree.xmlOutputBufferClose(c_buffer)
    if error_result < 0:
        _raiseSerialisationError(error_result)
    return result
141
cdef _raiseSerialisationError(int error_result):
    u"""Raise the exception that corresponds to a libxml2 error code.

    XML_ERR_NO_MEMORY is reported through PyErr_NoMemory().  Every other
    code raises SerialisationError, using the name that ErrorTypes knows
    for the code or a generic "unknown error" message.
    """
    if error_result == xmlerror.XML_ERR_NO_MEMORY:
        return python.PyErr_NoMemory()
    error_name = ErrorTypes._getName(error_result)
    if error_name is None:
        error_name = u"unknown error %d" % error_result
    raise SerialisationError, error_name
150
151 ############################################################
152 # low-level serialisation functions
153
cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
                             xmlNode* c_node, char* encoding, int c_method,
                             bint write_xml_declaration,
                             bint write_complete_document,
                             bint pretty_print, bint with_tail,
                             int standalone) nogil:
    # Write a node to a libxml2 output buffer, optionally surrounded by the
    # XML declaration, the internal DTD subset, sibling PIs/comments and the
    # tail text.  Runs without the GIL.  Failures are not returned; an
    # out-of-memory condition on the node copy is stored in c_buffer.error.
    cdef xmlDoc* c_doc
    cdef xmlNode* c_nsdecl_node
    c_doc = c_node.doc
    # the XML declaration is only written for the XML output method
    if write_xml_declaration and c_method == OUTPUT_METHOD_XML:
        _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding, standalone)

    # write internal DTD subset, preceding PIs/comments, etc.
    if write_complete_document:
        _writeDtdToBuffer(c_buffer, c_doc, c_node.name, encoding)
        _writePrevSiblings(c_buffer, c_node, encoding, pretty_print)

    c_nsdecl_node = c_node
    if c_node.parent is NULL or c_node.parent.type != tree.XML_DOCUMENT_NODE:
        # copy the node and add namespaces from parents
        # this is required to make libxml write them
        c_nsdecl_node = tree.xmlCopyNode(c_node, 2)
        if c_nsdecl_node is NULL:
            c_buffer.error = xmlerror.XML_ERR_NO_MEMORY
            return
        _copyParentNamespaces(c_node, c_nsdecl_node)

        # let the copy borrow the tree links of the original node, so that
        # its children get serialised without being copied
        c_nsdecl_node.parent = c_node.parent
        c_nsdecl_node.children = c_node.children
        c_nsdecl_node.last = c_node.last

    # write node
    if c_method == OUTPUT_METHOD_XML:
        tree.xmlNodeDumpOutput(
            c_buffer, c_doc, c_nsdecl_node, 0, pretty_print, encoding)
    else:
        tree.htmlNodeDumpFormatOutput(
            c_buffer, c_doc, c_nsdecl_node, encoding, pretty_print)

    if c_nsdecl_node is not c_node:
        # clean up
        # detach the borrowed children first, so that freeing the copy
        # does not free the children of the original node
        c_nsdecl_node.children = c_nsdecl_node.last = NULL
        tree.xmlFreeNode(c_nsdecl_node)

    # write tail, trailing comments, etc.
    if with_tail:
        _writeTail(c_buffer, c_node, encoding, pretty_print)
    if write_complete_document:
        _writeNextSiblings(c_buffer, c_node, encoding, pretty_print)
    if pretty_print:
        tree.xmlOutputBufferWrite(c_buffer, 1, "\n")
205
cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
                                    char* version, char* encoding,
                                    int standalone) nogil:
    # Write the XML declaration, including the trailing newline.
    # A NULL version is written as "1.0".  A standalone value of 0 adds
    # standalone='no', 1 adds standalone='yes', any other value leaves the
    # standalone attribute out.
    cdef char* c_declaration_end
    if version is NULL:
        version = "1.0"

    # pick the closing part of the declaration up front
    if standalone == 0:
        c_declaration_end = "' standalone='no'?>\n"
    elif standalone == 1:
        c_declaration_end = "' standalone='yes'?>\n"
    else:
        c_declaration_end = "'?>\n"

    tree.xmlOutputBufferWriteString(c_buffer, "<?xml version='")
    tree.xmlOutputBufferWriteString(c_buffer, version)
    tree.xmlOutputBufferWriteString(c_buffer, "' encoding='")
    tree.xmlOutputBufferWriteString(c_buffer, encoding)
    tree.xmlOutputBufferWriteString(c_buffer, c_declaration_end)
221
cdef void _writeDtdToBuffer(tree.xmlOutputBuffer* c_buffer,
                            xmlDoc* c_doc, char* c_root_name,
                            char* encoding) nogil:
    # Write the DOCTYPE declaration of the document's internal DTD subset.
    #
    # Nothing is written if the document has no internal subset, if the
    # subset has no name, or if its name differs from c_root_name.
    #
    # The external ID is written as
    #   PUBLIC "public-id" "system-id"   if both IDs are non-empty,
    #   PUBLIC "public-id"               if only the public ID is non-empty
    #                                    (common for HTML doctypes),
    #   SYSTEM "system-id"               if only the system ID is non-empty.
    # If the subset has content, its notations and child nodes are written
    # inside "[ ... ]".
    cdef tree.xmlDtd* c_dtd
    cdef xmlNode* c_node
    cdef bint has_public_id
    cdef bint has_system_id
    c_dtd = c_doc.intSubset
    if c_dtd is NULL or c_dtd.name is NULL:
        return
    if cstd.strcmp(c_root_name, c_dtd.name) != 0:
        return
    tree.xmlOutputBufferWrite(c_buffer, 10, "<!DOCTYPE ")
    tree.xmlOutputBufferWriteString(c_buffer, c_dtd.name)

    # empty strings are treated like missing IDs
    has_public_id = c_dtd.ExternalID != NULL and c_dtd.ExternalID[0] != c'\0'
    has_system_id = c_dtd.SystemID != NULL and c_dtd.SystemID[0] != c'\0'
    if has_public_id:
        # the public ID must not depend on the presence of a system ID,
        # otherwise a PUBLIC-only doctype loses its identifier
        tree.xmlOutputBufferWrite(c_buffer, 9, ' PUBLIC "')
        tree.xmlOutputBufferWriteString(c_buffer, c_dtd.ExternalID)
        tree.xmlOutputBufferWrite(c_buffer, 1, '"')
        if has_system_id:
            tree.xmlOutputBufferWrite(c_buffer, 2, ' "')
            tree.xmlOutputBufferWriteString(c_buffer, c_dtd.SystemID)
            tree.xmlOutputBufferWrite(c_buffer, 1, '"')
    elif has_system_id:
        tree.xmlOutputBufferWrite(c_buffer, 9, ' SYSTEM "')
        tree.xmlOutputBufferWriteString(c_buffer, c_dtd.SystemID)
        tree.xmlOutputBufferWrite(c_buffer, 1, '"')

    if c_dtd.entities == NULL and c_dtd.elements == NULL and \
           c_dtd.attributes == NULL and c_dtd.notations == NULL and \
           c_dtd.pentities == NULL:
        # no internal subset content, close the declaration right away
        tree.xmlOutputBufferWrite(c_buffer, 2, '>\n')
        return
    tree.xmlOutputBufferWrite(c_buffer, 3, ' [\n')
    if c_dtd.notations != NULL:
        tree.xmlDumpNotationTable(c_buffer.buffer,
                                  <tree.xmlNotationTable*>c_dtd.notations)
    c_node = c_dtd.children
    while c_node is not NULL:
        tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, 0, encoding)
        c_node = c_node.next
    tree.xmlOutputBufferWrite(c_buffer, 3, "]>\n")
257
cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
                     char* encoding, bint pretty_print) nogil:
    u"Write the text nodes that directly follow the element (its tail)."
    cdef xmlNode* c_tail
    c_tail = c_node.next
    while c_tail is not NULL:
        if c_tail.type != tree.XML_TEXT_NODE:
            # the tail ends at the first sibling that is not a text node
            break
        tree.xmlNodeDumpOutput(
            c_buffer, c_tail.doc, c_tail, 0, pretty_print, encoding)
        c_tail = c_tail.next
264                                pretty_print, encoding)
265         c_node = c_node.next
266
267 cdef void _writePrevSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
268                              char* encoding, bint pretty_print) nogil:
269     cdef xmlNode* c_sibling
270     if c_node.parent is not NULL and _isElement(c_node.parent):
271         return
272     # we are at a root node, so add PI and comment siblings
273     c_sibling = c_node
274     while c_sibling.prev != NULL and \
275             (c_sibling.prev.type == tree.XML_PI_NODE or \
276                  c_sibling.prev.type == tree.XML_COMMENT_NODE):
277         c_sibling = c_sibling.prev
278     while c_sibling != c_node:
279         tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0,
280                                pretty_print, encoding)
281         if pretty_print:
282             tree.xmlOutputBufferWriteString(c_buffer, "\n")
283         c_sibling = c_sibling.next
284
285 cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
286                              char* encoding, bint pretty_print) nogil:
287     cdef xmlNode* c_sibling
288     if c_node.parent is not NULL and _isElement(c_node.parent):
289         return
290     # we are at a root node, so add PI and comment siblings
291     c_sibling = c_node.next
292     while c_sibling != NULL and \
293             (c_sibling.type == tree.XML_PI_NODE or \
294                  c_sibling.type == tree.XML_COMMENT_NODE):
295         if pretty_print:
296             tree.xmlOutputBufferWriteString(c_buffer, "\n")
297         tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0,
298                                pretty_print, encoding)
299         c_sibling = c_sibling.next
300
301 ############################################################
302 # output to file-like objects
303
cdef class _FilelikeWriter:
    u"""Adapter that lets libxml2 write to a Python file-like object.

    With a positive ``compression`` level, the target is wrapped in a
    ``gzip.GzipFile`` that gets closed when the writer is closed.
    Exceptions raised while writing or closing are stored in the exception
    context and reported to libxml2 as a -1 return value.
    """
    cdef object _filelike
    cdef object _close_filelike
    cdef _ExceptionContext _exc_context
    cdef _ErrorLog error_log
    def __init__(self, filelike, exc_context=None, compression=None):
        if compression is not None and compression > 0:
            # only the gzip wrapper created here is closed on close()
            filelike = gzip.GzipFile(
                fileobj=filelike, mode=u'wb', compresslevel=compression)
            self._close_filelike = filelike.close
        self._filelike = filelike
        self._exc_context = (
            _ExceptionContext() if exc_context is None else exc_context)
        self.error_log = _ErrorLog()

    cdef tree.xmlOutputBuffer* _createOutputBuffer(
        self, tree.xmlCharEncodingHandler* enchandler) except NULL:
        # Create a libxml2 output buffer that writes through this object.
        cdef tree.xmlOutputBuffer* c_buffer = tree.xmlOutputBufferCreateIO(
            _writeFilelikeWriter, _closeFilelikeWriter,
            <python.PyObject*>self, enchandler)
        if c_buffer is not NULL:
            return c_buffer
        raise IOError, u"Could not create I/O writer context."

    cdef int write(self, char* c_buffer, int size):
        # Pass 'size' bytes on to the file-like object.
        # Returns 'size' on success and -1 after storing any exception.
        try:
            target = self._filelike
            if target is None:
                raise IOError, u"File is already closed"
            target.write(python.PyString_FromStringAndSize(c_buffer, size))
        except:
            self._exc_context._store_raised()
            return -1
        return size

    cdef int close(self):
        # Close the gzip wrapper (if any) and drop the file reference.
        # Returns 0 on success and -1 after storing any exception.
        try:
            close_wrapper = self._close_filelike
            if close_wrapper is not None:
                close_wrapper()
            # we should not close the file here as we didn't open it
            self._filelike = None
        except:
            self._exc_context._store_raised()
            return -1
        return 0
352
cdef int _writeFilelikeWriter(void* ctxt, char* c_buffer, int len):
    # Write callback registered with xmlOutputBufferCreateIO(): 'ctxt' is
    # the _FilelikeWriter instance, the call is forwarded to its write().
    return (<_FilelikeWriter>ctxt).write(c_buffer, len)
355
cdef int _closeFilelikeWriter(void* ctxt):
    # Close callback registered with xmlOutputBufferCreateIO(): 'ctxt' is
    # the _FilelikeWriter instance, the call is forwarded to its close().
    return (<_FilelikeWriter>ctxt).close()
358
cdef _tofilelike(f, _Element element, encoding, method,
                 bint write_xml_declaration, bint write_doctype,
                 bint pretty_print, bint with_tail, int standalone,
                 int compression):
    u"""Serialise an element to a file name or a file-like object.

    ``f`` is either a string (a file name) or an object with a ``write``
    method; anything else raises TypeError.

    For the "text" output method, the text content is built by
    ``_textToString()``, gzip-compressed in memory if ``compression`` is
    non-zero, and written to ``f`` in one go.  For the other methods, the
    tree is written through a libxml2 output buffer by
    ``_writeNodeToBuffer()``; when writing to a file name, the GIL is
    released during the write.

    Raises LookupError if libxml2 provides no handler for the encoding,
    ValueError for an unknown output method, re-raises exceptions that the
    file-like object raised while writing, and raises SerialisationError
    (or a memory error) if libxml2 reports a failure.
    """
    cdef python.PyThreadState* state = NULL
    cdef _FilelikeWriter writer
    cdef tree.xmlOutputBuffer* c_buffer
    cdef tree.xmlCharEncodingHandler* enchandler
    cdef char* c_enc
    cdef int error_result
    if encoding is None:
        c_enc = NULL
    else:
        # 'encoding' keeps the byte string alive that c_enc points into
        encoding = _utf8(encoding)
        c_enc = _cstr(encoding)
    c_method = _findOutputMethod(method)
    if c_method == OUTPUT_METHOD_TEXT:
        data = _textToString(element._c_node, encoding, with_tail)
        if compression:
            bytes_out = BytesIO()
            gzip_file = gzip.GzipFile(
                fileobj=bytes_out, mode=u'wb', compresslevel=compression)
            try:
                gzip_file.write(data)
            finally:
                gzip_file.close()
            # write the compressed bytes, not the BytesIO object itself
            data = bytes_out.getvalue()
        if _isString(f):
            filename8 = _encodeFilename(f)
            f = open(filename8, u'wb')
            try:
                f.write(data)
            finally:
                f.close()
        else:
            f.write(data)
        return
    enchandler = tree.xmlFindCharEncodingHandler(c_enc)
    if enchandler is NULL:
        if encoding is not None:
            encoding = encoding.decode(u'UTF-8')
        raise LookupError, u"unknown encoding: '%s'" % encoding

    if _isString(f):
        filename8 = _encodeFilename(f)
        c_buffer = tree.xmlOutputBufferCreateFilename(
            _cstr(filename8), enchandler, compression)
        if c_buffer is NULL:
            return python.PyErr_SetFromErrno(IOError)
        # no Python code runs while libxml2 writes to the file itself
        state = python.PyEval_SaveThread()
    elif hasattr(f, u'write'):
        writer   = _FilelikeWriter(f, compression=compression)
        c_buffer = writer._createOutputBuffer(enchandler)
    else:
        tree.xmlCharEncCloseFunc(enchandler)
        raise TypeError, \
            u"File or filename expected, got '%s'" % funicode(python._fqtypename(f))

    _writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_method,
                       write_xml_declaration, write_doctype,
                       pretty_print, with_tail, standalone)
    error_result = c_buffer.error
    if error_result == xmlerror.XML_ERR_OK:
        # a positive result of the close call is not an error
        error_result = tree.xmlOutputBufferClose(c_buffer)
        if error_result > 0:
            error_result = xmlerror.XML_ERR_OK
    else:
        # keep the error code of the write, but still release the buffer
        tree.xmlOutputBufferClose(c_buffer)
    if writer is None:
        # file name case: re-acquire the GIL that was released above
        python.PyEval_RestoreThread(state)
    else:
        # file-like case: report exceptions raised by the Python writer
        writer._exc_context._raise_if_stored()
    if error_result != xmlerror.XML_ERR_OK:
        _raiseSerialisationError(error_result)
433
cdef _tofilelikeC14N(f, _Element element, bint exclusive, bint with_comments,
                     int compression):
    u"""Write the C14N serialisation of an element to a file name or a
    file-like object.

    The element is serialised through a temporary document created by
    ``_fakeRootDoc()``, which is always destroyed again afterwards.
    Raises TypeError if ``f`` is neither a string nor an object with a
    ``write`` method, re-raises exceptions stored by the file-like writer,
    and raises C14NError if libxml2 reports a failure.
    """
    cdef _FilelikeWriter writer
    cdef tree.xmlOutputBuffer* c_buffer
    cdef char* c_filename
    cdef xmlDoc* c_base_doc
    cdef xmlDoc* c_doc
    cdef int c_result = -1
    cdef int c_close_result

    c_base_doc = element._c_node.doc
    c_doc = _fakeRootDoc(c_base_doc, element._c_node)
    try:
        if _isString(f):
            filename8 = _encodeFilename(f)
            c_filename = _cstr(filename8)
            with nogil:
                c_result = c14n.xmlC14NDocSave(
                    c_doc, NULL, exclusive, NULL,
                    with_comments, c_filename, compression)
        elif hasattr(f, u'write'):
            writer   = _FilelikeWriter(f, compression=compression)
            c_buffer = writer._createOutputBuffer(NULL)
            writer.error_log.connect()
            c_result = c14n.xmlC14NDocSaveTo(
                c_doc, NULL, exclusive, NULL, with_comments, c_buffer)
            writer.error_log.disconnect()
            # the buffer is always closed, but the result of closing it
            # only replaces a successful C14N result
            c_close_result = tree.xmlOutputBufferClose(c_buffer)
            if c_result >= 0:
                c_result = c_close_result
        else:
            raise TypeError, \
                u"File or filename expected, got '%s'" % funicode(python._fqtypename(f))
    finally:
        _destroyFakeDoc(c_base_doc, c_doc)

    if writer is not None:
        writer._exc_context._raise_if_stored()

    if c_result >= 0:
        return

    # prefer the first message collected in the writer's error log
    message = u"C14N failed"
    if writer is not None:
        errors = writer.error_log
        if len(errors):
            message = errors[0].message
    raise C14NError, message
479
480 # dump node to file (mainly for debug)
481
cdef _dumpToFile(f, xmlNode* c_node, bint pretty_print, bint with_tail):
    u"""Dump a node to a Python file object (mainly for debugging).

    The node is written, followed by its tail text if ``with_tail`` is
    set.  A newline is appended if ``pretty_print`` is not set.

    Raises ValueError if ``f`` does not provide a C-level FILE, and a
    memory error if libxml2 cannot allocate the output buffer.
    """
    cdef tree.xmlOutputBuffer* c_buffer
    cdef cstd.FILE* c_file
    c_file = python.PyFile_AsFile(f)
    if c_file is NULL:
        raise ValueError, u"not a file"
    c_buffer = tree.xmlOutputBufferCreateFile(c_file, NULL)
    if c_buffer is NULL:
        return python.PyErr_NoMemory()
    tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, pretty_print, NULL)
    if with_tail:
        _writeTail(c_buffer, c_node, NULL, 0)
    if not pretty_print:
        # not written yet
        tree.xmlOutputBufferWriteString(c_buffer, '\n')
    # Closing flushes the pending output and frees the buffer, which was
    # leaked when it was only flushed.  For a buffer created by
    # xmlOutputBufferCreateFile(), closing flushes the FILE but does not
    # fclose() it, so the Python file object stays usable.
    tree.xmlOutputBufferClose(c_buffer)