3 from lxml.includes cimport xslt
cdef class XSLTError(LxmlError):
    """Base class of all XSLT errors.

    Catch this class to handle any XSLT related failure (parsing,
    applying, saving, extension registration) in one place.
    """
cdef class XSLTParseError(XSLTError):
    """Error parsing a stylesheet document.

    Raised by the ``XSLT()`` constructor when libxslt cannot compile the
    stylesheet, and stored by the document loader when a stylesheet
    include/import cannot be resolved.
    """
cdef class XSLTApplyError(XSLTError):
    """Error running an XSL transformation.

    Raised when calling an ``XSLT`` object produces no result document,
    and stored by the document loader when a document requested during
    the transformation cannot be resolved.
    """
class XSLTSaveError(XSLTError, SerialisationError):
    """Error serialising an XSLT result.

    Also a ``SerialisationError``, so generic serialisation error handlers
    catch it as well.
    """
cdef class XSLTExtensionError(XSLTError):
    """Error registering an XSLT extension.

    Raised, for example, when an extension is registered without a
    namespace.
    """
# version information, unpacked from libxslt's integer encoding
# NOTE(review): presumably the first is the header version lxml was built
# against and the second the version of the library loaded at runtime - confirm
LIBXSLT_COMPILED_VERSION = __unpackIntVersion(xslt.LIBXSLT_VERSION)
LIBXSLT_VERSION = __unpackIntVersion(xslt.xsltLibxsltVersion)
32 ################################################################################
33 # Where do we store what?
35 # xsltStylesheet->doc->_private
36 # == _XSLTResolverContext for XSL stylesheet
38 # xsltTransformContext->_private
39 # == _XSLTResolverContext for transformed document
41 ################################################################################
44 ################################################################################
45 # XSLT document loaders
cdef class _XSLTResolverContext(_ResolverContext):
    # Resolver state handed to the XSLT document loader callbacks through
    # the '_private' pointers described in the "Where do we store what?"
    # banner above.
    #
    # _c_style_doc: libxml2 document of the stylesheet; the loader hands out
    #               a copy of it when the stylesheet's own URL is requested
    # _parser:      parser whose resolvers and settings are used for loading
    cdef xmlDoc* _c_style_doc
    cdef _BaseParser _parser

    cdef _XSLTResolverContext _copy(self):
        # Build a fresh context bound to the same parser.  Note that the
        # stylesheet document pointer is shared with 'self', not duplicated,
        # so the copy must not outlive (or free) the original's document.
        cdef _XSLTResolverContext context
        context = _XSLTResolverContext()
        _initXSLTResolverContext(context, self._parser)
        context._c_style_doc = self._c_style_doc
        return context
cdef _initXSLTResolverContext(_XSLTResolverContext context,
                              _BaseParser parser):
    # Initialise 'context' with the resolvers of 'parser', remember the
    # parser itself and reset the stylesheet document pointer to NULL.
    # Callers set '_c_style_doc' afterwards if they need it.
    _initResolverContext(context, parser.resolvers)
    context._parser = parser
    context._c_style_doc = NULL
cdef xmlDoc* _xslt_resolve_from_python(const_xmlChar* c_uri, void* c_context,
                                       int parse_options, int* error) with gil:
    # call the Python document loaders
    #
    # Resolves 'c_uri' through the Python-level resolvers registered on the
    # _XSLTResolverContext passed as 'c_context'.
    #
    # Returns a new xmlDoc* on success or NULL if no resolver provided a
    # document.  'error[0]' is set to 1 if a Python exception occurred (the
    # exception is stored in the context for later re-raising) and to 0
    # otherwise, so that the caller can tell "not handled" from "failed".
    # This function never propagates a Python exception.
    cdef _XSLTResolverContext context
    cdef _ResolverRegistry resolvers
    cdef _InputDocument doc_ref
    cdef xmlDoc* c_doc
    cdef xmlDoc* c_return_doc = NULL

    error[0] = 0
    context = <_XSLTResolverContext>c_context

    # shortcut if we resolve the stylesheet itself
    c_doc = context._c_style_doc
    try:
        if c_doc is not NULL and c_doc.URL is not NULL:
            if tree.xmlStrcmp(c_uri, c_doc.URL) == 0:
                c_return_doc = _copyDoc(c_doc, 1)
                return c_return_doc  # 'goto', see 'finally' below

        # delegate to the Python resolvers
        resolvers = context._resolvers
        # strip the 26 character fake URL prefix that XSLT.__init__() puts in
        # front of stylesheets that were parsed without a URL
        if tree.xmlStrncmp(<unsigned char*>'string://__STRING__XSLT__/', c_uri, 26) == 0:
            c_uri += 26
        uri = _decodeFilename(c_uri)
        doc_ref = resolvers.resolve(uri, None, context)

        if doc_ref is not None:
            # parse whatever kind of input the resolver handed back
            if doc_ref._type == PARSER_DATA_STRING:
                c_return_doc = _parseDoc(
                    doc_ref._data_bytes, doc_ref._filename, context._parser)
            elif doc_ref._type == PARSER_DATA_FILENAME:
                c_return_doc = _parseDocFromFile(
                    doc_ref._filename, context._parser)
            elif doc_ref._type == PARSER_DATA_FILE:
                c_return_doc = _parseDocFromFilelike(
                    doc_ref._file, doc_ref._filename, context._parser)
            elif doc_ref._type == PARSER_DATA_EMPTY:
                c_return_doc = _newXMLDoc()
            # make sure the result carries the URL it was requested under
            if c_return_doc is not NULL and c_return_doc.URL is NULL:
                c_return_doc.URL = tree.xmlStrdup(c_uri)
    except:
        error[0] = 1
        context._store_raised()
    finally:
        return c_return_doc  # and swallow any further exceptions
cdef void _xslt_store_resolver_exception(const_xmlChar* c_uri, void* context,
                                         xslt.xsltLoadType c_type) with gil:
    # Record a "Cannot resolve URI" error in the resolver context so that it
    # can be raised once control is back in Python code.  The exception type
    # depends on when the lookup happened: XSLTApplyError for documents
    # loaded during a transformation, XSLTParseError otherwise.
    # Never propagates an exception itself.
    try:
        message = f"Cannot resolve URI {_decodeFilename(c_uri)}"
        if c_type == xslt.XSLT_LOAD_DOCUMENT:
            exception = XSLTApplyError(message)
        else:
            exception = XSLTParseError(message)
        (<_XSLTResolverContext>context)._store_exception(exception)
    except BaseException as e:
        # e.g. the URI could not be decoded => store that error instead
        (<_XSLTResolverContext>context)._store_exception(e)
    finally:
        return  # and swallow any further exceptions
cdef xmlDoc* _xslt_doc_loader(const_xmlChar* c_uri, tree.xmlDict* c_dict,
                              int parse_options, void* c_ctxt,
                              xslt.xsltLoadType c_type) nogil:
    # nogil => no Python objects here, may be called without thread context !
    #
    # Document loader callback installed into libxslt.  Tries the Python
    # resolvers first and falls back to libxslt's default loader if they did
    # not handle the URI.  Returns the loaded document or NULL.
    cdef xmlDoc* c_doc
    cdef xmlDoc* result
    cdef void* c_pcontext
    cdef int error = 0
    # find resolver contexts of stylesheet and transformed doc
    if c_type == xslt.XSLT_LOAD_DOCUMENT:
        # transformation time
        c_pcontext = (<xslt.xsltTransformContext*>c_ctxt)._private
    elif c_type == xslt.XSLT_LOAD_STYLESHEET:
        # include/import resolution while parsing
        c_pcontext = (<xslt.xsltStylesheet*>c_ctxt).doc._private
    else:
        c_pcontext = NULL

    if c_pcontext is NULL:
        # can't call Python without context, fall back to default loader
        return XSLT_DOC_DEFAULT_LOADER(
            c_uri, c_dict, parse_options, c_ctxt, c_type)

    c_doc = _xslt_resolve_from_python(c_uri, c_pcontext, parse_options, &error)
    if c_doc is NULL and not error:
        # not handled by Python and no exception => let libxslt try
        c_doc = XSLT_DOC_DEFAULT_LOADER(
            c_uri, c_dict, parse_options, c_ctxt, c_type)
        if c_doc is NULL:
            _xslt_store_resolver_exception(c_uri, c_pcontext, c_type)

    if c_doc is not NULL and c_type == xslt.XSLT_LOAD_STYLESHEET:
        # pass the resolver context on to included/imported stylesheets
        c_doc._private = c_pcontext
    return c_doc
# keep a reference to libxslt's own loader as fallback, then install our
# loader; this happens as a side effect of loading the module
cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER = xslt.xsltDocDefaultLoader
xslt.xsltSetLoaderFunc(<xslt.xsltDocLoaderFunc>_xslt_doc_loader)
166 ################################################################################
167 # XSLT file/network access control
cdef class XSLTAccessControl:
    u"""XSLTAccessControl(self, read_file=True, write_file=True, create_dir=True, read_network=True, write_network=True)

    Access control for XSLT: reading/writing files, directories and
    network I/O.  Access to a type of resource is granted or denied by
    passing any of the following boolean keyword arguments.  All of
    them default to True to allow access.

    - read_file
    - write_file
    - create_dir
    - read_network
    - write_network

    For convenience, there is also a class member `DENY_ALL` that
    provides an XSLTAccessControl instance that is readily configured
    to deny everything, and a `DENY_WRITE` member that denies all
    write access but allows read access.

    See `XSLT`.
    """
    # libxslt security preferences, owned by this object
    cdef xslt.xsltSecurityPrefs* _prefs
    def __cinit__(self):
        # allocate at C level so that _prefs is valid before __init__() runs
        self._prefs = xslt.xsltNewSecurityPrefs()
        if self._prefs is NULL:
            raise MemoryError()

    def __init__(self, *, bint read_file=True, bint write_file=True, bint create_dir=True,
                 bint read_network=True, bint write_network=True):
        # map each keyword argument to the corresponding libxslt option
        self._setAccess(xslt.XSLT_SECPREF_READ_FILE, read_file)
        self._setAccess(xslt.XSLT_SECPREF_WRITE_FILE, write_file)
        self._setAccess(xslt.XSLT_SECPREF_CREATE_DIRECTORY, create_dir)
        self._setAccess(xslt.XSLT_SECPREF_READ_NETWORK, read_network)
        self._setAccess(xslt.XSLT_SECPREF_WRITE_NETWORK, write_network)

    # preconfigured instance that denies every kind of access
    DENY_ALL = XSLTAccessControl(
        read_file=False, write_file=False, create_dir=False,
        read_network=False, write_network=False)

    # preconfigured instance that allows reading but denies writing
    DENY_WRITE = XSLTAccessControl(
        read_file=True, write_file=False, create_dir=False,
        read_network=True, write_network=False)

    def __dealloc__(self):
        if self._prefs is not NULL:
            xslt.xsltFreeSecurityPrefs(self._prefs)

    cdef _setAccess(self, xslt.xsltSecurityOption option, bint allow):
        # install libxslt's 'allow' or 'forbid' check function for 'option'
        cdef xslt.xsltSecurityCheck function
        if allow:
            function = xslt.xsltSecurityAllow
        else:
            function = xslt.xsltSecurityForbid
        xslt.xsltSetSecurityPrefs(self._prefs, option, function)

    cdef void _register_in_context(self, xslt.xsltTransformContext* ctxt):
        # apply these preferences to a single transformation run
        xslt.xsltSetCtxtSecurityPrefs(self._prefs, ctxt)

    @property
    def options(self):
        """The access control configuration as a map of options."""
        return {
            u'read_file': self._optval(xslt.XSLT_SECPREF_READ_FILE),
            u'write_file': self._optval(xslt.XSLT_SECPREF_WRITE_FILE),
            u'create_dir': self._optval(xslt.XSLT_SECPREF_CREATE_DIRECTORY),
            u'read_network': self._optval(xslt.XSLT_SECPREF_READ_NETWORK),
            u'write_network': self._optval(xslt.XSLT_SECPREF_WRITE_NETWORK),
        }

    cdef _optval(self, xslt.xsltSecurityOption option):
        # Translate the installed check function back into a Python value:
        # True (allowed), False (forbidden) or None (anything else).
        cdef xslt.xsltSecurityCheck function
        function = xslt.xsltGetSecurityPrefs(self._prefs, option)
        if function is <xslt.xsltSecurityCheck>xslt.xsltSecurityAllow:
            return True
        elif function is <xslt.xsltSecurityCheck>xslt.xsltSecurityForbid:
            return False
        else:
            return None

    def __repr__(self):
        # unqualified class name followed by the sorted option settings
        items = sorted(self.options.items())
        return u"%s(%s)" % (
            python._fqtypename(self).decode('UTF-8').split(u'.')[-1],
            u', '.join([u"%s=%r" % item for item in items]))
257 ################################################################################
cdef int _register_xslt_function(void* ctxt, name_utf, ns_utf):
    # Callback used by _XSLTContext.register_context() to make a Python
    # extension function available in an XSLT transform context.
    # Functions without a namespace are skipped (returns 0); otherwise the
    # return value of xsltRegisterExtFunction() is passed through.
    if ns_utf is None:
        return 0
    # libxml2 internalises the strings if ctxt has a dict
    return xslt.xsltRegisterExtFunction(
        <xslt.xsltTransformContext*>ctxt, _xcstr(name_utf), _xcstr(ns_utf),
        <xslt.xmlXPathFunction>_xpath_function_call)
268 cdef dict EMPTY_DICT = {}
cdef class _XSLTContext(_BaseContext):
    # Evaluation context of a single XSLT run: extends the XPath base context
    # with the libxslt transform context and the extension elements.
    cdef xslt.xsltTransformContext* _xsltCtxt
    cdef _ReadOnlyElementProxy _extension_element_proxy
    # maps (namespace, name) as UTF-8 bytes to XSLTExtension instances
    cdef dict _extension_elements
    def __cinit__(self):
        self._xsltCtxt = NULL
        # shared placeholder, replaced by a private dict on first use
        self._extension_elements = EMPTY_DICT

    def __init__(self, namespaces, extensions, error_log, enable_regexp,
                 build_smart_strings):
        # Split 'extensions' into extension elements (kept here) and
        # extension functions (passed on to the base class).
        # Raises XSLTExtensionError for an extension without namespace.
        if extensions is not None and extensions:
            for ns_name_tuple, extension in extensions.items():
                if ns_name_tuple[0] is None:
                    raise XSLTExtensionError, \
                        u"extensions must not have empty namespaces"
                if isinstance(extension, XSLTExtension):
                    if self._extension_elements is EMPTY_DICT:
                        self._extension_elements = {}
                        # work on a copy from here on: neither modify the
                        # caller's dict nor the one we are iterating over
                        extensions = extensions.copy()
                    ns_utf   = _utf8(ns_name_tuple[0])
                    name_utf = _utf8(ns_name_tuple[1])
                    self._extension_elements[(ns_utf, name_utf)] = extension
                    del extensions[ns_name_tuple]
        _BaseContext.__init__(self, namespaces, extensions, error_log, enable_regexp,
                              build_smart_strings)

    cdef _BaseContext _copy(self):
        # copy the base context; the extension element dict is shared
        cdef _XSLTContext context
        context = <_XSLTContext>_BaseContext._copy(self)
        context._extension_elements = self._extension_elements
        return context

    cdef register_context(self, xslt.xsltTransformContext* xsltCtxt,
                               _Document doc):
        # bind this context to a libxslt transform context and register all
        # extension functions and extension elements with it
        self._xsltCtxt = xsltCtxt
        self._set_xpath_context(xsltCtxt.xpathCtxt)
        self._register_context(doc)
        self.registerLocalFunctions(xsltCtxt, _register_xslt_function)
        self.registerGlobalFunctions(xsltCtxt, _register_xslt_function)
        _registerXSLTExtensions(xsltCtxt, self._extension_elements)

    cdef free_context(self):
        # release everything acquired by register_context(), including the
        # libxslt transform context itself
        self._cleanup_context()
        self._release_context()
        if self._xsltCtxt is not NULL:
            xslt.xsltFreeTransformContext(self._xsltCtxt)
            self._xsltCtxt = NULL
        self._release_temp_refs()
cdef class _XSLTQuotedStringParam:
    u"""A wrapper class for literal XSLT string parameters that require
    quote escaping.

    Instances are created by ``XSLT.strparam()`` and recognised when the
    parameters of a transformation call are converted.
    """
    # the wrapped string value, UTF-8 encoded
    cdef bytes strval
    def __cinit__(self, strval):
        self.strval = _utf8(strval)
336 u"""XSLT(self, xslt_input, extensions=None, regexp=True, access_control=None)
338 Turn an XSL document into an XSLT object.
340 Calling this object on a tree or Element will execute the XSLT::
342 transform = etree.XSLT(xsl_tree)
343 result = transform(xml_tree)
345 Keyword arguments of the constructor:
347 - extensions: a dict mapping ``(namespace, name)`` pairs to
348 extension functions or extension elements
349 - regexp: enable exslt regular expression support in XPath
351 - access_control: access restrictions for network or file
352 system (see `XSLTAccessControl`)
354 Keyword arguments of the XSLT call:
356 - profile_run: enable XSLT profiling (default: False)
358 Other keyword arguments of the call are passed to the stylesheet
361 cdef _XSLTContext _context
362 cdef xslt.xsltStylesheet* _c_style
363 cdef _XSLTResolverContext _xslt_resolver_context
364 cdef XSLTAccessControl _access_control
365 cdef _ErrorLog _error_log
def __init__(self, xslt_input, *, extensions=None, regexp=True,
             access_control=None):
    # Compile 'xslt_input' (a tree or Element) into a libxslt stylesheet.
    #
    # Raises XSLTParseError if the stylesheet cannot be compiled, or
    # whatever exception a Python resolver raised while loading
    # included/imported documents.
    cdef xslt.xsltStylesheet* c_style = NULL
    cdef xmlDoc* c_doc
    cdef _Document doc
    cdef _Element root_node

    doc = _documentOrRaise(xslt_input)
    root_node = _rootNodeOrRaise(xslt_input)

    # set access control or raise TypeError
    self._access_control = access_control

    # make a copy of the document as stylesheet parsing modifies it
    c_doc = _copyDocRoot(doc._c_doc, root_node._c_node)

    # make sure we always have a stylesheet URL
    if c_doc.URL is NULL:
        doc_url_utf = python.PyUnicode_AsASCIIString(
            f"string://__STRING__XSLT__/{id(self)}.xslt")
        c_doc.URL = tree.xmlStrdup(_xcstr(doc_url_utf))

    self._error_log = _ErrorLog()
    self._xslt_resolver_context = _XSLTResolverContext()
    _initXSLTResolverContext(self._xslt_resolver_context, doc._parser)
    # keep a copy in case we need to access the stylesheet via 'document()'
    self._xslt_resolver_context._c_style_doc = _copyDoc(c_doc, 1)
    c_doc._private = <python.PyObject*>self._xslt_resolver_context

    with self._error_log:
        c_style = xslt.xsltParseStylesheetDoc(c_doc)

    if c_style is NULL or c_style.errors:
        tree.xmlFreeDoc(c_doc)
        if c_style is not NULL:
            # The stylesheet refers to c_doc, which was just freed above.
            # Detach it so that xsltFreeStylesheet() does not free the
            # document a second time.
            c_style.doc = NULL
            xslt.xsltFreeStylesheet(c_style)
        # errors raised by Python resolvers take precedence
        self._xslt_resolver_context._raise_if_stored()
        # last error seems to be the most accurate here
        if self._error_log.last_error is not None and \
                self._error_log.last_error.message:
            raise XSLTParseError(self._error_log.last_error.message,
                                 self._error_log)
        else:
            raise XSLTParseError(
                self._error_log._buildExceptionMessage(
                    u"Cannot parse stylesheet"),
                self._error_log)

    c_doc._private = NULL # no longer used!
    self._c_style = c_style
    self._context = _XSLTContext(None, extensions, self._error_log, regexp, True)
def __dealloc__(self):
    # free the private stylesheet copy that is kept for 'document()' lookups
    if self._xslt_resolver_context is not None and \
           self._xslt_resolver_context._c_style_doc is not NULL:
        tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc)
    # this cleans up the doc copy as well
    if self._c_style is not NULL:
        xslt.xsltFreeStylesheet(self._c_style)
432 """The log of errors and warnings of an XSLT execution."""
433 return self._error_log.copy()
def strparam(strval):
    u"""strparam(strval)

    Mark an XSLT string parameter that requires quote escaping
    before passing it into the transformation.  Use it like this::

        result = transform(doc, some_strval = XSLT.strparam(
            '''it's \"Monty Python's\" ...'''))

    Escaped string parameters can be reused without restriction.

    Returns an opaque wrapper object that is only meaningful as a
    keyword argument value of an XSLT call.
    """
    return _XSLTQuotedStringParam(strval)
def set_global_max_depth(int max_depth):
    u"""set_global_max_depth(max_depth)

    The maximum traversal depth that the stylesheet engine will allow.
    This does not only count the template recursion depth but also takes
    the number of variables/parameters into account.  The required setting
    for a run depends on both the stylesheet and the input data.

    Example::

        XSLT.set_global_max_depth(5000)

    Note that this is currently a global, module-wide setting because
    libxslt does not support it at a per-stylesheet level.

    Raises ValueError for a negative ``max_depth``.
    """
    if max_depth < 0:
        raise ValueError("cannot set a maximum stylesheet traversal depth < 0")
    # global libxslt variable => affects all stylesheets in the process
    xslt.xsltMaxDepth = max_depth
def apply(self, _input, *, profile_run=False, **kw):
    u"""apply(self, _input, profile_run=False, **kw)

    Run the transformation on ``_input``.  This simply forwards all
    arguments to ``self(...)``.

    :deprecated: call the object, not this method."""
    return self(_input, profile_run=profile_run, **kw)
def tostring(self, _ElementTree result_tree):
    u"""tostring(self, result_tree)

    Save result doc to string based on stylesheet output method.

    This simply returns ``str(result_tree)``.

    :deprecated: use str(result_tree) instead.
    """
    return str(result_tree)
def __deepcopy__(self, memo):
    # 'memo' is not used: deep and shallow copies are created the same way
    return self.__copy__()
488 return _copyXSLT(self)
def __call__(self, _input, *, profile_run=False, **kw):
    u"""__call__(self, _input, profile_run=False, **kw)

    Execute the XSL transformation on a tree or Element.

    Pass the ``profile_run`` option to get profile information
    about the XSLT.  The result of the XSLT will have a property
    xslt_profile that holds an XML tree with profiling data.

    All other keyword arguments are passed to the stylesheet as
    parameters.  Raises XSLTApplyError if the transformation does
    not produce a result, or the exception raised by a Python
    resolver or extension during the run.
    """
    cdef _XSLTContext context = None
    cdef _XSLTResolverContext resolver_context
    cdef _Document input_doc
    cdef _Element root_node
    cdef _Document result_doc
    cdef _Document profile_doc = None
    cdef xmlDoc* c_profile_doc
    cdef xslt.xsltTransformContext* transform_ctxt
    cdef xmlDoc* c_result = NULL
    cdef xmlDoc* c_doc
    cdef tree.xmlDict* c_dict
    cdef const_char** params = NULL

    assert self._c_style is not NULL, "XSLT stylesheet not initialised"
    input_doc = _documentOrRaise(_input)
    root_node = _rootNodeOrRaise(_input)

    # from here on, every exit path must call _destroyFakeDoc()
    c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node)

    transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc)
    if transform_ctxt is NULL:
        _destroyFakeDoc(input_doc._c_doc, c_doc)
        raise MemoryError()

    # using the stylesheet dict is safer than using a possibly
    # unrelated dict from the current thread.  Almost all
    # non-input tag/attr names will come from the stylesheet
    # anyway.
    if transform_ctxt.dict is not NULL:
        xmlparser.xmlDictFree(transform_ctxt.dict)
    if kw:
        # parameter values are stored in the dict
        # => avoid unnecessarily cluttering the global dict
        transform_ctxt.dict = xmlparser.xmlDictCreateSub(self._c_style.doc.dict)
        if transform_ctxt.dict is NULL:
            xslt.xsltFreeTransformContext(transform_ctxt)
            # same cleanup as on the other failure paths: the fake
            # document must not be left behind
            _destroyFakeDoc(input_doc._c_doc, c_doc)
            raise MemoryError()
    else:
        transform_ctxt.dict = self._c_style.doc.dict
        xmlparser.xmlDictReference(transform_ctxt.dict)

    xslt.xsltSetCtxtParseOptions(
        transform_ctxt, input_doc._parser._parse_options)

    if profile_run:
        transform_ctxt.profile = 1

    try:
        context = self._context._copy()
        context.register_context(transform_ctxt, input_doc)

        resolver_context = self._xslt_resolver_context._copy()
        transform_ctxt._private = <python.PyObject*>resolver_context

        _convert_xslt_parameters(transform_ctxt, kw, &params)
        c_result = self._run_transform(
            c_doc, params, context, transform_ctxt)
        if params is not NULL:
            # deallocate space for parameters
            python.lxml_free(params)

        if transform_ctxt.state != xslt.XSLT_STATE_OK:
            # discard partial results of a failed run
            if c_result is not NULL:
                tree.xmlFreeDoc(c_result)
                c_result = NULL

        if transform_ctxt.profile:
            c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
            if c_profile_doc is not NULL:
                profile_doc = _documentFactory(
                    c_profile_doc, input_doc._parser)
    finally:
        # free_context() also frees the libxslt transform context
        if context is not None:
            context.free_context()
        _destroyFakeDoc(input_doc._c_doc, c_doc)

    try:
        # exceptions from Python resolvers come first, then those from
        # Python extensions, then the generic libxslt error
        if resolver_context is not None and resolver_context._has_raised():
            if c_result is not NULL:
                tree.xmlFreeDoc(c_result)
                c_result = NULL
            resolver_context._raise_if_stored()

        if context._exc._has_raised():
            if c_result is not NULL:
                tree.xmlFreeDoc(c_result)
                c_result = NULL
            context._exc._raise_if_stored()

        if c_result is NULL:
            # last error seems to be the most accurate here
            error = self._error_log.last_error
            if error is not None and error.message:
                if error.line > 0:
                    message = f"{error.message}, line {error.line}"
                else:
                    message = error.message
            elif error is not None and error.line > 0:
                message = f"Error applying stylesheet, line {error.line}"
            else:
                message = u"Error applying stylesheet"
            raise XSLTApplyError(message, self._error_log)
    finally:
        if resolver_context is not None:
            resolver_context.clear()

    result_doc = _documentFactory(c_result, input_doc._parser)

    # Move the result document over to the dict of the current thread:
    # names may have been taken from the transform dict, the stylesheet
    # dict or the input document dict, so fix up each one that differs.
    c_dict = c_result.dict
    xmlparser.xmlDictReference(c_dict)
    __GLOBAL_PARSER_CONTEXT.initThreadDictRef(&c_result.dict)
    if c_dict is not c_result.dict or \
            self._c_style.doc.dict is not c_result.dict or \
            input_doc._c_doc.dict is not c_result.dict:
        with nogil:
            if c_dict is not c_result.dict:
                fixThreadDictNames(<xmlNode*>c_result,
                                   c_dict, c_result.dict)
            if self._c_style.doc.dict is not c_result.dict:
                fixThreadDictNames(<xmlNode*>c_result,
                                   self._c_style.doc.dict, c_result.dict)
            if input_doc._c_doc.dict is not c_result.dict:
                fixThreadDictNames(<xmlNode*>c_result,
                                   input_doc._c_doc.dict, c_result.dict)
    xmlparser.xmlDictFree(c_dict)

    return _xsltResultTreeFactory(result_doc, self, profile_doc)
cdef xmlDoc* _run_transform(self, xmlDoc* c_input_doc,
                            const_char** params, _XSLTContext context,
                            xslt.xsltTransformContext* transform_ctxt):
    # Run the compiled stylesheet on 'c_input_doc' and return the result
    # document as returned by libxslt.
    # 'params' is the NULL terminated name/value array built by
    # _convert_xslt_parameters(), or NULL.  'context' is not used here.
    cdef xmlDoc* c_result
    # route libxslt error messages of this run into our error log
    xslt.xsltSetTransformErrorFunc(transform_ctxt, <void*>self._error_log,
        <xmlerror.xmlGenericErrorFunc>_receiveXSLTError)
    if self._access_control is not None:
        self._access_control._register_in_context(transform_ctxt)
    # the transformation itself runs without the GIL
    with self._error_log, nogil:
        c_result = xslt.xsltApplyStylesheetUser(
            self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt)
    return c_result
cdef _convert_xslt_parameters(xslt.xsltTransformContext* transform_ctxt,
                              dict parameters, const_char*** params_ptr):
    # Convert the keyword arguments of an XSLT call into the NULL terminated
    # C array of alternating names and values that libxslt expects.
    #
    # On return, 'params_ptr[0]' is NULL if there are no parameters, or a
    # newly allocated array that the caller must release with lxml_free().
    # Values wrapped by XSLT.strparam() are not put into the array but are
    # registered directly on the transform context.
    cdef Py_ssize_t i, parameter_count
    cdef const_char** params
    cdef tree.xmlDict* c_dict = transform_ctxt.dict
    params_ptr[0] = NULL
    parameter_count = len(parameters)
    if parameter_count == 0:
        return
    # allocate space for parameters
    # * 2 as we want an entry for both key and value,
    # and + 1 as array is NULL terminated
    params = <const_char**>python.lxml_malloc(parameter_count * 2 + 1, sizeof(const_char*))
    if not params:
        raise MemoryError()
    try:
        i = 0
        for key, value in parameters.iteritems():
            k = _utf8(key)
            if isinstance(value, _XSLTQuotedStringParam):
                v = (<_XSLTQuotedStringParam>value).strval
                xslt.xsltQuoteOneUserParam(
                    transform_ctxt, _xcstr(k), _xcstr(v))
            else:
                if isinstance(value, XPath):
                    v = (<XPath>value)._path
                else:
                    v = _utf8(value)
                # store the strings in the dict of the transform context so
                # that the array entries do not depend on the lifetime of
                # the temporary Python byte strings
                params[i] = <const_char*>tree.xmlDictLookup(c_dict, _xcstr(k), len(k))
                i += 1
                params[i] = <const_char*>tree.xmlDictLookup(c_dict, _xcstr(v), len(v))
                i += 1
    except:
        python.lxml_free(params)
        raise
    params[i] = NULL
    params_ptr[0] = params
cdef XSLT _copyXSLT(XSLT stylesheet):
    # Create an independent copy of a compiled stylesheet.
    # The access control object is shared; the error log is new; context,
    # resolver context and stylesheet documents are copied.  The stylesheet
    # itself is compiled again from a copy of its document.
    # Raises MemoryError if the stylesheet copy cannot be compiled.
    cdef XSLT new_xslt
    cdef xmlDoc* c_doc
    assert stylesheet._c_style is not NULL, "XSLT stylesheet not initialised"
    # bypass __init__(), all fields are set up by hand below
    new_xslt = XSLT.__new__(XSLT)
    new_xslt._access_control = stylesheet._access_control
    new_xslt._error_log = _ErrorLog()
    new_xslt._context = stylesheet._context._copy()

    new_xslt._xslt_resolver_context = stylesheet._xslt_resolver_context._copy()
    # _copy() shares the document pointer, but each XSLT object frees its
    # own document on deallocation => needs a private copy
    new_xslt._xslt_resolver_context._c_style_doc = _copyDoc(
        stylesheet._xslt_resolver_context._c_style_doc, 1)

    c_doc = _copyDoc(stylesheet._c_style.doc, 1)
    new_xslt._c_style = xslt.xsltParseStylesheetDoc(c_doc)
    if new_xslt._c_style is NULL:
        tree.xmlFreeDoc(c_doc)
        raise MemoryError()

    return new_xslt
cdef class _XSLTResultTree(_ElementTree):
    """The result of an XSLT evaluation.

    Use ``str()`` or ``bytes()`` (or ``unicode()`` in Python 2.x) to serialise to a string,
    and the ``.write_output()`` method to serialise to a file.
    """
    # the stylesheet that produced this result, defines the output settings
    cdef XSLT _xslt
    # profiling data of the run, or None
    cdef _Document _profile
    # serialised result shared by read-only buffer exports, with its
    # length and the number of exports currently referring to it
    cdef xmlChar* _buffer
    cdef Py_ssize_t _buffer_len
    cdef Py_ssize_t _buffer_refcnt

    def write_output(self, file, *, compression=0):
        """write_output(self, file, *, compression=0)

        Serialise the XSLT output to a file or file-like object.

        As opposed to the generic ``.write()`` method, ``.write_output()`` serialises
        the result as defined by the ``<xsl:output>`` tag.

        Raises XSLTSaveError if there is no document to serialise and
        IOError if writing fails.
        """
        cdef _FilelikeWriter writer = None
        cdef _Document doc
        cdef int r, rclose, c_compression
        cdef const_xmlChar* c_encoding = NULL
        cdef tree.xmlOutputBuffer* c_buffer

        if self._context_node is not None:
            doc = self._context_node._doc
        else:
            doc = None
        if doc is None:
            doc = self._doc
            if doc is None:
                raise XSLTSaveError("No document to serialise")
        # normalise falsy values such as None to "no compression"
        c_compression = compression or 0
        xslt.LXML_GET_XSLT_ENCODING(c_encoding, self._xslt._c_style)
        # pass on the normalised value, not the raw argument
        writer = _create_output_buffer(file, <const_char*>c_encoding, c_compression, &c_buffer, close=False)
        if writer is None:
            # not writing through a Python object => no GIL needed
            with nogil:
                r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style)
                rclose = tree.xmlOutputBufferClose(c_buffer)
        else:
            r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style)
            rclose = tree.xmlOutputBufferClose(c_buffer)
        if writer is not None:
            # re-raise exceptions of the Python file-like object first
            writer._exc_context._raise_if_stored()
        if r < 0 or rclose == -1:
            python.PyErr_SetFromErrno(IOError)  # raises IOError

    cdef _saveToStringAndSize(self, xmlChar** s, int* l):
        # Serialise the result as defined by the stylesheet into a newly
        # allocated C string.  Stores the string in 's[0]' and its length
        # in 'l[0]'.  's[0]' is NULL if there is no document.
        # The caller owns the string and must release it with xmlFree().
        cdef _Document doc
        cdef int r
        if self._context_node is not None:
            doc = self._context_node._doc
        else:
            doc = None
        if doc is None:
            doc = self._doc
            if doc is None:
                s[0] = NULL
                return
        with nogil:
            r = xslt.xsltSaveResultToString(s, l, doc._c_doc,
                                            self._xslt._c_style)
        if r == -1:
            raise MemoryError()

    def __str__(self):
        cdef xmlChar* s = NULL
        cdef int l = 0
        if not python.IS_PYTHON2:
            return self.__unicode__()
        self._saveToStringAndSize(&s, &l)
        if s is NULL:
            return ''
        # we must not use 'funicode()' here as this is not always UTF-8
        try:
            result = <bytes>s[:l]
        finally:
            tree.xmlFree(s)
        return result

    def __unicode__(self):
        cdef xmlChar* encoding
        cdef xmlChar* s = NULL
        cdef int l = 0
        self._saveToStringAndSize(&s, &l)
        if s is NULL:
            return u''
        # decode with the output encoding of the stylesheet, default UTF-8
        encoding = self._xslt._c_style.encoding
        try:
            if encoding is NULL:
                result = s[:l].decode('UTF-8')
            else:
                result = s[:l].decode(encoding)
        finally:
            tree.xmlFree(s)
        return _stripEncodingDeclaration(result)

    def __getbuffer__(self, Py_buffer* buffer, int flags):
        # Buffer protocol: export the serialised result as a byte buffer.
        # Read-only exports share one cached serialisation, writable
        # exports always get a private one.
        cdef int l = 0
        if buffer is NULL:
            return
        if self._buffer is NULL or flags & python.PyBUF_WRITABLE:
            self._saveToStringAndSize(<xmlChar**>&buffer.buf, &l)
            buffer.len = l
            if self._buffer is NULL and not flags & python.PyBUF_WRITABLE:
                # first read-only export => keep it for sharing
                self._buffer = <xmlChar*>buffer.buf
                self._buffer_len = l
                self._buffer_refcnt = 1
        else:
            buffer.buf = self._buffer
            buffer.len = self._buffer_len
            self._buffer_refcnt += 1
        if flags & python.PyBUF_WRITABLE:
            buffer.readonly = 0
        else:
            buffer.readonly = 1
        if flags & python.PyBUF_FORMAT:
            buffer.format = "B"
        else:
            buffer.format = NULL
        buffer.ndim = 0
        buffer.shape = NULL
        buffer.strides = NULL
        buffer.suboffsets = NULL
        buffer.itemsize = 1
        buffer.internal = NULL
        if buffer.obj is not self: # set by Cython?
            buffer.obj = self

    def __releasebuffer__(self, Py_buffer* buffer):
        if buffer is NULL:
            return
        if <xmlChar*>buffer.buf is self._buffer:
            # shared buffer: free it when the last export is released
            self._buffer_refcnt -= 1
            if self._buffer_refcnt == 0:
                tree.xmlFree(<char*>self._buffer)
                self._buffer = NULL
        else:
            # private buffer of a single export
            tree.xmlFree(<char*>buffer.buf)
        buffer.buf = NULL

    property xslt_profile:
        """Return an ElementTree with profiling data for the stylesheet run.
        """
        def __get__(self):
            cdef object root
            if self._profile is None:
                return None
            root = self._profile.getroot()
            if root is None:
                return None
            return ElementTree(root)

        def __del__(self):
            self._profile = None
cdef _xsltResultTreeFactory(_Document doc, XSLT xslt, _Document profile):
    # Wrap the result document 'doc' in an _XSLTResultTree that remembers
    # the stylesheet that created it and the profiling data (or None).
    cdef _XSLTResultTree result
    result = <_XSLTResultTree>_newElementTree(doc, None, _XSLTResultTree)
    result._xslt = xslt
    result._profile = profile
    return result
866 # functions like "output" and "write" are a potential security risk, but we
867 # rely on the user to configure XSLTAccessControl as needed
868 xslt.xsltRegisterAllExtras()
870 # enable EXSLT support for XSLT
871 xslt.exsltRegisterAll()
874 ################################################################################
877 cdef object _RE_PI_HREF = re.compile(ur'\s+href\s*=\s*(?:\'([^\']*)\'|"([^"]*)")')
878 cdef object _FIND_PI_HREF = _RE_PI_HREF.findall
879 cdef object _REPLACE_PI_HREF = _RE_PI_HREF.sub
880 cdef XPath __findStylesheetByID = None
cdef _findStylesheetByID(_Document doc, id):
    # Look up embedded <xsl:stylesheet> elements in 'doc' by their xml:id.
    # Returns the result of the XPath evaluation.  The compiled XPath
    # expression is created on first use and cached in a module global.
    global __findStylesheetByID
    cdef XPath find_by_id = __findStylesheetByID
    if find_by_id is None:
        xsl_namespaces = {u"xsl" : u"http://www.w3.org/1999/XSL/Transform"}
        find_by_id = XPath(
            u"//xsl:stylesheet[@xml:id = $id]", namespaces=xsl_namespaces)
        __findStylesheetByID = find_by_id
    return find_by_id(doc, id=id)
cdef class _XSLTProcessingInstruction(PIBase):
    def parseXSL(self, parser=None):
        u"""parseXSL(self, parser=None)

        Try to parse the stylesheet referenced by this PI and return
        an ElementTree for it.  If the stylesheet is embedded in the
        same document (referenced via xml:id), find and return an
        ElementTree for the stylesheet Element.

        The optional ``parser`` keyword argument can be passed to specify the
        parser used to read from external stylesheet URLs.

        Raises ValueError if the PI has no content, if it does not have
        exactly one ``href`` pseudo-attribute, or if an embedded stylesheet
        reference cannot be resolved unambiguously.
        """
        cdef _Document result_doc
        cdef _Element  result_node
        cdef bytes href_utf
        cdef const_xmlChar* c_href
        cdef const_xmlChar* c_base
        cdef xmlAttr* c_attr
        _assertValidNode(self)
        if self._c_node.content is NULL:
            raise ValueError, u"PI lacks content"
        # the regular expression requires whitespace before 'href'
        hrefs = _FIND_PI_HREF(u' ' + (<unsigned char*>self._c_node.content).decode('UTF-8'))
        if len(hrefs) != 1:
            raise ValueError, u"malformed PI attributes"
        # one group per quoting style, only one of them is non-empty
        hrefs = hrefs[0]
        href_utf = utf8(hrefs[0] or hrefs[1])
        c_href = _xcstr(href_utf)

        if c_href[0] != c'#':
            # normal URL, try to parse from it
            # xmlNodeGetBase() returns a newly allocated string (or NULL)
            # that has to be freed by the caller
            c_base = tree.xmlNodeGetBase(self._c_node.doc, self._c_node)
            c_href = tree.xmlBuildURI(c_href, c_base)
            if c_base is not NULL:
                tree.xmlFree(<char*>c_base)
            if c_href is not NULL:
                # copy the absolute URL into a Python string and free the
                # C string; otherwise, keep using the URL as written
                try:
                    href_utf = <unsigned char*>c_href
                finally:
                    tree.xmlFree(<char*>c_href)
            result_doc = _parseDocumentFromURL(href_utf, parser)
            return _elementTreeFactory(result_doc, None)

        # ID reference to embedded stylesheet
        # try XML:ID lookup
        _assertValidDoc(self._doc)
        c_href += 1 # skip leading '#'
        c_attr = tree.xmlGetID(self._c_node.doc, c_href)
        if c_attr is not NULL and c_attr.doc is self._c_node.doc:
            result_node = _elementFactory(self._doc, c_attr.parent)
            return _elementTreeFactory(result_node._doc, result_node)

        # try XPath search
        root = _findStylesheetByID(self._doc, funicode(c_href))
        if not root:
            raise ValueError, u"reference to non-existing embedded stylesheet"
        elif len(root) > 1:
            raise ValueError, u"ambiguous reference to embedded stylesheet"
        result_node = root[0]
        return _elementTreeFactory(result_node._doc, result_node)

    def set(self, key, value):
        u"""set(self, key, value)

        Supports setting the 'href' pseudo-attribute in the text of
        the processing instruction.

        Passing None as value removes an existing 'href'.  Raises
        AttributeError for any other key and ValueError if the value
        contains '"' or '>'.
        """
        if key != u"href":
            raise AttributeError, \
                u"only setting the 'href' attribute is supported on XSLT-PIs"
        if value is None:
            attrib = u""
        elif u'"' in value or u'>' in value:
            raise ValueError, u"Invalid URL, must not contain '\"' or '>'"
        else:
            attrib = f' href="{value}"'
        text = u' ' + self.text
        if _FIND_PI_HREF(text):
            # Use a replacement function: a replacement *string* would be
            # interpreted as a template, so that backslashes in the URL
            # (e.g. in Windows paths) are taken as escapes or group
            # references instead of being inserted literally.
            self.text = _REPLACE_PI_HREF(lambda match: attrib, text)
        else:
            self.text = text + attrib