3 from lxml.includes cimport xslt
# Root of this module's XSLT exception hierarchy (derives from LxmlError);
# the other XSLT*Error classes in this file derive from it.
6 cdef class XSLTError(LxmlError):
7 """Base class of all XSLT errors.
# Raised by XSLT.__init__ when the stylesheet cannot be parsed; also stored by
# _xslt_store_resolver_exception() for unresolved URIs.
10 cdef class XSLTParseError(XSLTError):
11 """Error parsing a stylesheet document.
# Raised by XSLT.__call__ when applying the stylesheet fails; also stored by
# _xslt_store_resolver_exception() for XSLT_LOAD_DOCUMENT requests that cannot be resolved.
14 cdef class XSLTApplyError(XSLTError):
15 """Error running an XSL transformation.
# Raised by _XSLTResultTree.write_output() when there is no document to serialise.
# Declared as a plain `class` (not `cdef class`) with two bases, XSLTError and
# SerialisationError -- presumably so that handlers for either base catch it.
18 class XSLTSaveError(XSLTError, SerialisationError):
19 """Error serialising an XSLT result.
# Raised by _XSLTContext.__init__ when an extension is registered without a namespace.
22 cdef class XSLTExtensionError(XSLTError):
23 """Error registering an XSLT extension.
# libxslt version information exposed at module level.  Both values are
# converted by __unpackIntVersion() from the integer version numbers that
# libxslt provides (xslt.LIBXSLT_VERSION and xslt.xsltLibxsltVersion).
28 LIBXSLT_COMPILED_VERSION = __unpackIntVersion(xslt.LIBXSLT_VERSION)
29 LIBXSLT_VERSION = __unpackIntVersion(xslt.xsltLibxsltVersion)
32 ################################################################################
33 # Where do we store what?
35 # xsltStylesheet->doc->_private
36 # == _XSLTResolverContext for XSL stylesheet
38 # xsltTransformContext->_private
39 # == _XSLTResolverContext for transformed document
41 ################################################################################
44 ################################################################################
45 # XSLT document loaders
# Resolver context used by the XSLT document loader.  On top of
# _ResolverContext it carries the stylesheet document and the parser that is
# used for documents returned by resolvers.
49 cdef class _XSLTResolverContext(_ResolverContext):
# stylesheet document; _xslt_resolve_from_python() compares requested URIs against its URL
50 cdef xmlDoc* _c_style_doc
# parser passed to the _parseDoc*() helpers in _xslt_resolve_from_python()
51 cdef _BaseParser _parser
# Build a fresh context that is initialised from the same parser.
53 cdef _XSLTResolverContext _copy(self):
54 cdef _XSLTResolverContext context
55 context = _XSLTResolverContext()
56 _initXSLTResolverContext(context, self._parser)
# the xmlDoc pointer is shared with this context; the document itself is not duplicated here
57 context._c_style_doc = self._c_style_doc
# Set up `context` for use with `parser`: initialise the base resolver context
# from parser.resolvers, remember the parser, and start without a stylesheet
# document (callers assign _c_style_doc afterwards where needed).
60 cdef _initXSLTResolverContext(_XSLTResolverContext context,
62 _initResolverContext(context, parser.resolvers)
63 context._parser = parser
64 context._c_style_doc = NULL
# Resolve `c_uri` through the Python-level resolvers of the resolver context
# passed as `c_context`.  Declared "with gil", so it acquires the GIL itself.
# Returns a new xmlDoc*, or NULL (the initial value of c_return_doc) when
# nothing was resolved.
# NOTE(review): the statements that write the `error` out-parameter are not
# visible in this listing; _xslt_doc_loader() reads it after the call.
66 cdef xmlDoc* _xslt_resolve_from_python(const_xmlChar* c_uri, void* c_context,
67 int parse_options, int* error) with gil:
68 # call the Python document loaders
69 cdef _XSLTResolverContext context
70 cdef _ResolverRegistry resolvers
71 cdef _InputDocument doc_ref
73 cdef xmlDoc* c_return_doc = NULL
# c_context is the _XSLTResolverContext stored in a libxslt `_private` slot
76 context = <_XSLTResolverContext>c_context
78 # shortcut if we resolve the stylesheet itself
79 c_doc = context._c_style_doc
# a request for the stylesheet's own URL is answered with a copy of the kept stylesheet document
81 if c_doc is not NULL and c_doc.URL is not NULL:
82 if tree.xmlStrcmp(c_uri, c_doc.URL) == 0:
83 c_return_doc = _copyDoc(c_doc, 1)
84 return c_return_doc # 'goto', see 'finally' below
86 # delegate to the Python resolvers
87 resolvers = context._resolvers
# 26 is the length of the literal prefix; XSLT.__init__ builds URLs with this
# prefix for stylesheets that have no URL of their own
88 if tree.xmlStrncmp(<unsigned char*>'string://__STRING__XSLT__/', c_uri, 26) == 0:
90 uri = _decodeFilename(c_uri)
91 doc_ref = resolvers.resolve(uri, None, context)
# build the result document according to the kind of input the resolver returned
93 if doc_ref is not None:
94 if doc_ref._type == PARSER_DATA_STRING:
95 c_return_doc = _parseDoc(
96 doc_ref._data_bytes, doc_ref._filename, context._parser)
97 elif doc_ref._type == PARSER_DATA_FILENAME:
98 c_return_doc = _parseDocFromFile(
99 doc_ref._filename, context._parser)
100 elif doc_ref._type == PARSER_DATA_FILE:
101 c_return_doc = _parseDocFromFilelike(
102 doc_ref._file, doc_ref._filename, context._parser)
103 elif doc_ref._type == PARSER_DATA_EMPTY:
104 c_return_doc = _newXMLDoc()
# make sure the returned document carries a URL; fall back to the requested URI
105 if c_return_doc is not NULL and c_return_doc.URL is NULL:
106 c_return_doc.URL = tree.xmlStrdup(c_uri)
# keep the raised exception in the resolver context; the caller of the
# transformation/parse re-raises it later via _raise_if_stored()
109 context._store_raised()
111 return c_return_doc # and swallow any further exceptions
# Record a "Cannot resolve URI ..." error in the resolver context passed as
# `context` (cast to _XSLTResolverContext).  Acquires the GIL ("with gil").
# XSLT_LOAD_DOCUMENT requests get an XSLTApplyError; the other visible branch
# creates an XSLTParseError.  An exception raised while building the error is
# stored in the context as well, so nothing propagates out of this function.
114 cdef void _xslt_store_resolver_exception(const_xmlChar* c_uri, void* context,
115 xslt.xsltLoadType c_type) with gil:
117 message = f"Cannot resolve URI {_decodeFilename(c_uri)}"
118 if c_type == xslt.XSLT_LOAD_DOCUMENT:
119 exception = XSLTApplyError(message)
121 exception = XSLTParseError(message)
122 (<_XSLTResolverContext>context)._store_exception(exception)
123 except BaseException as e:
124 (<_XSLTResolverContext>context)._store_exception(e)
126 return # and swallow any further exceptions
# Document loader callback with libxslt's xsltDocLoaderFunc signature; it is
# installed through xslt.xsltSetLoaderFunc() in this file.  Runs without the
# GIL ("nogil"); the Python-level work happens in the "with gil" helpers it calls.
# Order of attempts: Python resolvers first, then XSLT_DOC_DEFAULT_LOADER when
# they returned NULL without signalling an error.
129 cdef xmlDoc* _xslt_doc_loader(const_xmlChar* c_uri, tree.xmlDict* c_dict,
130 int parse_options, void* c_ctxt,
131 xslt.xsltLoadType c_type) nogil:
132 # nogil => no Python objects here, may be called without thread context !
135 cdef void* c_pcontext
137 # find resolver contexts of stylesheet and transformed doc
# the meaning of c_ctxt depends on c_type: a transform context for document
# loads, a stylesheet for stylesheet loads
138 if c_type == xslt.XSLT_LOAD_DOCUMENT:
139 # transformation time
140 c_pcontext = (<xslt.xsltTransformContext*>c_ctxt)._private
141 elif c_type == xslt.XSLT_LOAD_STYLESHEET:
142 # include/import resolution while parsing
143 c_pcontext = (<xslt.xsltStylesheet*>c_ctxt).doc._private
147 if c_pcontext is NULL:
148 # can't call Python without context, fall back to default loader
149 return XSLT_DOC_DEFAULT_LOADER(
150 c_uri, c_dict, parse_options, c_ctxt, c_type)
152 c_doc = _xslt_resolve_from_python(c_uri, c_pcontext, parse_options, &error)
153 if c_doc is NULL and not error:
154 c_doc = XSLT_DOC_DEFAULT_LOADER(
155 c_uri, c_dict, parse_options, c_ctxt, c_type)
# NOTE(review): the condition guarding the next call is not visible in this listing
157 _xslt_store_resolver_exception(c_uri, c_pcontext, c_type)
# hand the resolver context on to loaded stylesheet documents; the
# XSLT_LOAD_STYLESHEET branch above reads it back from `doc._private`
159 if c_doc is not NULL and c_type == xslt.XSLT_LOAD_STYLESHEET:
160 c_doc._private = c_pcontext
# Keep a reference to libxslt's default loader so that _xslt_doc_loader() can
# fall back to it, then install _xslt_doc_loader() as the document loader.
# Both statements run at module level.
163 cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER = xslt.xsltDocDefaultLoader
164 xslt.xsltSetLoaderFunc(<xslt.xsltDocLoaderFunc>_xslt_doc_loader)
166 ################################################################################
167 # XSLT file/network access control
# Python wrapper around a libxslt xsltSecurityPrefs structure.  Each keyword
# flag maps to one XSLT_SECPREF_* option; an instance is attached to a
# transform context through _register_in_context().
169 cdef class XSLTAccessControl:
170 u"""XSLTAccessControl(self, read_file=True, write_file=True, create_dir=True, read_network=True, write_network=True)
172 Access control for XSLT: reading/writing files, directories and
173 network I/O. Access to a type of resource is granted or denied by
174 passing any of the following boolean keyword arguments. All of
175 them default to True to allow access.
183 For convenience, there is also a class member `DENY_ALL` that
184 provides an XSLTAccessControl instance that is readily configured
185 to deny everything, and a `DENY_WRITE` member that denies all
186 write access but allows read access.
# security preferences owned by this object: allocated with
# xsltNewSecurityPrefs(), released in __dealloc__
190 cdef xslt.xsltSecurityPrefs* _prefs
# NOTE(review): the header of the method holding the next two lines and the
# body of the NULL check are not visible in this listing
192 self._prefs = xslt.xsltNewSecurityPrefs()
193 if self._prefs is NULL:
# Apply every keyword flag to its libxslt security option; all flags are keyword-only.
196 def __init__(self, *, bint read_file=True, bint write_file=True, bint create_dir=True,
197 bint read_network=True, bint write_network=True):
198 self._setAccess(xslt.XSLT_SECPREF_READ_FILE, read_file)
199 self._setAccess(xslt.XSLT_SECPREF_WRITE_FILE, write_file)
200 self._setAccess(xslt.XSLT_SECPREF_CREATE_DIRECTORY, create_dir)
201 self._setAccess(xslt.XSLT_SECPREF_READ_NETWORK, read_network)
202 self._setAccess(xslt.XSLT_SECPREF_WRITE_NETWORK, write_network)
# preconfigured instance: every kind of access denied
204 DENY_ALL = XSLTAccessControl(
205 read_file=False, write_file=False, create_dir=False,
206 read_network=False, write_network=False)
# preconfigured instance: reading allowed, writing and directory creation denied
208 DENY_WRITE = XSLTAccessControl(
209 read_file=True, write_file=False, create_dir=False,
210 read_network=True, write_network=False)
# Release the libxslt preferences structure, if one was allocated.
212 def __dealloc__(self):
213 if self._prefs is not NULL:
214 xslt.xsltFreeSecurityPrefs(self._prefs)
# Install xsltSecurityAllow or xsltSecurityForbid as the check function for
# `option`; the branch lines that select between them on `allow` are not
# visible in this listing.
217 cdef _setAccess(self, xslt.xsltSecurityOption option, bint allow):
218 cdef xslt.xsltSecurityCheck function
220 function = xslt.xsltSecurityAllow
222 function = xslt.xsltSecurityForbid
223 xslt.xsltSetSecurityPrefs(self._prefs, option, function)
# Attach these preferences to the given libxslt transform context.
226 cdef void _register_in_context(self, xslt.xsltTransformContext* ctxt):
227 xslt.xsltSetCtxtSecurityPrefs(self._prefs, ctxt)
231 """The access control configuration as a map of options."""
233 u'read_file': self._optval(xslt.XSLT_SECPREF_READ_FILE),
234 u'write_file': self._optval(xslt.XSLT_SECPREF_WRITE_FILE),
235 u'create_dir': self._optval(xslt.XSLT_SECPREF_CREATE_DIRECTORY),
236 u'read_network': self._optval(xslt.XSLT_SECPREF_READ_NETWORK),
237 u'write_network': self._optval(xslt.XSLT_SECPREF_WRITE_NETWORK),
# Look up the check function currently set for `option` and compare it with
# the allow/forbid functions; the values returned per branch are not visible
# in this listing.
241 cdef _optval(self, xslt.xsltSecurityOption option):
242 cdef xslt.xsltSecurityCheck function
243 function = xslt.xsltGetSecurityPrefs(self._prefs, option)
244 if function is <xslt.xsltSecurityCheck>xslt.xsltSecurityAllow:
246 elif function is <xslt.xsltSecurityCheck>xslt.xsltSecurityForbid:
# textual representation (method header not visible): built from the
# unqualified type name and the sorted "option=value" pairs
252 items = sorted(self.options.items())
254 python._fqtypename(self).decode('UTF-8').split(u'.')[-1],
255 u', '.join([u"%s=%r" % item for item in items]))
257 ################################################################################
# Register _xpath_function_call as the implementation of the extension
# function (ns_utf, name_utf) on the XSLT transform context passed as `ctxt`
# and return libxslt's result code.  Passed as a callback to
# registerLocalFunctions()/registerGlobalFunctions() in
# _XSLTContext.register_context().
# NOTE(review): the lines between the signature and the comment below are not
# visible in this listing.
260 cdef int _register_xslt_function(void* ctxt, name_utf, ns_utf):
263 # libxml2 internalises the strings if ctxt has a dict
264 return xslt.xsltRegisterExtFunction(
265 <xslt.xsltTransformContext*>ctxt, _xcstr(name_utf), _xcstr(ns_utf),
266 <xslt.xmlXPathFunction>_xpath_function_call)
# Shared placeholder meaning "no extension elements".  _XSLTContext tests for
# it by identity (`is EMPTY_DICT`) and swaps in a fresh dict before adding
# entries, so this object itself is not written to there.
268 cdef dict EMPTY_DICT = {}
# Evaluation context for XSLT runs: extends _BaseContext with the libxslt
# transform context pointer and the mapping of XSLT extension elements.
272 cdef class _XSLTContext(_BaseContext):
# transform context currently bound by register_context(); freed in free_context()
273 cdef xslt.xsltTransformContext* _xsltCtxt
274 cdef _ReadOnlyElementProxy _extension_element_proxy
# maps (ns_utf, name_utf) -> XSLTExtension instance; EMPTY_DICT while there are none
275 cdef dict _extension_elements
# NOTE(review): the header of the method holding the next two lines is not visible in this listing
277 self._xsltCtxt = NULL
278 self._extension_elements = EMPTY_DICT
# Split XSLTExtension instances (extension elements) out of `extensions` and
# pass the remaining entries on to _BaseContext.__init__().
# Raises XSLTExtensionError for a key whose namespace part is None.
280 def __init__(self, namespaces, extensions, error_log, enable_regexp,
281 build_smart_strings):
282 if extensions is not None and extensions:
283 for ns_name_tuple, extension in extensions.items():
284 if ns_name_tuple[0] is None:
285 raise XSLTExtensionError, \
286 u"extensions must not have empty namespaces"
287 if isinstance(extension, XSLTExtension):
# entries are deleted from `extensions` below; it is rebound to a copy here,
# presumably so that the dict passed in by the caller is left untouched
288 if self._extension_elements is EMPTY_DICT:
289 self._extension_elements = {}
290 extensions = extensions.copy()
291 ns_utf = _utf8(ns_name_tuple[0])
292 name_utf = _utf8(ns_name_tuple[1])
293 self._extension_elements[(ns_utf, name_utf)] = extension
294 del extensions[ns_name_tuple]
295 _BaseContext.__init__(self, namespaces, extensions, error_log, enable_regexp,
# Copy via _BaseContext._copy(); the extension element mapping is shared with
# the copy, not duplicated.
298 cdef _BaseContext _copy(self):
299 cdef _XSLTContext context
300 context = <_XSLTContext>_BaseContext._copy(self)
301 context._extension_elements = self._extension_elements
# Bind this context to a libxslt transform context: set its XPath context,
# then register extension functions and extension elements on it.
304 cdef register_context(self, xslt.xsltTransformContext* xsltCtxt,
306 self._xsltCtxt = xsltCtxt
307 self._set_xpath_context(xsltCtxt.xpathCtxt)
308 self._register_context(doc)
309 self.registerLocalFunctions(xsltCtxt, _register_xslt_function)
310 self.registerGlobalFunctions(xsltCtxt, _register_xslt_function)
311 _registerXSLTExtensions(xsltCtxt, self._extension_elements)
# Undo register_context(): clean up, free the libxslt transform context if
# one is bound, and drop temporary references.
313 cdef free_context(self):
314 self._cleanup_context()
315 self._release_context()
316 if self._xsltCtxt is not NULL:
317 xslt.xsltFreeTransformContext(self._xsltCtxt)
# reset the pointer so a second call does not free the context again
318 self._xsltCtxt = NULL
319 self._release_temp_refs()
# Marker type created by XSLT.strparam(); _convert_xslt_parameters() detects it
# with isinstance() and passes its value through xsltQuoteOneUserParam().
# NOTE(review): the declaration of the `strval` attribute is not visible in this listing.
325 cdef class _XSLTQuotedStringParam:
326 u"""A wrapper class for literal XSLT string parameters that require
# the value is stored as the result of _utf8(strval)
330 def __cinit__(self, strval):
331 self.strval = _utf8(strval)
# NOTE(review): the class header line for this docstring and the attribute
# declarations below is not visible in this listing.
336 u"""XSLT(self, xslt_input, extensions=None, regexp=True, access_control=None)
338 Turn an XSL document into an XSLT object.
340 Calling this object on a tree or Element will execute the XSLT::
342 transform = etree.XSLT(xsl_tree)
343 result = transform(xml_tree)
345 Keyword arguments of the constructor:
347 - extensions: a dict mapping ``(namespace, name)`` pairs to
348 extension functions or extension elements
349 - regexp: enable exslt regular expression support in XPath
351 - access_control: access restrictions for network or file
352 system (see `XSLTAccessControl`)
354 Keyword arguments of the XSLT call:
356 - profile_run: enable XSLT profiling (default: False)
358 Other keyword arguments of the call are passed to the stylesheet
# template context; __call__ works on a copy of it for every run
361 cdef _XSLTContext _context
# compiled stylesheet; freed in __dealloc__
362 cdef xslt.xsltStylesheet* _c_style
# resolver context holding the kept copy of the stylesheet document
363 cdef _XSLTResolverContext _xslt_resolver_context
# optional access restrictions, applied to the transform context in _run_transform()
364 cdef XSLTAccessControl _access_control
# collects libxslt errors; exposed as a copy through `error_log`
365 cdef _ErrorLog _error_log
# Compile `xslt_input` (a tree or Element) into a libxslt stylesheet.
# Works on a copy of the input document, keeps a second copy for document()
# lookups of the stylesheet itself, and raises XSLTParseError when parsing fails.
370 def __init__(self, xslt_input, *, extensions=None, regexp=True,
371 access_control=None):
372 cdef xslt.xsltStylesheet* c_style = NULL
375 cdef _Element root_node
377 doc = _documentOrRaise(xslt_input)
378 root_node = _rootNodeOrRaise(xslt_input)
380 # set access control or raise TypeError
# the attribute is declared as XSLTAccessControl, so assigning it is the type check
381 self._access_control = access_control
383 # make a copy of the document as stylesheet parsing modifies it
384 c_doc = _copyDocRoot(doc._c_doc, root_node._c_node)
386 # make sure we always have a stylesheet URL
# the synthetic URL uses the 'string://__STRING__XSLT__/' prefix that
# _xslt_resolve_from_python() checks for; id(self) distinguishes stylesheets
387 if c_doc.URL is NULL:
388 doc_url_utf = python.PyUnicode_AsASCIIString(
389 f"string://__STRING__XSLT__/{id(self)}.xslt")
390 c_doc.URL = tree.xmlStrdup(_xcstr(doc_url_utf))
392 self._error_log = _ErrorLog()
393 self._xslt_resolver_context = _XSLTResolverContext()
394 _initXSLTResolverContext(self._xslt_resolver_context, doc._parser)
395 # keep a copy in case we need to access the stylesheet via 'document()'
396 self._xslt_resolver_context._c_style_doc = _copyDoc(c_doc, 1)
# publish the resolver context through `_private`, where _xslt_doc_loader()
# looks it up for stylesheet loads
397 c_doc._private = <python.PyObject*>self._xslt_resolver_context
# errors raised by libxslt while parsing are collected in self._error_log
399 with self._error_log:
400 orig_loader = _register_document_loader()
401 c_style = xslt.xsltParseStylesheetDoc(c_doc)
402 _reset_document_loader(orig_loader)
# failure path: release what was allocated, then prefer an exception stored
# by a resolver over a generic XSLTParseError
404 if c_style is NULL or c_style.errors:
405 tree.xmlFreeDoc(c_doc)
406 if c_style is not NULL:
407 xslt.xsltFreeStylesheet(c_style)
408 self._xslt_resolver_context._raise_if_stored()
409 # last error seems to be the most accurate here
410 if self._error_log.last_error is not None and \
411 self._error_log.last_error.message:
412 raise XSLTParseError(self._error_log.last_error.message,
415 raise XSLTParseError(
416 self._error_log._buildExceptionMessage(
417 u"Cannot parse stylesheet"),
420 c_doc._private = NULL # no longer used!
421 self._c_style = c_style
422 self._context = _XSLTContext(None, extensions, self._error_log, regexp, True)
# Free the C-level resources owned by this object: the stylesheet document
# copy kept in the resolver context, and the compiled stylesheet.
424 def __dealloc__(self):
425 if self._xslt_resolver_context is not None and \
426 self._xslt_resolver_context._c_style_doc is not NULL:
427 tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc)
428 # this cleans up the doc copy as well
429 if self._c_style is not NULL:
430 xslt.xsltFreeStylesheet(self._c_style)
# NOTE(review): the property/method header for these lines is not visible in
# this listing.  Callers receive a copy of the log, not the internal object.
434 """The log of errors and warnings of an XSLT execution."""
435 return self._error_log.copy()
# Wrap `strval` in a _XSLTQuotedStringParam marker object.  Takes no `self`
# parameter and is documented below as being called as `XSLT.strparam(...)`.
# NOTE(review): any decorator line above the `def` is not visible in this listing.
438 def strparam(strval):
441 Mark an XSLT string parameter that requires quote escaping
442 before passing it into the transformation. Use it like this::
444 result = transform(doc, some_strval = XSLT.strparam(
445 '''it's \"Monty Python's\" ...'''))
447 Escaped string parameters can be reused without restriction.
449 return _XSLTQuotedStringParam(strval)
# Set libxslt's global `xsltMaxDepth` variable.  Takes no `self` parameter.
# Raises ValueError for rejected values; the condition guarding the raise is
# not visible in this listing, but the message names values < 0.
452 def set_global_max_depth(int max_depth):
453 u"""set_global_max_depth(max_depth)
455 The maximum traversal depth that the stylesheet engine will allow.
456 This does not only count the template recursion depth but also takes
457 the number of variables/parameters into account. The required setting
458 for a run depends on both the stylesheet and the input data.
462 XSLT.set_global_max_depth(5000)
464 Note that this is currently a global, module-wide setting because
465 libxslt does not support it at a per-stylesheet level.
468 raise ValueError("cannot set a maximum stylesheet traversal depth < 0")
# module-wide effect: this assigns a libxslt global, not per-instance state
469 xslt.xsltMaxDepth = max_depth
# Deprecated alias: forwards all arguments unchanged to __call__().
471 def apply(self, _input, *, profile_run=False, **kw):
472 u"""apply(self, _input, profile_run=False, **kw)
474 :deprecated: call the object, not this method."""
475 return self(_input, profile_run=profile_run, **kw)
# Deprecated: returns str(result_tree).  The argument is typed as
# _ElementTree; the stylesheet object itself is not consulted here.
477 def tostring(self, _ElementTree result_tree):
478 u"""tostring(self, result_tree)
480 Save result doc to string based on stylesheet output method.
482 :deprecated: use str(result_tree) instead.
484 return str(result_tree)
# Deep copies are served by __copy__(); `memo` is not used.
486 def __deepcopy__(self, memo):
487 return self.__copy__()
# NOTE(review): the header of the method holding the next line is not visible
# in this listing (presumably __copy__); it delegates to _copyXSLT().
490 return _copyXSLT(self)
# Run the stylesheet on `_input` (a tree or Element) and return the result
# tree built by _xsltResultTreeFactory().
#   profile_run -- when true, profiling is enabled on the transform context
#                  and the profile document is attached to the result
#   **kw        -- stylesheet parameters, converted by _convert_xslt_parameters()
# Raises XSLTApplyError when the transformation fails; exceptions stored by
# resolvers or by the XPath/extension context are re-raised instead.
# NOTE(review): this listing omits several short lines of the method (branch
# and try/finally headers among them); the visible lines are kept unchanged
# except for the repaired `&params` argument below.
492 def __call__(self, _input, *, profile_run=False, **kw):
493 u"""__call__(self, _input, profile_run=False, **kw)
495 Execute the XSL transformation on a tree or Element.
497 Pass the ``profile_run`` option to get profile information
498 about the XSLT. The result of the XSLT will have a property
499 xslt_profile that holds an XML tree with profiling data.
501 cdef _XSLTContext context = None
502 cdef _XSLTResolverContext resolver_context
503 cdef _Document input_doc
504 cdef _Element root_node
505 cdef _Document result_doc
506 cdef _Document profile_doc = None
507 cdef xmlDoc* c_profile_doc
508 cdef xslt.xsltTransformContext* transform_ctxt
509 cdef xmlDoc* c_result = NULL
511 cdef tree.xmlDict* c_dict
512 cdef const_char** params = NULL
514 assert self._c_style is not NULL, "XSLT stylesheet not initialised"
515 input_doc = _documentOrRaise(_input)
516 root_node = _rootNodeOrRaise(_input)
# transform a fake document rooted at the selected node; it is released again
# with _destroyFakeDoc() below
518 c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node)
520 transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc)
521 if transform_ctxt is NULL:
522 _destroyFakeDoc(input_doc._c_doc, c_doc)
525 # using the stylesheet dict is safer than using a possibly
526 # unrelated dict from the current thread. Almost all
527 # non-input tag/attr names will come from the stylesheet
529 if transform_ctxt.dict is not NULL:
530 xmlparser.xmlDictFree(transform_ctxt.dict)
532 # parameter values are stored in the dict
533 # => avoid unnecessarily cluttering the global dict
534 transform_ctxt.dict = xmlparser.xmlDictCreateSub(self._c_style.doc.dict)
535 if transform_ctxt.dict is NULL:
536 xslt.xsltFreeTransformContext(transform_ctxt)
# alternative visible here: share the stylesheet dict and take a reference on it
539 transform_ctxt.dict = self._c_style.doc.dict
540 xmlparser.xmlDictReference(transform_ctxt.dict)
# documents loaded during the run use the parse options of the input document's parser
542 xslt.xsltSetCtxtParseOptions(
543 transform_ctxt, input_doc._parser._parse_options)
546 transform_ctxt.profile = 1
# every run works on copies of the template context and the resolver context
549 context = self._context._copy()
550 context.register_context(transform_ctxt, input_doc)
552 resolver_context = self._xslt_resolver_context._copy()
# _xslt_doc_loader() reads the resolver context back from `_private`
553 transform_ctxt._private = <python.PyObject*>resolver_context
# pass the address of `params`: _convert_xslt_parameters() stores the
# allocated NULL-terminated array through its `params_ptr` argument
555 _convert_xslt_parameters(transform_ctxt, kw, &params)
556 c_result = self._run_transform(
557 c_doc, params, context, transform_ctxt)
558 if params is not NULL:
559 # deallocate space for parameters
560 python.lxml_free(params)
# a result produced by a run that did not end in XSLT_STATE_OK is discarded
562 if transform_ctxt.state != xslt.XSLT_STATE_OK:
563 if c_result is not NULL:
564 tree.xmlFreeDoc(c_result)
567 if transform_ctxt.profile:
568 c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
569 if c_profile_doc is not NULL:
570 profile_doc = _documentFactory(
571 c_profile_doc, input_doc._parser)
# cleanup: free_context() also frees the libxslt transform context
573 if context is not None:
574 context.free_context()
575 _destroyFakeDoc(input_doc._c_doc, c_doc)
# exceptions stored during the run take precedence over a generic apply error
578 if resolver_context is not None and resolver_context._has_raised():
579 if c_result is not NULL:
580 tree.xmlFreeDoc(c_result)
582 resolver_context._raise_if_stored()
584 if context._exc._has_raised():
585 if c_result is not NULL:
586 tree.xmlFreeDoc(c_result)
588 context._exc._raise_if_stored()
591 # last error seems to be the most accurate here
592 error = self._error_log.last_error
593 if error is not None and error.message:
595 message = f"{error.message}, line {error.line}"
597 message = error.message
598 elif error is not None and error.line > 0:
599 message = f"Error applying stylesheet, line {error.line}"
601 message = u"Error applying stylesheet"
602 raise XSLTApplyError(message, self._error_log)
604 if resolver_context is not None:
605 resolver_context.clear()
607 result_doc = _documentFactory(c_result, input_doc._parser)
# move the result over to the thread's dict: hold a reference to the old dict
# while names that still point into the old, stylesheet or input dicts are
# fixed up, then release it
609 c_dict = c_result.dict
610 xmlparser.xmlDictReference(c_dict)
611 __GLOBAL_PARSER_CONTEXT.initThreadDictRef(&c_result.dict)
612 if c_dict is not c_result.dict or \
613 self._c_style.doc.dict is not c_result.dict or \
614 input_doc._c_doc.dict is not c_result.dict:
616 if c_dict is not c_result.dict:
617 fixThreadDictNames(<xmlNode*>c_result,
618 c_dict, c_result.dict)
619 if self._c_style.doc.dict is not c_result.dict:
620 fixThreadDictNames(<xmlNode*>c_result,
621 self._c_style.doc.dict, c_result.dict)
622 if input_doc._c_doc.dict is not c_result.dict:
623 fixThreadDictNames(<xmlNode*>c_result,
624 input_doc._c_doc.dict, c_result.dict)
625 xmlparser.xmlDictFree(c_dict)
627 return _xsltResultTreeFactory(result_doc, self, profile_doc)
# Apply the compiled stylesheet to `c_input_doc` inside `transform_ctxt`.
# Routes libxslt error messages to self._error_log, applies the access control
# settings if any, and runs the transformation with the GIL released.
# NOTE(review): the statement returning c_result is not visible in this listing.
629 cdef xmlDoc* _run_transform(self, xmlDoc* c_input_doc,
630 const_char** params, _XSLTContext context,
631 xslt.xsltTransformContext* transform_ctxt):
632 cdef xmlDoc* c_result
633 xslt.xsltSetTransformErrorFunc(transform_ctxt, <void*>self._error_log,
634 <xmlerror.xmlGenericErrorFunc>_receiveXSLTError)
635 if self._access_control is not None:
636 self._access_control._register_in_context(transform_ctxt)
# the document loader is registered only for the duration of the run and
# reset to the previous one afterwards
637 with self._error_log, nogil:
638 orig_loader = _register_document_loader()
639 c_result = xslt.xsltApplyStylesheetUser(
640 self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt)
641 _reset_document_loader(orig_loader)
# Convert the keyword parameters of an XSLT call into the NULL-terminated
# key/value array that xsltApplyStylesheetUser() receives, and store the
# array through `params_ptr`.  The caller owns the array and releases it with
# python.lxml_free() (see XSLT.__call__).
# NOTE(review): this listing omits several short lines (early return, index
# handling, key conversion, try/except headers).
645 cdef _convert_xslt_parameters(xslt.xsltTransformContext* transform_ctxt,
646 dict parameters, const_char*** params_ptr):
647 cdef Py_ssize_t i, parameter_count
648 cdef const_char** params
# keys and values are interned in the dict of the transform context
649 cdef tree.xmlDict* c_dict = transform_ctxt.dict
651 parameter_count = len(parameters)
652 if parameter_count == 0:
654 # allocate space for parameters
655 # * 2 as we want an entry for both key and value,
656 # and + 1 as array is NULL terminated
657 params = <const_char**>python.lxml_malloc(parameter_count * 2 + 1, sizeof(const_char*))
662 for key, value in parameters.iteritems():
# quoted string parameters are handed to libxslt directly via
# xsltQuoteOneUserParam() instead of going through the params array
664 if isinstance(value, _XSLTQuotedStringParam):
665 v = (<_XSLTQuotedStringParam>value).strval
666 xslt.xsltQuoteOneUserParam(
667 transform_ctxt, _xcstr(k), _xcstr(v))
# XPath objects contribute their path expression
669 if isinstance(value, XPath):
670 v = (<XPath>value)._path
673 params[i] = <const_char*>tree.xmlDictLookup(c_dict, _xcstr(k), len(k))
675 params[i] = <const_char*>tree.xmlDictLookup(c_dict, _xcstr(v), len(v))
# error path (guard not visible in this listing): release the array again
678 python.lxml_free(params)
681 params_ptr[0] = params
# Create an independent XSLT object from `stylesheet` without running
# XSLT.__init__: the access control object is shared, the error log is new,
# the contexts are copied, and the stylesheet is re-parsed from a copy of its
# document.
# NOTE(review): the handling after xmlFreeDoc() on failure and the final
# return are not visible in this listing.
683 cdef XSLT _copyXSLT(XSLT stylesheet):
686 assert stylesheet._c_style is not NULL, "XSLT stylesheet not initialised"
# XSLT.__new__ allocates the object without calling __init__
687 new_xslt = XSLT.__new__(XSLT)
688 new_xslt._access_control = stylesheet._access_control
689 new_xslt._error_log = _ErrorLog()
690 new_xslt._context = stylesheet._context._copy()
# _copy() shares the _c_style_doc pointer, so it is replaced by a real copy
# here; each XSLT object frees its own document in __dealloc__
692 new_xslt._xslt_resolver_context = stylesheet._xslt_resolver_context._copy()
693 new_xslt._xslt_resolver_context._c_style_doc = _copyDoc(
694 stylesheet._xslt_resolver_context._c_style_doc, 1)
696 c_doc = _copyDoc(stylesheet._c_style.doc, 1)
697 new_xslt._c_style = xslt.xsltParseStylesheetDoc(c_doc)
698 if new_xslt._c_style is NULL:
699 tree.xmlFreeDoc(c_doc)
# ElementTree subclass returned by XSLT.__call__().  Serialisation goes
# through libxslt (xsltSaveResultTo / xsltSaveResultToString) with the
# stylesheet that produced the result.  Also implements the buffer protocol
# on top of the serialised bytes.
# NOTE(review): this listing omits several short lines of the class (an
# attribute declaration and various branch/try headers among them); the
# visible lines are kept unchanged except for the `c_compression` argument in
# write_output().
705 cdef class _XSLTResultTree(_ElementTree):
706 """The result of an XSLT evaluation.
708 Use ``str()`` or ``bytes()`` (or ``unicode()`` in Python 2.x) to serialise to a string,
709 and the ``.write_output()`` method to write serialise to a file.
# profiling document of the run, or None; exposed through `xslt_profile`
712 cdef _Document _profile
# serialised result shared between buffer exports, its length, and the number
# of exports currently referring to it
713 cdef xmlChar* _buffer
714 cdef Py_ssize_t _buffer_len
715 cdef Py_ssize_t _buffer_refcnt
# Serialise the result to `file` using the stylesheet's output settings.
#   compression -- compression level; a false value (e.g. None) means 0
# Raises XSLTSaveError when there is no document, re-raises exceptions stored
# by the file-like writer, and raises IOError when libxslt/libxml2 report a
# write or close failure.
717 def write_output(self, file, *, compression=0):
718 """write_output(self, file, *, compression=0)
720 Serialise the XSLT output to a file or file-like object.
722 As opposed to the generic ``.write()`` method, ``.write_output()`` serialises
723 the result as defined by the ``<xsl:output>`` tag.
725 cdef _FilelikeWriter writer = None
727 cdef int r, rclose, c_compression
728 cdef const_xmlChar* c_encoding = NULL
729 cdef tree.xmlOutputBuffer* c_buffer
731 if self._context_node is not None:
732 doc = self._context_node._doc
738 raise XSLTSaveError("No document to serialise")
739 c_compression = compression or 0
740 xslt.LXML_GET_XSLT_ENCODING(c_encoding, self._xslt._c_style)
# pass the normalised C integer, not the raw argument, so that the value
# computed above is the one that is actually used
741 writer = _create_output_buffer(file, <const_char*>c_encoding, c_compression, &c_buffer, close=False)
# the same save/close pair appears twice; the branch headers that select
# between them are not visible in this listing
744 r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style)
745 rclose = tree.xmlOutputBufferClose(c_buffer)
747 r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style)
748 rclose = tree.xmlOutputBufferClose(c_buffer)
749 if writer is not None:
750 writer._exc_context._raise_if_stored()
751 if r < 0 or rclose == -1:
752 python.PyErr_SetFromErrno(IOError) # raises IOError
# Serialise the result into a libxml2-allocated string: *s receives the
# buffer, *l its length.
754 cdef _saveToStringAndSize(self, xmlChar** s, int* l):
757 if self._context_node is not None:
758 doc = self._context_node._doc
767 r = xslt.xsltSaveResultToString(s, l, doc._c_doc,
# string conversion (method header not visible in this listing): delegates to
# __unicode__() unless running on Python 2, where the raw bytes are returned
773 cdef xmlChar* s = NULL
775 if not python.IS_PYTHON2:
776 return self.__unicode__()
777 self._saveToStringAndSize(&s, &l)
780 # we must not use 'funicode()' here as this is not always UTF-8
782 result = <bytes>s[:l]
# Serialise and decode with the stylesheet's output encoding; both a UTF-8
# decode and a decode with `encoding` are visible, the selecting condition is not.
787 def __unicode__(self):
788 cdef xmlChar* encoding
789 cdef xmlChar* s = NULL
791 self._saveToStringAndSize(&s, &l)
794 encoding = self._xslt._c_style.encoding
797 result = s[:l].decode('UTF-8')
799 result = s[:l].decode(encoding)
802 return _stripEncodingDeclaration(result)
# Buffer protocol export of the serialised result.  Non-writable requests
# share one cached serialisation (self._buffer) counted by _buffer_refcnt;
# a writable request, or the first request, triggers a fresh serialisation.
804 def __getbuffer__(self, Py_buffer* buffer, int flags):
808 if self._buffer is NULL or flags & python.PyBUF_WRITABLE:
809 self._saveToStringAndSize(<xmlChar**>&buffer.buf, &l)
# only a non-writable export becomes the shared cached buffer
811 if self._buffer is NULL and not flags & python.PyBUF_WRITABLE:
812 self._buffer = <xmlChar*>buffer.buf
814 self._buffer_refcnt = 1
816 buffer.buf = self._buffer
817 buffer.len = self._buffer_len
818 self._buffer_refcnt += 1
819 if flags & python.PyBUF_WRITABLE:
823 if flags & python.PyBUF_FORMAT:
829 buffer.strides = NULL
830 buffer.suboffsets = NULL
832 buffer.internal = NULL
833 if buffer.obj is not self: # set by Cython?
# Release an export: the shared buffer is freed when its last export is
# released, any other buffer is freed immediately.
836 def __releasebuffer__(self, Py_buffer* buffer):
839 if <xmlChar*>buffer.buf is self._buffer:
840 self._buffer_refcnt -= 1
841 if self._buffer_refcnt == 0:
842 tree.xmlFree(<char*>self._buffer)
845 tree.xmlFree(<char*>buffer.buf)
848 property xslt_profile:
849 """Return an ElementTree with profiling data for the stylesheet run.
853 if self._profile is None:
855 root = self._profile.getroot()
858 return ElementTree(root)
# Build the _XSLTResultTree for the result document `doc` and attach the
# profiling document to it.
# NOTE(review): the lines that use the `xslt` argument and return the result
# are not visible in this listing.
863 cdef _xsltResultTreeFactory(_Document doc, XSLT xslt, _Document profile):
864 cdef _XSLTResultTree result
865 result = <_XSLTResultTree>_newElementTree(doc, None, _XSLTResultTree)
867 result._profile = profile
# Module-level registration, executed when this code is loaded: libxslt's
# extra extension functions/elements first, then EXSLT.
870 # functions like "output" and "write" are a potential security risk, but we
871 # rely on the user to configure XSLTAccessControl as needed
872 xslt.xsltRegisterAllExtras()
874 # enable EXSLT support for XSLT
875 xslt.exsltRegisterAll()
878 ################################################################################
# Matches an `href` pseudo-attribute in PI text: leading whitespace, `href`,
# `=`, then a single-quoted value (group 1) or a double-quoted value (group 2).
881 cdef object _RE_PI_HREF = re.compile(ur'\s+href\s*=\s*(?:\'([^\']*)\'|"([^"]*)")')
# bound methods of the compiled pattern, used by _XSLTProcessingInstruction
882 cdef object _FIND_PI_HREF = _RE_PI_HREF.findall
883 cdef object _REPLACE_PI_HREF = _RE_PI_HREF.sub
# XPath object created on first use by _findStylesheetByID()
884 cdef XPath __findStylesheetByID = None
# Return the result of evaluating //xsl:stylesheet[@xml:id = $id] on `doc`.
# The XPath object is compiled on the first call and cached in the module
# global `__findStylesheetByID`.
886 cdef _findStylesheetByID(_Document doc, id):
887 global __findStylesheetByID
888 if __findStylesheetByID is None:
889 __findStylesheetByID = XPath(
890 u"//xsl:stylesheet[@xml:id = $id]",
891 namespaces={u"xsl" : u"http://www.w3.org/1999/XSL/Transform"})
892 return __findStylesheetByID(doc, id=id)
# Processing instruction class with helpers for the `href` pseudo-attribute
# of stylesheet PIs: parseXSL() loads the referenced stylesheet, set()
# rewrites the href in the PI text.
# NOTE(review): this listing omits several short lines of the class (length
# checks and branch/try headers among them).
894 cdef class _XSLTProcessingInstruction(PIBase):
895 def parseXSL(self, parser=None):
896 u"""parseXSL(self, parser=None)
898 Try to parse the stylesheet referenced by this PI and return
899 an ElementTree for it. If the stylesheet is embedded in the
900 same document (referenced via xml:id), find and return an
901 ElementTree for the stylesheet Element.
903 The optional ``parser`` keyword argument can be passed to specify the
904 parser used to read from external stylesheet URLs.
906 cdef _Document result_doc
907 cdef _Element result_node
909 cdef const_xmlChar* c_href
911 _assertValidNode(self)
912 if self._c_node.content is NULL:
913 raise ValueError, u"PI lacks content"
# a space is prepended because the pattern requires whitespace before `href`
914 hrefs = _FIND_PI_HREF(u' ' + (<unsigned char*>self._c_node.content).decode('UTF-8'))
916 raise ValueError, u"malformed PI attributes"
# the pattern has two groups (single- and double-quoted value); use whichever matched
918 href_utf = utf8(hrefs[0] or hrefs[1])
919 c_href = _xcstr(href_utf)
921 if c_href[0] != c'#':
922 # normal URL, try to parse from it
# resolve the href against the base URI of the PI node
923 c_href = tree.xmlBuildURI(
925 tree.xmlNodeGetBase(self._c_node.doc, self._c_node))
# copy the resolved URI into a Python bytes object before freeing the C string
926 if c_href is not NULL:
928 href_utf = <unsigned char*>c_href
930 tree.xmlFree(<char*>c_href)
931 result_doc = _parseDocumentFromURL(href_utf, parser)
932 return _elementTreeFactory(result_doc, None)
934 # ID reference to embedded stylesheet
936 _assertValidDoc(self._doc)
937 c_href += 1 # skip leading '#'
# first try libxml2's ID lookup; the attribute found must belong to this document
938 c_attr = tree.xmlGetID(self._c_node.doc, c_href)
939 if c_attr is not NULL and c_attr.doc is self._c_node.doc:
940 result_node = _elementFactory(self._doc, c_attr.parent)
941 return _elementTreeFactory(result_node._doc, result_node)
# otherwise search for an xsl:stylesheet element with a matching xml:id
944 root = _findStylesheetByID(self._doc, funicode(c_href))
946 raise ValueError, u"reference to non-existing embedded stylesheet"
948 raise ValueError, u"ambiguous reference to embedded stylesheet"
949 result_node = root[0]
950 return _elementTreeFactory(result_node._doc, result_node)
# Rewrite the PI text so that it carries the given href.
# Raises AttributeError for unsupported keys (the check itself is not visible
# in this listing) and ValueError for values containing '"' or '>'.
952 def set(self, key, value):
953 u"""set(self, key, value)
955 Supports setting the 'href' pseudo-attribute in the text of
956 the processing instruction.
959 raise AttributeError, \
960 u"only setting the 'href' attribute is supported on XSLT-PIs"
963 elif u'"' in value or u'>' in value:
964 raise ValueError, u"Invalid URL, must not contain '\"' or '>'"
966 attrib = f' href="{value}"'
# same leading-space convention as in parseXSL(); an existing href is
# replaced, otherwise the new one is appended
967 text = u' ' + self.text
968 if _FIND_PI_HREF(text):
969 self.text = _REPLACE_PI_HREF(attrib, text)
971 self.text = text + attrib