# Root of the XSLT exception hierarchy in this file; derives from LxmlError.
5 class XSLTError(LxmlError):
6 u"""Base class of all XSLT errors.
# Raised by XSLT.__init__ when the stylesheet cannot be parsed; also
# constructed in _xslt_store_resolver_exception for URI resolution failures.
10 class XSLTParseError(XSLTError):
11 u"""Error parsing a stylesheet document.
# Raised by XSLT.__call__ when applying the stylesheet fails; also constructed
# in _xslt_store_resolver_exception when the load type is XSLT_LOAD_DOCUMENT.
15 class XSLTApplyError(XSLTError):
16 u"""Error running an XSL transformation.
# XSLTError subclass reserved for failures while serialising a result tree.
20 class XSLTSaveError(XSLTError):
21 u"""Error serialising an XSLT result.
# Raised by _XSLTContext.__init__ when an extension is registered under a
# None namespace.
25 class XSLTExtensionError(XSLTError):
26 u"""Error registering an XSLT extension.
# libxslt version values, both produced by __unpackIntVersion:
# LIBXSLT_COMPILED_VERSION from the xslt.LIBXSLT_VERSION constant and
# LIBXSLT_VERSION from xslt.xsltLibxsltVersion.
# NOTE(review): presumably the former is the build-time header value and the
# latter the value reported by the loaded library -- confirm in the xslt
# declarations.
31 LIBXSLT_COMPILED_VERSION = __unpackIntVersion(xslt.LIBXSLT_VERSION)
32 LIBXSLT_VERSION = __unpackIntVersion(xslt.xsltLibxsltVersion)
35 ################################################################################
36 # Where do we store what?
38 # xsltStylesheet->doc->_private
39 # == _XSLTResolverContext for XSL stylesheet
41 # xsltTransformContext->_private
42 # == _XSLTResolverContext for transformed document
44 ################################################################################
47 ################################################################################
48 # XSLT document loaders
# Resolver context used while loading documents for XSLT. Extends
# _ResolverContext with the parser to use and a pointer to a stylesheet
# document.
50 cdef class _XSLTResolverContext(_ResolverContext):
# Document whose URL is compared against requested URIs in
# _xslt_resolve_from_python (the "resolve the stylesheet itself" shortcut).
51 cdef xmlDoc* _c_style_doc
# Parser handed to the _parseDoc* helpers when a resolver supplies input.
52 cdef _BaseParser _parser
# Build a new context initialised from the same parser. The _c_style_doc
# pointer is shared with the original; the document is not duplicated here.
54 cdef _XSLTResolverContext _copy(self):
55 cdef _XSLTResolverContext context
56 context = _XSLTResolverContext()
57 _initXSLTResolverContext(context, self._parser)
58 context._c_style_doc = self._c_style_doc
# Initialise `context`: set up the base resolver context from
# parser.resolvers, remember the parser, and reset _c_style_doc to NULL
# (callers assign the stylesheet document afterwards where needed).
61 cdef _initXSLTResolverContext(_XSLTResolverContext context,
63 _initResolverContext(context, parser.resolvers)
64 context._parser = parser
65 context._c_style_doc = NULL
# Resolve `c_uri` through the Python-level resolvers attached to the
# _XSLTResolverContext passed as `c_context`, returning a parsed xmlDoc*.
# Declared `with gil` because it manipulates Python objects.
# NOTE(review): `error` is an int* out-parameter; confirm how it is set on
# the failure paths of this function.
67 cdef xmlDoc* _xslt_resolve_from_python(char* c_uri, void* c_context,
68 int parse_options, int* error) with gil:
69 # call the Python document loaders
70 cdef _XSLTResolverContext context
71 cdef _ResolverRegistry resolvers
72 cdef _InputDocument doc_ref
76 context = <_XSLTResolverContext>c_context
78 # shortcut if we resolve the stylesheet itself
79 c_doc = context._c_style_doc
80 if c_doc is not NULL and c_doc.URL is not NULL:
81 if cstd.strcmp(c_uri, c_doc.URL) == 0:
# hand out a copy so the kept stylesheet document stays untouched
82 return _copyDoc(c_doc, 1)
84 # delegate to the Python resolvers
86 resolvers = context._resolvers
# 26 is the length of the 'string://__STRING__XSLT__/' prefix, the same
# prefix XSLT.__init__ uses when it generates a URL for a stylesheet
87 if cstd.strncmp('string://__STRING__XSLT__/', c_uri, 26) == 0:
89 uri = _decodeFilename(c_uri)
90 doc_ref = resolvers.resolve(uri, None, context)
# parse according to the kind of input the resolver returned
93 if doc_ref is not None:
94 if doc_ref._type == PARSER_DATA_STRING:
96 doc_ref._data_bytes, doc_ref._filename, context._parser)
97 elif doc_ref._type == PARSER_DATA_FILENAME:
98 c_doc = _parseDocFromFile(doc_ref._filename, context._parser)
99 elif doc_ref._type == PARSER_DATA_FILE:
100 c_doc = _parseDocFromFilelike(
101 doc_ref._file, doc_ref._filename, context._parser)
102 elif doc_ref._type == PARSER_DATA_EMPTY:
# make sure the parsed document carries a URL: fall back to the requested URI
104 if c_doc is not NULL and c_doc.URL is NULL:
105 c_doc.URL = tree.xmlStrdup(c_uri)
# NOTE(review): presumably runs in an exception handler so the Python error
# is kept on the context instead of propagating into libxslt -- confirm.
108 context._store_raised()
# Record a "Cannot resolve URI ..." exception on the resolver context passed
# as `context`. XSLTApplyError is used when c_type is XSLT_LOAD_DOCUMENT;
# an XSLTParseError is built for the other case. Declared `with gil` because
# it creates Python objects.
112 cdef void _xslt_store_resolver_exception(char* c_uri, void* context,
113 xslt.xsltLoadType c_type) with gil:
115 message = u"Cannot resolve URI %s" % _decodeFilename(c_uri)
116 if c_type == xslt.XSLT_LOAD_DOCUMENT:
117 exception = XSLTApplyError(message)
119 exception = XSLTParseError(message)
120 (<_XSLTResolverContext>context)._store_exception(exception)
# NOTE(review): `e` is presumably an exception caught while building or
# storing the error above; it is stored in place of the intended one.
122 (<_XSLTResolverContext>context)._store_exception(e)
# Document loader callback for libxslt (declared `nogil`). Picks the resolver
# context that belongs to `c_ctxt` according to `c_type`, asks the Python
# resolvers for the document, and falls back to XSLT_DOC_DEFAULT_LOADER when
# no resolver context is available or when Python resolution returned NULL
# without flagging an error.
124 cdef xmlDoc* _xslt_doc_loader(char* c_uri, tree.xmlDict* c_dict,
125 int parse_options, void* c_ctxt,
126 xslt.xsltLoadType c_type) nogil:
127 # no Python objects here, may be called without thread context !
128 # when we declare a Python object, Pyrex will INCREF(None) !
131 cdef void* c_pcontext
133 # find resolver contexts of stylesheet and transformed doc
134 if c_type == xslt.XSLT_LOAD_DOCUMENT:
135 # transformation time
136 c_pcontext = (<xslt.xsltTransformContext*>c_ctxt)._private
137 elif c_type == xslt.XSLT_LOAD_STYLESHEET:
138 # include/import resolution while parsing
139 c_pcontext = (<xslt.xsltStylesheet*>c_ctxt).doc._private
143 if c_pcontext is NULL:
144 # can't call Python without context, fall back to default loader
145 return XSLT_DOC_DEFAULT_LOADER(
146 c_uri, c_dict, parse_options, c_ctxt, c_type)
# the callee acquires the GIL itself (it is declared `with gil`)
148 c_doc = _xslt_resolve_from_python(c_uri, c_pcontext, parse_options, &error)
149 if c_doc is NULL and not error:
150 c_doc = XSLT_DOC_DEFAULT_LOADER(
151 c_uri, c_dict, parse_options, c_ctxt, c_type)
# NOTE(review): presumably only reached when the default loader failed as
# well -- confirm the guarding condition.
153 _xslt_store_resolver_exception(c_uri, c_pcontext, c_type)
# a loaded stylesheet document gets the same resolver context in _private,
# which is where the XSLT_LOAD_STYLESHEET branch above reads it from
155 if c_doc is not NULL and c_type == xslt.XSLT_LOAD_STYLESHEET:
156 c_doc._private = c_pcontext
# Keep libxslt's current default loader so _xslt_doc_loader can delegate to
# it, then install _xslt_doc_loader as the loader function.
159 cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER
160 XSLT_DOC_DEFAULT_LOADER = xslt.xsltDocDefaultLoader
162 xslt.xsltSetLoaderFunc(_xslt_doc_loader)
164 ################################################################################
165 # XSLT file/network access control
167 cdef class XSLTAccessControl:
168 u"""XSLTAccessControl(self, read_file=True, write_file=True, create_dir=True, read_network=True, write_network=True)
170 Access control for XSLT: reading/writing files, directories and
171 network I/O. Access to a type of resource is granted or denied by
172 passing any of the following boolean keyword arguments. All of
173 them default to True to allow access.
181 For convenience, there is also a class member `DENY_ALL` that
182 provides an XSLTAccessControl instance that is readily configured
183 to deny everything, and a `DENY_WRITE` member that denies all
184 write access but allows read access.
# libxslt security preferences owned by this object: allocated in __init__
# and released in __dealloc__
188 cdef xslt.xsltSecurityPrefs* _prefs
# Allocate the preferences and apply each of the five keyword-only flags
# through _setAccess.
189 def __init__(self, *, read_file=True, write_file=True, create_dir=True,
190 read_network=True, write_network=True):
191 self._prefs = xslt.xsltNewSecurityPrefs()
192 if self._prefs is NULL:
193 python.PyErr_NoMemory()
194 self._setAccess(xslt.XSLT_SECPREF_READ_FILE, read_file)
195 self._setAccess(xslt.XSLT_SECPREF_WRITE_FILE, write_file)
196 self._setAccess(xslt.XSLT_SECPREF_CREATE_DIRECTORY, create_dir)
197 self._setAccess(xslt.XSLT_SECPREF_READ_NETWORK, read_network)
198 self._setAccess(xslt.XSLT_SECPREF_WRITE_NETWORK, write_network)
# preconfigured instance with every access type denied
200 DENY_ALL = XSLTAccessControl(
201 read_file=False, write_file=False, create_dir=False,
202 read_network=False, write_network=False)
# preconfigured instance: reads allowed, writes and directory creation denied
204 DENY_WRITE = XSLTAccessControl(
205 read_file=True, write_file=False, create_dir=False,
206 read_network=True, write_network=False)
# Free the libxslt preferences if they were allocated.
208 def __dealloc__(self):
209 if self._prefs is not NULL:
210 xslt.xsltFreeSecurityPrefs(self._prefs)
# Install xsltSecurityAllow or xsltSecurityForbid (selected by `allow`) as the
# check function for `option`.
212 cdef _setAccess(self, xslt.xsltSecurityOption option, allow):
213 cdef xslt.xsltSecurityCheck function
215 function = xslt.xsltSecurityAllow
217 function = xslt.xsltSecurityForbid
218 xslt.xsltSetSecurityPrefs(self._prefs, option, function)
# Attach these preferences to the given transform context.
220 cdef void _register_in_context(self, xslt.xsltTransformContext* ctxt):
221 xslt.xsltSetCtxtSecurityPrefs(self._prefs, ctxt)
224 u"The access control configuration as a map of options."
227 u'read_file': self._optval(xslt.XSLT_SECPREF_READ_FILE),
228 u'write_file': self._optval(xslt.XSLT_SECPREF_WRITE_FILE),
229 u'create_dir': self._optval(xslt.XSLT_SECPREF_CREATE_DIRECTORY),
230 u'read_network': self._optval(xslt.XSLT_SECPREF_READ_NETWORK),
231 u'write_network': self._optval(xslt.XSLT_SECPREF_WRITE_NETWORK),
# Look up the check function currently installed for `option` and classify
# it by comparing against xsltSecurityAllow / xsltSecurityForbid.
234 cdef _optval(self, xslt.xsltSecurityOption option):
235 cdef xslt.xsltSecurityCheck function
236 function = xslt.xsltGetSecurityPrefs(self._prefs, option)
237 if function is <xslt.xsltSecurityCheck>xslt.xsltSecurityAllow:
239 elif function is <xslt.xsltSecurityCheck>xslt.xsltSecurityForbid:
# representation: the unqualified type name plus the options rendered as
# "key=value" pairs joined by ", "
245 items = self.options.items()
248 funicode(python._fqtypename(self)).split(u'.')[-1],
249 u', '.join([u"%s=%r" % item for item in items]))
251 ################################################################################
# Register _xpath_function_call as the XSLT extension function identified by
# (name_utf, ns_utf) on the transform context `ctxt`; returns the result of
# xsltRegisterExtFunction.
254 cdef int _register_xslt_function(void* ctxt, name_utf, ns_utf):
257 return xslt.xsltRegisterExtFunction(
258 <xslt.xsltTransformContext*>ctxt, _cstr(name_utf), _cstr(ns_utf),
259 _xpath_function_call)
# Counterpart of _register_xslt_function: calls xsltRegisterExtFunction again
# for the same (name_utf, ns_utf) pair.
# NOTE(review): presumably passes NULL as the function to clear the
# registration -- confirm the final argument.
261 cdef int _unregister_xslt_function(void* ctxt, name_utf, ns_utf):
264 return xslt.xsltRegisterExtFunction(
265 <xslt.xsltTransformContext*>ctxt, _cstr(name_utf), _cstr(ns_utf),
# Shared placeholder for "no extension elements". _XSLTContext.__init__
# compares against it by identity and swaps in a fresh dict before adding
# entries, so this object itself must never be mutated.
268 cdef dict EMPTY_DICT = {}
# Evaluation context for XSLT runs. Extends _BaseContext with the libxslt
# transform context it is bound to and the registered extension elements.
270 cdef class _XSLTContext(_BaseContext):
# transform context set by register_context() and freed by free_context()
271 cdef xslt.xsltTransformContext* _xsltCtxt
272 cdef _ReadOnlyElementProxy _extension_element_proxy
# maps (ns_utf, name_utf) to XSLTExtension instances; EMPTY_DICT when none
273 cdef dict _extension_elements
# Split `extensions`: XSLTExtension instances go into _extension_elements,
# keyed by the UTF-8 encoded (namespace, name) pair; the remaining entries
# are passed on to _BaseContext.__init__.
# Raises XSLTExtensionError when an extension key has a None namespace.
274 def __init__(self, namespaces, extensions, enable_regexp,
275 build_smart_strings):
276 self._xsltCtxt = NULL
277 self._extension_elements = EMPTY_DICT
278 if extensions is not None and extensions:
279 for ns_name_tuple, extension in extensions.items():
280 if ns_name_tuple[0] is None:
281 raise XSLTExtensionError, \
282 u"extensions must not have empty namespaces"
283 if isinstance(extension, XSLTExtension):
# first extension element found: switch from the shared placeholder to a
# private dict, and work on a copy of `extensions` so the deletions below
# do not touch the dict that was passed in
284 if self._extension_elements is EMPTY_DICT:
285 self._extension_elements = {}
286 extensions = extensions.copy()
287 ns_utf = _utf8(ns_name_tuple[0])
288 name_utf = _utf8(ns_name_tuple[1])
289 self._extension_elements[(ns_utf, name_utf)] = extension
290 del extensions[ns_name_tuple]
291 _BaseContext.__init__(self, namespaces, extensions, enable_regexp,
# Copy via _BaseContext._copy; the copy shares the same _extension_elements
# dict object with the original.
294 cdef _BaseContext _copy(self):
295 cdef _XSLTContext context
296 context = <_XSLTContext>_BaseContext._copy(self)
297 context._extension_elements = self._extension_elements
# Bind this context to a libxslt transform context: remember it, attach its
# XPath context, then register local functions, global functions and the
# extension elements on it.
300 cdef register_context(self, xslt.xsltTransformContext* xsltCtxt,
302 self._xsltCtxt = xsltCtxt
303 self._set_xpath_context(xsltCtxt.xpathCtxt)
304 self._register_context(doc)
305 self.registerLocalFunctions(xsltCtxt, _register_xslt_function)
306 self.registerGlobalFunctions(xsltCtxt, _register_xslt_function)
307 _registerXSLTExtensions(xsltCtxt, self._extension_elements)
# Undo register_context(): clean up and release the context state, free the
# libxslt transform context if one is set (resetting the pointer), and
# release temporary references.
309 cdef free_context(self):
310 self._cleanup_context()
311 self._release_context()
312 if self._xsltCtxt is not NULL:
313 xslt.xsltFreeTransformContext(self._xsltCtxt)
314 self._xsltCtxt = NULL
315 self._release_temp_refs()
# Marker wrapper created by XSLT.strparam(). XSLT._run_transform recognises
# instances and passes their value through xsltQuoteOneUserParam.
318 cdef class _XSLTQuotedStringParam:
319 u"""A wrapper class for literal XSLT string parameters that require
# the value is stored UTF-8 encoded (via _utf8)
323 def __init__(self, strval):
324 self.strval = _utf8(strval)
328 u"""XSLT(self, xslt_input, extensions=None, regexp=True, access_control=None)
330 Turn an XSL document into an XSLT object.
332 Calling this object on a tree or Element will execute the XSLT::
334 >>> transform = etree.XSLT(xsl_tree)
335 >>> result = transform(xml_tree)
337 Keyword arguments of the constructor:
339 - extensions: a dict mapping ``(namespace, name)`` pairs to
340 extension functions or extension elements
341 - regexp: enable exslt regular expression support in XPath
343 - access_control: access restrictions for network or file
344 system (see `XSLTAccessControl`)
346 Keyword arguments of the XSLT call:
348 - profile_run: enable XSLT profiling (default: False)
350 Other keyword arguments of the call are passed to the stylesheet
# template context; __call__ works on a copy of it for every run
353 cdef _XSLTContext _context
# parsed stylesheet: set in __init__, freed in __dealloc__
354 cdef xslt.xsltStylesheet* _c_style
# resolver context holding a copy of the stylesheet document
355 cdef _XSLTResolverContext _xslt_resolver_context
# optional access restrictions applied to each transform context
356 cdef XSLTAccessControl _access_control
# log connected while parsing the stylesheet and while transforming
357 cdef _ErrorLog _error_log
# Parse `xslt_input` (a tree or Element) into a libxslt stylesheet.
# `extensions` and `regexp` are forwarded to the _XSLTContext created at the
# end; `access_control` is stored for use during transformations.
# Raises XSLTParseError when libxslt cannot parse the stylesheet; an
# exception stored by a resolver is re-raised first.
359 def __init__(self, xslt_input, *, extensions=None, regexp=True,
360 access_control=None):
361 cdef xslt.xsltStylesheet* c_style
364 cdef _Element root_node
366 doc = _documentOrRaise(xslt_input)
367 root_node = _rootNodeOrRaise(xslt_input)
369 # set access control or raise TypeError
370 self._access_control = access_control
372 # make a copy of the document as stylesheet parsing modifies it
373 c_doc = _copyDocRoot(doc._c_doc, root_node._c_node)
375 # make sure we always have a stylesheet URL
376 if c_doc.URL is NULL:
# synthetic URL based on id(self); the 'string://__STRING__XSLT__/' prefix is
# the one _xslt_resolve_from_python checks for
377 doc_url_utf = python.PyUnicode_AsASCIIString(
378 u"string://__STRING__XSLT__/%d.xslt" % id(self))
379 c_doc.URL = tree.xmlStrdup(_cstr(doc_url_utf))
381 self._error_log = _ErrorLog()
382 self._xslt_resolver_context = _XSLTResolverContext()
383 _initXSLTResolverContext(self._xslt_resolver_context, doc._parser)
384 # keep a copy in case we need to access the stylesheet via 'document()'
385 self._xslt_resolver_context._c_style_doc = _copyDoc(c_doc, 1)
# expose the resolver context to _xslt_doc_loader during stylesheet parsing
386 c_doc._private = <python.PyObject*>self._xslt_resolver_context
388 self._error_log.connect()
390 c_style = xslt.xsltParseStylesheetDoc(c_doc)
391 self._error_log.disconnect()
# NOTE(review): presumably the failure path (c_style is NULL): the document
# copy is freed here because no stylesheet took ownership of it -- confirm.
394 tree.xmlFreeDoc(c_doc)
395 self._xslt_resolver_context._raise_if_stored()
396 # last error seems to be the most accurate here
397 if self._error_log.last_error is not None and \
398 self._error_log.last_error.message:
399 raise XSLTParseError(self._error_log.last_error.message,
402 raise XSLTParseError(
403 self._error_log._buildExceptionMessage(
404 u"Cannot parse stylesheet"),
407 c_doc._private = NULL # no longer used!
408 self._c_style = c_style
409 self._context = _XSLTContext(None, extensions, regexp, True)
# Release the C resources: the stylesheet document copy kept on the resolver
# context (if any), then the parsed stylesheet.
411 def __dealloc__(self):
412 if self._xslt_resolver_context is not None and \
413 self._xslt_resolver_context._c_style_doc is not NULL:
414 tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc)
415 # this cleans up the doc copy as well
416 xslt.xsltFreeStylesheet(self._c_style)
419 u"The log of errors and warnings of an XSLT execution."
# callers receive a copy, not the live log object
421 return self._error_log.copy()
# The first parameter `_` is unused in the visible body; the result is an
# _XSLTQuotedStringParam wrapping `strval`.
424 def strparam(_, strval):
427 Mark an XSLT string parameter that requires quote escaping
428 before passing it into the transformation. Use it like this::
430 result = transform(doc, some_strval = XSLT.strparam(
431 '''it's \"Monty Python's\" ...'''))
433 Escaped string parameters can be reused without restriction.
435 return _XSLTQuotedStringParam(strval)
# Deprecated alias: forwards all arguments to __call__.
437 def apply(self, _input, *, profile_run=False, **kw):
438 u"""apply(self, _input, profile_run=False, **kw)
440 :deprecated: call the object, not this method."""
441 return self(_input, profile_run=profile_run, **kw)
# Deprecated helper: simply returns str(result_tree); `self` is not used.
443 def tostring(self, _ElementTree result_tree):
444 u"""tostring(self, result_tree)
446 Save result doc to string based on stylesheet output method.
448 :deprecated: use str(result_tree) instead.
450 return str(result_tree)
# Deep copy is the same as a plain copy here; `memo` is ignored.
452 def __deepcopy__(self, memo):
453 return self.__copy__()
# copying is delegated to the module-level _copyXSLT helper
456 return _copyXSLT(self)
# Run the stylesheet on `_input` (a tree or Element) and return the result
# wrapped by _xsltResultTreeFactory. Extra keyword arguments are passed to
# _run_transform as stylesheet parameters.
# Exceptions stored by the resolver context or by the evaluation context are
# re-raised; otherwise a failed run raises XSLTApplyError with a message
# taken from the last error log entry.
458 def __call__(self, _input, *, profile_run=False, **kw):
459 u"""__call__(self, _input, profile_run=False, **kw)
461 Execute the XSL transformation on a tree or Element.
463 Pass the ``profile_run`` option to get profile information
464 about the XSLT. The result of the XSLT will have a property
465 xslt_profile that holds an XML tree with profiling data.
467 cdef _XSLTContext context
468 cdef _XSLTResolverContext resolver_context
469 cdef _Document input_doc
470 cdef _Element root_node
471 cdef _Document result_doc
472 cdef _Document profile_doc
473 cdef xmlDoc* c_profile_doc
474 cdef xslt.xsltTransformContext* transform_ctxt
475 cdef xmlDoc* c_result = NULL
477 cdef tree.xmlDict* c_dict
479 input_doc = _documentOrRaise(_input)
480 root_node = _rootNodeOrRaise(_input)
# the document obtained here is handed back through _destroyFakeDoc below
482 c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node)
484 transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc)
485 if transform_ctxt is NULL:
486 _destroyFakeDoc(input_doc._c_doc, c_doc)
487 python.PyErr_NoMemory()
489 # using the stylesheet dict is safer than using a possibly
490 # unrelated dict from the current thread. Almost all
491 # non-input tag/attr names will come from the stylesheet
# drop the context's own dict and take a reference on the stylesheet's dict
493 if transform_ctxt.dict is not NULL:
494 xmlparser.xmlDictFree(transform_ctxt.dict)
495 transform_ctxt.dict = self._c_style.doc.dict
496 xmlparser.xmlDictReference(transform_ctxt.dict)
498 xslt.xsltSetCtxtParseOptions(
499 transform_ctxt, input_doc._parser._parse_options)
# NOTE(review): presumably only executed when profile_run is true -- confirm.
502 transform_ctxt.profile = 1
505 self._error_log.connect()
# every run works on its own copies of the evaluation and resolver contexts
506 context = self._context._copy()
507 context.register_context(transform_ctxt, input_doc)
509 resolver_context = self._xslt_resolver_context._copy()
# _xslt_doc_loader reads the resolver context from _private at transform time
510 transform_ctxt._private = <python.PyObject*>resolver_context
512 c_result = self._run_transform(
513 c_doc, kw, context, transform_ctxt)
# a result produced by a failed run is discarded
515 if transform_ctxt.state != xslt.XSLT_STATE_OK:
516 if c_result is not NULL:
517 tree.xmlFreeDoc(c_result)
520 if transform_ctxt.profile:
521 c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
522 if c_profile_doc is not NULL:
523 profile_doc = _documentFactory(
524 c_profile_doc, input_doc._parser)
# cleanup: free_context() also frees transform_ctxt
526 if context is not None:
527 context.free_context()
528 _destroyFakeDoc(input_doc._c_doc, c_doc)
529 self._error_log.disconnect()
# errors raised inside Python resolvers take precedence ...
532 if resolver_context is not None and resolver_context._has_raised():
533 if c_result is not NULL:
534 tree.xmlFreeDoc(c_result)
536 resolver_context._raise_if_stored()
# ... followed by errors stored on the evaluation context
538 if context._exc._has_raised():
539 if c_result is not NULL:
540 tree.xmlFreeDoc(c_result)
542 context._exc._raise_if_stored()
# NOTE(review): the message building below presumably runs only when no
# result document is left (c_result is NULL) -- confirm the guard.
545 # last error seems to be the most accurate here
546 error = self._error_log.last_error
547 if error is not None and error.message:
549 message = u"%s, line %d" % (error.message, error.line)
551 message = error.message
552 elif error is not None and error.line > 0:
553 message = u"Error applying stylesheet, line %d" % error.line
555 message = u"Error applying stylesheet"
556 raise XSLTApplyError(message, self._error_log)
558 if resolver_context is not None:
559 resolver_context.clear()
561 result_doc = _documentFactory(c_result, input_doc._parser)
# Re-home the result document onto the dict chosen by initThreadDictRef.
# Names may still come from the result's previous dict, the stylesheet's
# dict or the input document's dict, so each dict that differs from the new
# one gets its own fixThreadDictNames pass.
563 c_dict = c_result.dict
564 __GLOBAL_PARSER_CONTEXT.initThreadDictRef(&c_result.dict)
565 if c_dict is not c_result.dict or \
566 self._c_style.doc.dict is not c_result.dict or \
567 input_doc._c_doc.dict is not c_result.dict:
569 if c_dict is not c_result.dict:
570 fixThreadDictNames(<xmlNode*>c_result,
571 c_dict, c_result.dict)
572 if self._c_style.doc.dict is not c_result.dict:
573 fixThreadDictNames(<xmlNode*>c_result,
574 self._c_style.doc.dict, c_result.dict)
575 if input_doc._c_doc.dict is not c_result.dict:
576 fixThreadDictNames(<xmlNode*>c_result,
577 input_doc._c_doc.dict, c_result.dict)
579 return _xsltResultTreeFactory(result_doc, self, profile_doc)
# Apply the stylesheet to `c_input_doc` within `transform_ctxt` and return
# the result document from xsltApplyStylesheetUser.
# `parameters` holds the stylesheet parameters; they are converted into a
# C array of key/value strings, except _XSLTQuotedStringParam values, which
# go through xsltQuoteOneUserParam.
581 cdef xmlDoc* _run_transform(self, xmlDoc* c_input_doc,
582 dict parameters, _XSLTContext context,
583 xslt.xsltTransformContext* transform_ctxt):
584 cdef xmlDoc* c_result
586 cdef Py_ssize_t i, parameter_count
# route libxslt transform errors for this context into our error log
589 xslt.xsltSetTransformErrorFunc(transform_ctxt, <void*>self._error_log,
592 if self._access_control is not None:
593 self._access_control._register_in_context(transform_ctxt)
595 parameter_count = len(parameters)
596 if parameter_count > 0:
597 # allocate space for parameters
598 # * 2 as we want an entry for both key and value,
599 # and + 1 as array is NULL terminated
600 params = <char**>python.PyMem_Malloc(
601 sizeof(char*) * (parameter_count * 2 + 1))
605 for key, value in parameters.iteritems():
607 if isinstance(value, _XSLTQuotedStringParam):
608 v = (<_XSLTQuotedStringParam>value).strval
609 xslt.xsltQuoteOneUserParam(
610 transform_ctxt, _cstr(k), _cstr(v))
# NOTE(review): presumably the error path of the conversion loop, releasing
# the array before the exception propagates -- confirm.
620 python.PyMem_Free(params)
627 c_result = xslt.xsltApplyStylesheetUser(
628 self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt)
630 if params is not NULL:
631 # deallocate space for parameters
632 python.PyMem_Free(params)
# NEW_XSLT is the C macro PY_NEW from etree_defs.h, typed to return an XSLT
# instance; _copyXSLT uses it to build an object without running __init__.
636 cdef extern from "etree_defs.h":
637 # macro call to 't->tp_new()' for instantiation without calling __init__()
638 cdef XSLT NEW_XSLT "PY_NEW" (object t)
# Build an independent XSLT object from `stylesheet`: the access control
# object is shared, the error log is fresh, the evaluation and resolver
# contexts are copied, the kept stylesheet document is duplicated, and the
# stylesheet is re-parsed from a copy of the original's document.
640 cdef XSLT _copyXSLT(XSLT stylesheet):
643 new_xslt = NEW_XSLT(XSLT) # without calling __init__()
644 new_xslt._access_control = stylesheet._access_control
645 new_xslt._error_log = _ErrorLog()
646 new_xslt._context = stylesheet._context._copy()
# _copy() shares the _c_style_doc pointer, so give the new object its own
# document copy (each XSLT frees its _c_style_doc in __dealloc__)
648 new_xslt._xslt_resolver_context = stylesheet._xslt_resolver_context._copy()
649 new_xslt._xslt_resolver_context._c_style_doc = _copyDoc(
650 stylesheet._xslt_resolver_context._c_style_doc, 1)
652 c_doc = _copyDoc(stylesheet._c_style.doc, 1)
653 new_xslt._c_style = xslt.xsltParseStylesheetDoc(c_doc)
# parsing failed: the document copy was not taken over, so free it here
654 if new_xslt._c_style is NULL:
655 tree.xmlFreeDoc(c_doc)
656 python.PyErr_NoMemory()
# ElementTree subclass for XSLT results. Serialises through
# xsltSaveResultToString, supports the buffer protocol, and exposes the
# optional profiling document of the run.
660 cdef class _XSLTResultTree(_ElementTree):
# profiling document of the run, or None
662 cdef _Document _profile
# length and export count of the serialised buffer cached by __getbuffer__
664 cdef Py_ssize_t _buffer_len
665 cdef Py_ssize_t _buffer_refcnt
# Serialise the result via xsltSaveResultToString, returning the buffer and
# its length through the out-parameters `s` and `l`.
666 cdef _saveToStringAndSize(self, char** s, int* l):
669 if self._context_node is not None:
670 doc = self._context_node._doc
677 r = xslt.xsltSaveResultToString(s, l, doc._c_doc,
680 python.PyErr_NoMemory()
# on Python 3 the string conversion is the unicode conversion
685 if python.IS_PYTHON3:
686 return self.__unicode__()
687 self._saveToStringAndSize(&s, &l)
690 # we must not use 'funicode' here as this is not always UTF-8
692 result = python.PyString_FromStringAndSize(s, l)
# Serialise, decode using the stylesheet's output encoding, and strip the
# encoding declaration from the decoded text.
697 def __unicode__(self):
701 self._saveToStringAndSize(&s, &l)
704 encoding = self._xslt._c_style.encoding
708 result = python.PyUnicode_Decode(s, l, encoding, 'strict')
711 return _stripEncodingDeclaration(result)
# Buffer protocol export. A fresh serialisation is made when nothing is
# cached yet or when a writable buffer is requested; a read-only request with
# no cache stores the new buffer in self._buffer for later exports.
713 def __getbuffer__(self, Py_buffer* buffer, int flags):
717 if self._buffer is NULL or flags & python.PyBUF_WRITABLE:
718 self._saveToStringAndSize(<char**>&buffer.buf, &l)
720 if self._buffer is NULL and not flags & python.PyBUF_WRITABLE:
721 self._buffer = <char*>buffer.buf
723 self._buffer_refcnt = 1
725 buffer.buf = <char*>self._buffer
726 buffer.len = self._buffer_len
727 self._buffer_refcnt += 1
728 if flags & python.PyBUF_WRITABLE:
732 if flags & python.PyBUF_FORMAT:
738 buffer.strides = NULL
739 buffer.suboffsets = NULL
741 buffer.internal = NULL
# Release an export: the cached buffer is freed once its last export is
# released; any other buffer is freed immediately.
743 def __releasebuffer__(self, Py_buffer* buffer):
746 if <char*>buffer.buf is self._buffer:
747 self._buffer_refcnt -= 1
748 if self._buffer_refcnt == 0:
749 tree.xmlFree(self._buffer)
752 tree.xmlFree(<char*>buffer.buf)
755 property xslt_profile:
756 u"""Return an ElementTree with profiling data for the stylesheet run.
760 if self._profile is None:
762 root = self._profile.getroot()
765 return ElementTree(root)
# Create an _XSLTResultTree for `doc`, attach the profiling document and
# start with an empty buffer cache (no buffer, zero exports, zero length).
770 cdef _xsltResultTreeFactory(_Document doc, XSLT xslt, _Document profile):
771 cdef _XSLTResultTree result
772 result = <_XSLTResultTree>_newElementTree(doc, None, _XSLTResultTree)
774 result._profile = profile
775 result._buffer = NULL
776 result._buffer_refcnt = 0
777 result._buffer_len = 0
# Module-level side effects: both registration calls run once at import.
780 # functions like "output" and "write" are a potential security risk, but we
781 # rely on the user to configure XSLTAccessControl as needed
782 xslt.xsltRegisterAllExtras()
784 # enable EXSLT support for XSLT
785 xslt.exsltRegisterAll()
788 ################################################################################
# Precompiled helpers for processing-instruction pseudo-attributes.
# Both patterns require leading whitespace before the attribute name, which
# is why callers prepend u' ' to the PI text before matching.
# findall for name="value" pairs (value in single or double quotes)
791 cdef object _FIND_PI_ATTRIBUTES
792 _FIND_PI_ATTRIBUTES = re.compile(ur'\s+(\w+)\s*=\s*["\']([^"\']+)["\']', re.U).findall
# pattern matching only the href pseudo-attribute
794 cdef object _RE_PI_HREF
795 _RE_PI_HREF = re.compile(ur'\s+href\s*=\s*["\']([^"\']+)["\']')
797 cdef object _FIND_PI_HREF
798 _FIND_PI_HREF = _RE_PI_HREF.findall
800 cdef object _REPLACE_PI_HREF
801 _REPLACE_PI_HREF = _RE_PI_HREF.sub
# lazily created XPath object, see _findStylesheetByID
803 cdef XPath __findStylesheetByID
804 __findStylesheetByID = None
# Return the result of evaluating //xsl:stylesheet[@xml:id = $id] on `doc`.
# The XPath object is compiled on first use and cached in the module-level
# __findStylesheetByID variable.
806 cdef _findStylesheetByID(_Document doc, id):
807 global __findStylesheetByID
808 if __findStylesheetByID is None:
809 __findStylesheetByID = XPath(
810 u"//xsl:stylesheet[@xml:id = $id]",
811 namespaces={u"xsl" : u"http://www.w3.org/1999/XSL/Transform"})
812 return __findStylesheetByID(doc, id=id)
814 cdef class _XSLTProcessingInstruction(PIBase):
# Raises ValueError when the PI has no content, when its attributes are
# malformed, or when an embedded stylesheet reference matches no element or
# more than one element.
815 def parseXSL(self, parser=None):
816 u"""Try to parse the stylesheet referenced by this PI and return an
817 ElementTree for it. If the stylesheet is embedded in the same
818 document (referenced via xml:id), find and return an ElementTree for
819 the stylesheet Element.
821 The optional ``parser`` keyword argument can be passed to specify the
822 parser used to read from external stylesheet URLs.
824 cdef _Document result_doc
825 cdef _Element result_node
828 if self._c_node.content is NULL:
829 raise ValueError, u"PI lacks content"
# the href pattern needs leading whitespace, hence the prepended space
830 hrefs = _FIND_PI_HREF(u' ' + funicode(self._c_node.content))
832 raise ValueError, u"malformed PI attributes"
833 href_utf = utf8(hrefs[0])
834 c_href = _cstr(href_utf)
836 if c_href[0] != c'#':
837 # normal URL, try to parse from it
# resolve the href against the base URI of the PI node
838 c_href = tree.xmlBuildURI(
840 tree.xmlNodeGetBase(self._c_node.doc, self._c_node))
841 if c_href is not NULL:
844 result_doc = _parseDocumentFromURL(href_utf, parser)
845 return _elementTreeFactory(result_doc, None)
847 # ID reference to embedded stylesheet
849 c_href += 1 # skip leading '#'
# first try the document's ID lookup (xmlGetID) ...
850 c_attr = tree.xmlGetID(self._c_node.doc, c_href)
851 if c_attr is not NULL and c_attr.doc is self._c_node.doc:
852 result_node = _elementFactory(self._doc, c_attr.parent)
853 return _elementTreeFactory(result_node._doc, result_node)
# ... then fall back to an XPath search for xsl:stylesheet with that xml:id
856 root = _findStylesheetByID(self._doc, funicode(c_href))
858 raise ValueError, u"reference to non-existing embedded stylesheet"
860 raise ValueError, u"ambiguous reference to embedded stylesheet"
861 result_node = root[0]
862 return _elementTreeFactory(result_node._doc, result_node)
# Set a pseudo-attribute of the PI. Only 'href' is supported (AttributeError
# otherwise); a value containing '"' or '>' is rejected with ValueError.
# An existing href in the PI text is replaced, otherwise one is appended.
864 def set(self, key, value):
866 raise AttributeError, \
867 u"only setting the 'href' attribute is supported on XSLT-PIs"
870 elif u'"' in value or u'>' in value:
871 raise ValueError, u"Invalid URL, must not contain '\"' or '>'"
873 attrib = u' href="%s"' % value
# leading space so the whitespace-anchored href pattern can match at the start
874 text = u' ' + self.text
875 if _FIND_PI_HREF(text):
876 self.text = _REPLACE_PI_HREF(attrib, text)
878 self.text = text + attrib
880 def get(self, key, default=None):
881 for attr, value in _FIND_PI_ATTRIBUTES(u' ' + self.text):