Imported Upstream version 12.1.0
[contrib/python-twisted.git] / twisted / words / xish / xpath.py
1 # -*- test-case-name: twisted.words.test.test_xpath -*-
2 #
3 # Copyright (c) Twisted Matrix Laboratories.
4 # See LICENSE for details.
5
6 """
7 XPath query support.
8
9 This module provides L{XPathQuery} to match
10 L{domish.Element<twisted.words.xish.domish.Element>} instances against
11 XPath-like expressions.
12 """
13
14 try:
15     import cStringIO as StringIO
16 except ImportError:
17     import StringIO
18
class LiteralValue(str):
    """
    A string literal appearing in a query expression.

    The literal is a plain C{str} that additionally exposes the C{value}
    method shared by the other expression objects in this module.
    """

    def value(self, elem):
        """
        Return this literal itself.

        @param elem: The element the expression is evaluated against; it is
            not consulted.
        @return: C{self}.
        """
        return self
22
23
class IndexValue:
    """
    Positional expression selecting one child of an element.

    @ivar index: Zero-based position into the element's C{children}
        sequence, derived from the one-based position given to the
        initializer.
    """

    def __init__(self, index):
        """
        @param index: One-based position of the wanted child; anything
            accepted by C{int}.
        """
        # Positions in the query are one-based, Python sequences zero-based.
        # NOTE(review): a position of 0 becomes -1, which Python indexing
        # resolves to the last child -- confirm whether that is intended.
        self.index = int(index) - 1

    def value(self, elem):
        """
        Return the child of C{elem} at the stored position.

        @param elem: Object exposing a C{children} sequence.
        @return: The selected child, or C{None} when C{elem} has too few
            children.  Returning C{None} instead of letting C{IndexError}
            escape lets a caller that tests the result for truth treat the
            missing child as "no match".
        """
        try:
            return elem.children[self.index]
        except IndexError:
            return None
30
31
class AttribValue:
    """
    Expression yielding the value of a named attribute of an element.

    The attribute name C{"xmlns"} is special-cased: for it, the element's
    C{uri} is returned instead of an entry from its C{attributes} mapping.

    @ivar attribname: Name of the attribute to look up.
    """

    def __init__(self, attribname):
        """
        @param attribname: Name of the attribute this expression reads.
        """
        self.attribname = attribname
        if attribname == "xmlns":
            # Shadow the class-level C{value} on this instance so that
            # evaluation goes straight to the namespace lookup.
            self.value = self.value_ns

    def value_ns(self, elem):
        """
        Return the C{uri} of C{elem}.

        @param elem: Object exposing a C{uri} attribute.
        """
        return elem.uri

    def value(self, elem):
        """
        Return the attribute's value on C{elem}.

        @param elem: Object exposing an C{attributes} mapping.
        @return: The stored value, or C{None} when the attribute is absent.
        """
        attrs = elem.attributes
        return attrs[self.attribname] if self.attribname in attrs else None
46
47
class CompareValue:
    """
    Expression comparing the values of two sub-expressions.

    @ivar lhs: Left hand side expression; must provide C{value(elem)}.
    @ivar rhs: Right hand side expression; must provide C{value(elem)}.
    @ivar value: Bound method performing the comparison selected by the
        operator passed to the initializer.
    """

    def __init__(self, lhs, op, rhs):
        """
        @param lhs: Left hand side expression.
        @param op: The operator.  C{"="} selects equality; any other value
            selects inequality.
        @param rhs: Right hand side expression.
        """
        self.lhs = lhs
        self.rhs = rhs
        self.value = self._compareEqual if op == "=" else self._compareNotEqual

    def _compareEqual(self, elem):
        """
        Tell whether both sides evaluate to equal values for C{elem}.

        @param elem: The element to evaluate both sides against.
        """
        left = self.lhs.value(elem)
        right = self.rhs.value(elem)
        return left == right

    def _compareNotEqual(self, elem):
        """
        Tell whether both sides evaluate to different values for C{elem}.

        @param elem: The element to evaluate both sides against.
        """
        left = self.lhs.value(elem)
        right = self.rhs.value(elem)
        return left != right
62
63
class BooleanValue:
    """
    Boolean combination (C{and} / C{or}) of two sub-expressions.

    @ivar lhs: Left hand side expression of the operator.
    @ivar rhs: Right hand side expression of the operator.
    @ivar value: Bound method that evaluates this expression for a given
        element; chosen once, from the operator passed to the initializer.
    """

    def __init__(self, lhs, op, rhs):
        """
        @param lhs: Left hand side expression.
        @param op: The operator.  C{'and'} selects conjunction; any other
            value selects disjunction.
        @param rhs: Right hand side expression.
        """
        self.lhs = lhs
        self.rhs = rhs
        self.value = self._booleanAnd if op == "and" else self._booleanOr

    def _booleanAnd(self, elem):
        """
        Evaluate the conjunction of both sides for an element.

        The right hand side is only evaluated when the left hand side is
        true, and the result is the deciding operand's own value rather
        than a strict boolean.

        @param elem: The element to evaluate the expressions against.
        """
        left = self.lhs.value(elem)
        if not left:
            return left
        return self.rhs.value(elem)

    def _booleanOr(self, elem):
        """
        Evaluate the disjunction of both sides for an element.

        The right hand side is only evaluated when the left hand side is
        false, and the result is the deciding operand's own value rather
        than a strict boolean.

        @param elem: The element to evaluate the expressions against.
        """
        left = self.lhs.value(elem)
        if left:
            return left
        return self.rhs.value(elem)
97
98
def Function(fname):
    """
    Create the object implementing the query function called C{fname}.

    The implementation is the module-level global named
    C{_<fname>_Function}; it is called without arguments and the result is
    returned.

    @param fname: Name of the function as written in the query.
    @raise KeyError: If this module has no global of the derived name.
    """
    factory = globals()["_%s_Function" % fname]
    return factory()
106
107
class _not_Function:
    """
    Implementation of the C{not()} query function: logical negation of one
    wrapped expression.

    @ivar baseValue: The expression to negate; C{None} until L{setParams}
        has been called.
    """

    def __init__(self):
        self.baseValue = None

    def setParams(self, baseValue):
        """
        Store the expression whose value is to be negated.

        @param baseValue: Expression object providing C{value(elem)}.
        """
        self.baseValue = baseValue

    def value(self, elem):
        """
        Return the negated truth value of the wrapped expression.

        @param elem: The element to evaluate the wrapped expression against.
        @return: C{True} if the wrapped expression is false for C{elem},
            C{False} otherwise.
        """
        wrapped = self.baseValue.value(elem)
        return not wrapped
117
118
class _text_Function:
    """
    Implementation of the C{text()} query function: the string form of an
    element.
    """

    def setParams(self):
        """
        Accept the (empty) parameter list; there is nothing to store.
        """
        return None

    def value(self, elem):
        """
        Return C{str(elem)}.

        @param elem: The element to convert.  What the string contains is
            determined by the element's own string conversion.
        """
        text = str(elem)
        return text
125
126
class _Location:
    """
    Matcher for a single element plus, optionally, a chain of child matchers.

    An element satisfies this location when its name equals C{elementName}
    (if one is set) and every predicate evaluates to a true value for it.
    When C{childLocation} is set, the rest of the work is delegated to it for
    each of the element's child elements.

    @ivar predicates: Objects with a C{value(elem)} method; all of them must
        return a true value for an element to be accepted.
    @ivar elementName: Required element name, or C{None} to accept any name.
    @ivar childLocation: Matcher applied to the child elements, or C{None}
        when this is the last step of the chain.
    """

    # Child types that count as character data.  C{unicode} is only defined
    # on Python 2, so fall back to C{str} alone where it is missing.
    try:
        _textTypes = (str, unicode)
    except NameError:
        _textTypes = (str,)

    def __init__(self):
        self.predicates = []
        self.elementName = None
        self.childLocation = None

    def matchesPredicates(self, elem):
        """
        Tell whether C{elem} itself satisfies this step.

        @param elem: Element exposing a C{name} attribute.
        @return: C{True} if the name (when one is required) matches and all
            predicates are true for C{elem}, C{False} otherwise.
        """
        if self.elementName is not None and self.elementName != elem.name:
            return False
        # all() stops at the first failing predicate, like an explicit loop.
        return all(p.value(elem) for p in self.predicates)

    def matches(self, elem):
        """
        Tell whether C{elem} satisfies this step and, if there is a child
        location, whether at least one child element satisfies that.

        @param elem: Element exposing C{name} and C{elements()}.
        @return: C{True} or C{False}.
        """
        if not self.matchesPredicates(elem):
            return False
        if self.childLocation is None:
            return True
        return any(self.childLocation.matches(c) for c in elem.elements())

    def queryForString(self, elem, resultbuf):
        """
        Write the string form of every element reached by this chain.

        @param elem: Element to start from.
        @param resultbuf: Object with a C{write} method that receives
            C{str(element)} for each element accepted by the last step.
        """
        if not self.matchesPredicates(elem):
            return

        if self.childLocation is not None:
            for c in elem.elements():
                self.childLocation.queryForString(c, resultbuf)
        else:
            resultbuf.write(str(elem))

    def queryForNodes(self, elem, resultlist):
        """
        Collect every element reached by this chain.

        @param elem: Element to start from.
        @param resultlist: List to which each element accepted by the last
            step is appended.
        """
        if not self.matchesPredicates(elem):
            return

        if self.childLocation is not None:
            for c in elem.elements():
                self.childLocation.queryForNodes(c, resultlist)
        else:
            resultlist.append(elem)

    def queryForStringList(self, elem, resultlist):
        """
        Collect the string children of every element reached by this chain.

        @param elem: Element to start from.
        @param resultlist: List to which each string found directly in the
            C{children} of an accepted element is appended.
        """
        if not self.matchesPredicates(elem):
            return

        if self.childLocation is not None:
            for c in elem.elements():
                self.childLocation.queryForStringList(c, resultlist)
        else:
            for c in elem.children:
                if isinstance(c, self._textTypes):
                    resultlist.append(c)
187
188
class _AnyLocation:
    """
    Matcher that may apply at any depth at or below the starting element.

    Unlike a matcher anchored at the starting element, the search recurses
    through C{elements()} of each element, treating every descendant as a
    possible starting point.

    @ivar predicates: Objects with a C{value(elem)} method; all of them must
        return a true value for an element to be accepted.
    @ivar elementName: Required element name, or C{None} to accept any name.
    @ivar childLocation: Matcher that at least one child element of an
        accepted element must satisfy, or C{None} for no such requirement.
    """

    # Child types that count as character data.  C{unicode} is only defined
    # on Python 2, so fall back to C{str} alone where it is missing.
    try:
        _textTypes = (str, unicode)
    except NameError:
        _textTypes = (str,)

    def __init__(self):
        self.predicates = []
        self.elementName = None
        self.childLocation = None

    def matchesPredicates(self, elem):
        """
        Tell whether every predicate is true for C{elem}.

        The element name is not checked here.

        @param elem: Element handed to each predicate's C{value}.
        @return: C{True} or C{False}.
        """
        return all(p.value(elem) for p in self.predicates)

    def listParents(self, elem, parentlist):
        """
        Append the names of C{elem}'s ancestors and of C{elem} itself to
        C{parentlist}, outermost ancestor first.

        @param elem: Element exposing C{parent} and C{name}.
        @param parentlist: List receiving the names.
        """
        if elem.parent is not None:
            self.listParents(elem.parent, parentlist)
        parentlist.append(elem.name)

    def isRootMatch(self, elem):
        """
        Tell whether C{elem} itself (not a descendant) satisfies this
        location, including the child location if there is one.

        @param elem: Element exposing C{name} and C{elements()}.
        @return: C{True} or C{False}.
        """
        if self.elementName is not None and self.elementName != elem.name:
            return False
        if not self.matchesPredicates(elem):
            return False
        if self.childLocation is None:
            return True
        return any(self.childLocation.matches(c) for c in elem.elements())

    def findFirstRootMatch(self, elem):
        """
        Find the first element related to a match starting at C{elem}.

        @param elem: Element exposing C{name} and C{elements()}.
        @return: If C{elem} passes the name and predicate checks: C{elem}
            itself when there is no child location, otherwise the first
            child element accepted by the child location (C{None} if there
            is none).  If C{elem} fails those checks: the first child
            element for which L{matches} is true, or C{None}.
        """
        nameMatches = (self.elementName is None
                       or self.elementName == elem.name)
        if nameMatches and self.matchesPredicates(elem):
            # Thus far, the name matches and the predicates match; now look
            # into the children for the first one that matches the rest of
            # the structure.
            if self.childLocation is None:
                # No child location; this element is the match.
                return elem
            for c in elem.elements():
                if self.childLocation.matches(c):
                    return c
            return None

        # Name or predicates didn't match, so treat each child as the root
        # and try again.
        for c in elem.elements():
            if self.matches(c):
                return c
        # No children matched...
        return None

    def matches(self, elem):
        """
        Tell whether C{elem} or any of its descendants satisfies this
        location.

        @param elem: Element exposing C{name} and C{elements()}.
        @return: C{True} or C{False}.
        """
        if self.isRootMatch(elem):
            return True
        # The element itself isn't a match; treat each child as the root
        # and try again.
        return any(self.matches(c) for c in elem.elements())

    def queryForString(self, elem, resultbuf):
        """
        Not supported for this kind of location.

        @raise NotImplementedError: Always.
        """
        raise NotImplementedError(
            "queryForString is not implemented for any location")

    def queryForNodes(self, elem, resultlist):
        """
        Collect C{elem} and every descendant for which L{isRootMatch} is
        true, parents before their children.

        @param elem: Element to start from.
        @param resultlist: List to which the accepted elements are appended.
        """
        # First check whether _this_ element is a match...
        if self.isRootMatch(elem):
            resultlist.append(elem)

        # ...then check each child.
        for c in elem.elements():
            self.queryForNodes(c, resultlist)

    def queryForStringList(self, elem, resultlist):
        """
        Collect the string children of C{elem} and of every descendant for
        which L{isRootMatch} is true.

        @param elem: Element to start from.
        @param resultlist: List to which each string found directly in the
            C{children} of an accepted element is appended.
        """
        if self.isRootMatch(elem):
            for c in elem.children:
                if isinstance(c, self._textTypes):
                    resultlist.append(c)
        for c in elem.elements():
            self.queryForStringList(c, resultlist)
276
277
class XPathQuery:
    """
    A parsed query that can be evaluated against elements.

    @ivar queryStr: The query text this object was created from.
    @ivar baseLocation: The object returned by the parser for C{queryStr};
        all evaluation methods delegate to it.
    """

    def __init__(self, queryStr):
        """
        Parse C{queryStr}.

        @param queryStr: The query text.
        """
        self.queryStr = queryStr
        # The parser is imported at call time rather than at module level.
        # NOTE(review): presumably to avoid an import cycle -- confirm.
        from twisted.words.xish.xpathparser import parse
        self.baseLocation = parse('XPATH', queryStr)

    def __hash__(self):
        """
        Hash by the query text.
        """
        return hash(self.queryStr)

    def matches(self, elem):
        """
        Return whatever the parsed location's C{matches} reports for
        C{elem}.

        @param elem: The element to test.
        """
        location = self.baseLocation
        return location.matches(elem)

    def queryForString(self, elem):
        """
        Return the text the parsed location writes for C{elem}.

        @param elem: The element to query.
        @return: Everything the location wrote, concatenated; an empty
            string if it wrote nothing.
        """
        buf = StringIO.StringIO()
        self.baseLocation.queryForString(elem, buf)
        return buf.getvalue()

    def queryForNodes(self, elem):
        """
        Return the nodes the parsed location collects for C{elem}.

        @param elem: The element to query.
        @return: A non-empty list of results, or C{None} if there are none.
        """
        found = []
        self.baseLocation.queryForNodes(elem, found)
        return found if found else None

    def queryForStringList(self, elem):
        """
        Return the strings the parsed location collects for C{elem}.

        @param elem: The element to query.
        @return: A non-empty list of results, or C{None} if there are none.
        """
        found = []
        self.baseLocation.queryForStringList(elem, found)
        return found if found else None
310
311
# Parsed queries created by internQuery, keyed by query string.  Entries are
# never removed.
__internedQueries = {}


def internQuery(queryString):
    """
    Return the shared L{XPathQuery} for C{queryString}.

    The query is parsed the first time a given string is seen; afterwards
    the same object is handed out again.

    @param queryString: The query text; used as the cache key.
    """
    query = __internedQueries.get(queryString)
    if query is None:
        query = XPathQuery(queryString)
        __internedQueries[queryString] = query
    return query
318
319
def matches(xpathstr, elem):
    """
    Test C{elem} against the query C{xpathstr}.

    @param xpathstr: The query text; its parsed form is obtained from
        L{internQuery}.
    @param elem: The element to test.
    @return: The result of the query object's C{matches} method.
    """
    query = internQuery(xpathstr)
    return query.matches(elem)
322
323
def queryForStringList(xpathstr, elem):
    """
    Run the query C{xpathstr} against C{elem}, collecting strings.

    @param xpathstr: The query text; its parsed form is obtained from
        L{internQuery}.
    @param elem: The element to query.
    @return: The result of the query object's C{queryForStringList} method.
    """
    query = internQuery(xpathstr)
    return query.queryForStringList(elem)
326
327
def queryForString(xpathstr, elem):
    """
    Run the query C{xpathstr} against C{elem}, producing a single string.

    @param xpathstr: The query text; its parsed form is obtained from
        L{internQuery}.
    @param elem: The element to query.
    @return: The result of the query object's C{queryForString} method.
    """
    query = internQuery(xpathstr)
    return query.queryForString(elem)
330
331
def queryForNodes(xpathstr, elem):
    """
    Run the query C{xpathstr} against C{elem}, collecting nodes.

    @param xpathstr: The query text; its parsed form is obtained from
        L{internQuery}.
    @param elem: The element to query.
    @return: The result of the query object's C{queryForNodes} method.
    """
    query = internQuery(xpathstr)
    return query.queryForNodes(elem)