1 A quick test of tokenizing:
3 >>> from lxml.cssselect import tokenize, parse
5 ... for item in tokenize(s):
6 ... print(repr(item).replace("u'", "'"))
7 >>> ptok('E > f[a~="y\\"x"]')
19 >>> parse('td.foo, .bar')
20 Or([Class[Element[td].foo], CombinedSelector[Element[*] <followed> Class[Element[*].bar]]])
21 >>> parse('div, td.foo, div.bar span')
22 Or([Element[div], Class[Element[td].foo], CombinedSelector[Class[Element[div].bar] <followed> Element[span]]])
24 CombinedSelector[Element[div] > Element[p]]
26 Pseudo[Element[td]:first]
28 Attrib[Element[a][name]]
29 >>> repr(parse('a[rel="include"]')).replace("u'", "'")
30 "Attrib[Element[a][rel = String('include', 6)]]"
31 >>> repr(parse('a[hreflang |= \'en\']')).replace("u'", "'")
32 "Attrib[Element[a][hreflang |= String('en', 14)]]"
33 >>> parse('div:nth-child(10)')
34 Function[Element[div]:nth-child(10)]
35 >>> parse('div:nth-of-type(10)')
36 Function[Element[div]:nth-of-type(10)]
37 >>> parse('div div:nth-of-type(10) .aclass')
38 CombinedSelector[CombinedSelector[Element[div] <followed> Function[Element[div]:nth-of-type(10)]] <followed> Class[Element[*].aclass]]
39 >>> parse('label:only')
40 Pseudo[Element[label]:only]
41 >>> parse('a:lang(fr)')
42 Function[Element[a]:lang(Element[fr])]
43 >>> repr(parse('div:contains("foo")')).replace("u'", "'")
44 "Function[Element[div]:contains(String('foo', 13))]"
45 >>> parse('div#foobar')
46 Hash[Element[div]#foobar]
47 >>> parse('div:not(div.foo)')
48 Function[Element[div]:not(Class[Element[div].foo])]
50 CombinedSelector[Element[td] ~ Element[th]]
52 Some parse error tests:
54 >>> try: parse('attributes(href)/html/body/a')
55 ... except: # Py2, Py3, ...
57 ... print(str(sys.exc_info()[1]).replace("(u'", "('"))
58 Expected selector, got '(' at [Symbol('attributes', 0)] -> Token('(', 10)
63 ... print(parse(css).xpath())
70 >>> xpath('E[foo="bar"]')
72 >>> xpath('E[foo~="bar"]')
73 e[contains(concat(' ', normalize-space(@foo), ' '), ' bar ')]
74 >>> xpath('E[foo^="bar"]')
75 e[starts-with(@foo, 'bar')]
76 >>> xpath('E[foo$="bar"]')
77 e[substring(@foo, string-length(@foo)-2) = 'bar']
78 >>> xpath('E[foo*="bar"]')
79 e[contains(@foo, 'bar')]
80 >>> xpath('E[hreflang|="en"]')
81 e[@hreflang = 'en' or starts-with(@hreflang, 'en-')]
83 >>> xpath('E:nth-child(1)')
84 */*[name() = 'e' and (position() = 1)]
85 >>> xpath('E:nth-last-child(1)')
86 */*[name() = 'e' and (position() = last() - 1)]
87 >>> xpath('E:nth-last-child(2n+2)')
88 */*[name() = 'e' and ((position() +2) mod -2 = 0 and position() < (last() -2))]
89 >>> xpath('E:nth-of-type(1)')
91 >>> xpath('E:nth-last-of-type(1)')
92 */e[position() = last() - 1]
93 >>> xpath('E:nth-last-of-type(1)')
94 */e[position() = last() - 1]
95 >>> xpath('div E:nth-last-of-type(1) .aclass')
96 div/descendant::e[position() = last() - 1]/descendant::*[contains(concat(' ', normalize-space(@class), ' '), ' aclass ')]
97 >>> xpath('E:first-child')
98 */*[name() = 'e' and (position() = 1)]
99 >>> xpath('E:last-child')
100 */*[name() = 'e' and (position() = last())]
101 >>> xpath('E:first-of-type')
103 >>> xpath('E:last-of-type')
104 */e[position() = last()]
105 >>> xpath('E:only-child')
106 */*[name() = 'e' and (last() = 1)]
107 >>> xpath('E:only-of-type')
110 e[not(*) and not(normalize-space())]
111 >>> xpath('E:contains("foo")')
112 e[contains(css:lower-case(string(.)), 'foo')]
113 >>> xpath('E.warning')
114 e[contains(concat(' ', normalize-space(@class), ' '), ' warning ')]
117 >>> xpath('E:not(:contains("foo"))')
118 e[not(contains(css:lower-case(string(.)), 'foo'))]
124 e/following-sibling::*[name() = 'f' and (position() = 1)]
126 e/following-sibling::f
127 >>> xpath('div#container p')
128 div[@id = 'container']/descendant::p
129 >>> xpath('p *:only-of-type')
130 Traceback (most recent call last):
132 NotImplementedError: *:only-of-type is not implemented
134 Now a Unicode character test:
136 >>> from lxml.cssselect import css_to_xpath
138 >>> if sys.version_info[0] >= 3:
139 ... css_expr = '.a\xc1b'
141 ... css_expr = '.a\xc1b'.decode('ISO-8859-1')
143 >>> xpath_expr = css_to_xpath(css_expr)
144 >>> print( css_expr[1:] in xpath_expr )
146 >>> print( xpath_expr.encode('ascii', 'xmlcharrefreplace').decode('ASCII') )
147 descendant-or-self::*[contains(concat(' ', normalize-space(@class), ' '), ' a&#193;b ')]
149 And some special character tests:
151 >>> print( css_to_xpath('*[aval="\'"]') )
152 descendant-or-self::*[@aval = "'"]
153 >>> print( css_to_xpath('*[aval="\'\'\'"]') )
154 descendant-or-self::*[@aval = "'''"]
155 >>> print( css_to_xpath('*[aval=\'"\']') )
156 descendant-or-self::*[@aval = '"']
157 >>> print( css_to_xpath('*[aval=\'"""\']') )
158 descendant-or-self::*[@aval = '"""']
160 Some Unicode escape tests (including the trailing whitespace rules):
162 >>> print( css_to_xpath(r'*[aval="\'\22\'"]') ) # \22 == '"'
163 descendant-or-self::*[@aval = concat("'",'"',"'")]
164 >>> print( css_to_xpath(r'*[aval="\'\22 2\'"]') )
165 descendant-or-self::*[@aval = concat("'",'"2',"'")]
166 >>> print( css_to_xpath(r'*[aval="\'\20 \'"]') ) # \20 == ' '
167 descendant-or-self::*[@aval = "' '"]
168 >>> print( css_to_xpath('*[aval="\'\\20\r\n \'"]') )
169 descendant-or-self::*[@aval = "' '"]
171 Then some tests for parse_series:
173 >>> from lxml.cssselect import parse_series
174 >>> parse_series('1n+3')
176 >>> parse_series('n-5')
178 >>> parse_series('odd')
180 >>> parse_series('3n')
182 >>> parse_series('n')
184 >>> parse_series('5')