src/lxml/tests/test_css.txt

   1 A quick test of tokenizing:
   2
   3     >>> from lxml.cssselect import tokenize, parse
   4     >>> def ptok(s):
   5     ...     for item in tokenize(s):
   6     ...         print(repr(item).replace("u'", "'"))
   7     >>> ptok('E > f[a~="y\\"x"]')
   8     Symbol('E', 0)
   9     Token('>', 2)
  10     Symbol('f', 4)
  11     Token('[', 5)
  12     Symbol('a', 6)
  13     Token('~=', 7)
  14     String('y"x', 9)
  15     Token(']', 15)
  16
  17 Then a test of parsing:
  18
  19     >>> parse('td.foo, .bar')
  20     Or([Class[Element[td].foo], CombinedSelector[Element[*] <followed> Class[Element[*].bar]]])
  21     >>> parse('div, td.foo, div.bar span')
  22     Or([Element[div], Class[Element[td].foo], CombinedSelector[Class[Element[div].bar] <followed> Element[span]]])
  23     >>> parse('div > p')
  24     CombinedSelector[Element[div] > Element[p]]
  25     >>> parse('td:first')
  26     Pseudo[Element[td]:first]
  27     >>> parse('a[name]')
  28     Attrib[Element[a][name]]
  29     >>> repr(parse('a[rel="include"]')).replace("u'", "'")
  30     "Attrib[Element[a][rel = String('include', 6)]]"
  31     >>> repr(parse('a[hreflang |= \'en\']')).replace("u'", "'")
  32     "Attrib[Element[a][hreflang |= String('en', 14)]]"
  33     >>> parse('div:nth-child(10)')
  34     Function[Element[div]:nth-child(10)]
  35     >>> parse('div:nth-of-type(10)')
  36     Function[Element[div]:nth-of-type(10)]
  37     >>> parse('div div:nth-of-type(10) .aclass')
  38     CombinedSelector[CombinedSelector[Element[div] <followed> Function[Element[div]:nth-of-type(10)]] <followed> Class[Element[*].aclass]]
  39     >>> parse('label:only')
  40     Pseudo[Element[label]:only]
  41     >>> parse('a:lang(fr)')
  42     Function[Element[a]:lang(Element[fr])]
  43     >>> repr(parse('div:contains("foo")')).replace("u'", "'")
  44     "Function[Element[div]:contains(String('foo', 13))]"
  45     >>> parse('div#foobar')
  46     Hash[Element[div]#foobar]
  47     >>> parse('div:not(div.foo)')
  48     Function[Element[div]:not(Class[Element[div].foo])]
  49     >>> parse('td ~ th')
  50     CombinedSelector[Element[td] ~ Element[th]]
  51
  52 Some parse error tests:
  53
  54     >>> try: parse('attributes(href)/html/body/a')
  55     ... except: # Py2, Py3, ...
  56     ...     import sys
  57     ...     print(str(sys.exc_info()[1]).replace("(u'", "('"))
  58     Expected selector, got '(' at [Symbol('attributes', 0)] -> Token('(', 10)
  59
  60 Now a test of translation to XPath:
  61
  62     >>> def xpath(css):
  63     ...     print(parse(css).xpath())
  64     >>> xpath('*')
  65     *
  66     >>> xpath('E')
  67     e
  68     >>> xpath('E[foo]')
  69     e[@foo]
  70     >>> xpath('E[foo="bar"]')
  71     e[@foo = 'bar']
  72     >>> xpath('E[foo~="bar"]')
  73     e[contains(concat(' ', normalize-space(@foo), ' '), ' bar ')]
  74     >>> xpath('E[foo^="bar"]')
  75     e[starts-with(@foo, 'bar')]
  76     >>> xpath('E[foo$="bar"]')
  77     e[substring(@foo, string-length(@foo)-2) = 'bar']
  78     >>> xpath('E[foo*="bar"]')
  79     e[contains(@foo, 'bar')]
  80     >>> xpath('E[hreflang|="en"]')
  81     e[@hreflang = 'en' or starts-with(@hreflang, 'en-')]
  82     >>> #xpath('E:root')
  83     >>> xpath('E:nth-child(1)')
  84     */*[name() = 'e' and (position() = 1)]
  85     >>> xpath('E:nth-last-child(1)')
  86     */*[name() = 'e' and (position() = last() - 1)]
  87     >>> xpath('E:nth-last-child(2n+2)')
  88     */*[name() = 'e' and ((position() +2) mod -2 = 0 and position() < (last() -2))]
  89     >>> xpath('E:nth-of-type(1)')
  90     */e[position() = 1]
  91     >>> xpath('E:nth-last-of-type(1)')
  92     */e[position() = last() - 1]
  93     >>> xpath('E:nth-last-of-type(1)')
  94     */e[position() = last() - 1]
  95     >>> xpath('div E:nth-last-of-type(1) .aclass')
  96     div/descendant::e[position() = last() - 1]/descendant::*[contains(concat(' ', normalize-space(@class), ' '), ' aclass ')]
  97     >>> xpath('E:first-child')
  98     */*[name() = 'e' and (position() = 1)]
  99     >>> xpath('E:last-child')
 100     */*[name() = 'e' and (position() = last())]
 101     >>> xpath('E:first-of-type')
 102     */e[position() = 1]
 103     >>> xpath('E:last-of-type')
 104     */e[position() = last()]
 105     >>> xpath('E:only-child')
 106     */*[name() = 'e' and (last() = 1)]
 107     >>> xpath('E:only-of-type')
 108     e[last() = 1]
 109     >>> xpath('E:empty')
 110     e[not(*) and not(normalize-space())]
 111     >>> xpath('E:contains("foo")')
 112     e[contains(css:lower-case(string(.)), 'foo')]
 113     >>> xpath('E.warning')
 114     e[contains(concat(' ', normalize-space(@class), ' '), ' warning ')]
 115     >>> xpath('E#myid')
 116     e[@id = 'myid']
 117     >>> xpath('E:not(:contains("foo"))')
 118     e[not(contains(css:lower-case(string(.)), 'foo'))]
 119     >>> xpath('E F')
 120     e/descendant::f
 121     >>> xpath('E > F')
 122     e/f
 123     >>> xpath('E + F')
 124     e/following-sibling::*[name() = 'f' and (position() = 1)]
 125     >>> xpath('E ~ F')
 126     e/following-sibling::f
 127     >>> xpath('div#container p')
 128     div[@id = 'container']/descendant::p
 129     >>> xpath('p *:only-of-type')
 130     Traceback (most recent call last):
 131         ...
 132     NotImplementedError: *:only-of-type is not implemented
 133
 134 Now a Unicode character test:
 135
 136     >>> from lxml.cssselect import css_to_xpath
 137     >>> import sys
 138     >>> if sys.version_info[0] >= 3:
 139     ...     css_expr = '.a\xc1b'
 140     ... else:
 141     ...     css_expr = '.a\xc1b'.decode('ISO-8859-1')
 142
 143     >>> xpath_expr = css_to_xpath(css_expr)
 144     >>> print( css_expr[1:] in xpath_expr )
 145     True
 146     >>> print( xpath_expr.encode('ascii', 'xmlcharrefreplace').decode('ASCII') )
 147     descendant-or-self::*[contains(concat(' ', normalize-space(@class), ' '), ' a&#193;b ')]
 148
 149 And some special character tests:
 150
 151     >>> print( css_to_xpath('*[aval="\'"]') )
 152     descendant-or-self::*[@aval = "'"]
 153     >>> print( css_to_xpath('*[aval="\'\'\'"]') )
 154     descendant-or-self::*[@aval = "'''"]
 155     >>> print( css_to_xpath('*[aval=\'"\']') )
 156     descendant-or-self::*[@aval = '"']
 157     >>> print( css_to_xpath('*[aval=\'"""\']') )
 158     descendant-or-self::*[@aval = '"""']
 159
 160 Some Unicode escape tests (including the trailing whitespace rules):
 161
 162     >>> print( css_to_xpath(r'*[aval="\'\22\'"]') )    # \22 == '"'
 163     descendant-or-self::*[@aval = concat("'",'"',"'")]
 164     >>> print( css_to_xpath(r'*[aval="\'\22 2\'"]') )
 165     descendant-or-self::*[@aval = concat("'",'"2',"'")]
 166     >>> print( css_to_xpath(r'*[aval="\'\20  \'"]') )  # \20 == ' '
 167     descendant-or-self::*[@aval = "'  '"]
 168     >>> print( css_to_xpath('*[aval="\'\\20\r\n \'"]') )
 169     descendant-or-self::*[@aval = "'  '"]
 170
 171 Then some tests for parse_series:
 172
 173     >>> from lxml.cssselect import parse_series
 174     >>> parse_series('1n+3')
 175     (1, 3)
 176     >>> parse_series('n-5')
 177     (1, -5)
 178     >>> parse_series('odd')
 179     (2, 1)
 180     >>> parse_series('3n')
 181     (3, 0)
 182     >>> parse_series('n')
 183     (1, 0)
 184     >>> parse_series('5')
 185     (0, 5)