2 * Copyright 2005 Maksim Orlovich <maksim@kde.org>
3 * Copyright (C) 2006 Apple Computer, Inc.
4 * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include "core/xml/XPathParser.h"
31 #include "bindings/v8/ExceptionState.h"
32 #include "core/dom/ExceptionCode.h"
33 #include "core/xml/XPathEvaluator.h"
34 #include "core/xml/XPathNSResolver.h"
35 #include "core/xml/XPathPath.h"
36 #include "wtf/StdLibExtras.h"
37 #include "wtf/text/StringHash.h"
39 using namespace WebCore;
41 using namespace Unicode;
42 using namespace XPath;
44 #include "XPathGrammar.h"
46 Parser* Parser::currentParser = 0;
48 enum XMLCat { NameStart, NameCont, NotPartOfName };
50 typedef HashMap<String, Step::Axis> AxisNamesMap;
52 static XMLCat charCat(UChar aChar)
54 //### might need to add some special cases from the XML spec.
59 if (aChar == '.' || aChar == '-')
61 CharCategory category = Unicode::category(aChar);
62 if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter))
64 if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit))
69 static void setUpAxisNamesMap(AxisNamesMap& axisNames)
75 const AxisName axisNameList[] = {
76 { "ancestor", Step::AncestorAxis },
77 { "ancestor-or-self", Step::AncestorOrSelfAxis },
78 { "attribute", Step::AttributeAxis },
79 { "child", Step::ChildAxis },
80 { "descendant", Step::DescendantAxis },
81 { "descendant-or-self", Step::DescendantOrSelfAxis },
82 { "following", Step::FollowingAxis },
83 { "following-sibling", Step::FollowingSiblingAxis },
84 { "namespace", Step::NamespaceAxis },
85 { "parent", Step::ParentAxis },
86 { "preceding", Step::PrecedingAxis },
87 { "preceding-sibling", Step::PrecedingSiblingAxis },
88 { "self", Step::SelfAxis }
90 for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i)
91 axisNames.set(axisNameList[i].name, axisNameList[i].axis);
94 static bool isAxisName(const String& name, Step::Axis& type)
96 DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());
98 if (axisNames.isEmpty())
99 setUpAxisNamesMap(axisNames);
101 AxisNamesMap::iterator it = axisNames.find(name);
102 if (it == axisNames.end())
108 static bool isNodeTypeName(const String& name)
110 DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
111 if (nodeTypeNames.isEmpty()) {
112 nodeTypeNames.add("comment");
113 nodeTypeNames.add("text");
114 nodeTypeNames.add("processing-instruction");
115 nodeTypeNames.add("node");
117 return nodeTypeNames.contains(name);
120 // Returns whether the current token can possibly be a binary operator, given
121 // the previous token. Necessary to disambiguate some of the operators
122 // (* (multiply), div, and, or, mod) in the [32] Operator rule
123 // (check http://www.w3.org/TR/xpath#exprlex).
124 bool Parser::isBinaryOperatorContext() const
126 switch (m_lastTokenType) {
128 case '@': case AXISNAME: case '(': case '[': case ',':
129 case AND: case OR: case MULOP:
130 case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
131 case EQOP: case RELOP:
138 void Parser::skipWS()
140 while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
144 Token Parser::makeTokenAndAdvance(int code, int advance)
146 m_nextPos += advance;
150 Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
152 m_nextPos += advance;
153 return Token(code, val);
156 Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
158 m_nextPos += advance;
159 return Token(code, val);
162 // Returns next char if it's there and interesting, 0 otherwise
163 char Parser::peekAheadHelper()
165 if (m_nextPos + 1 >= m_data.length())
167 UChar next = m_data[m_nextPos + 1];
173 char Parser::peekCurHelper()
175 if (m_nextPos >= m_data.length())
177 UChar next = m_data[m_nextPos];
183 Token Parser::lexString()
185 UChar delimiter = m_data[m_nextPos];
186 int startPos = m_nextPos + 1;
188 for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
189 if (m_data[m_nextPos] == delimiter) {
190 String value = m_data.substring(startPos, m_nextPos - startPos);
193 ++m_nextPos; // Consume the char.
194 return Token(LITERAL, value);
198 // Ouch, went off the end -- report error.
199 return Token(XPATH_ERROR);
202 Token Parser::lexNumber()
204 int startPos = m_nextPos;
205 bool seenDot = false;
207 // Go until end or a non-digits character.
208 for (; m_nextPos < m_data.length(); ++m_nextPos) {
209 UChar aChar = m_data[m_nextPos];
210 if (aChar >= 0xff) break;
212 if (aChar < '0' || aChar > '9') {
213 if (aChar == '.' && !seenDot)
220 return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
223 bool Parser::lexNCName(String& name)
225 int startPos = m_nextPos;
226 if (m_nextPos >= m_data.length())
229 if (charCat(m_data[m_nextPos]) != NameStart)
232 // Keep going until we get a character that's not good for names.
233 for (; m_nextPos < m_data.length(); ++m_nextPos)
234 if (charCat(m_data[m_nextPos]) == NotPartOfName)
237 name = m_data.substring(startPos, m_nextPos - startPos);
241 bool Parser::lexQName(String& name)
249 // If the next character is :, what we just got it the prefix, if not,
250 // it's the whole thing.
251 if (peekAheadHelper() != ':') {
260 name = n1 + ":" + n2;
264 Token Parser::nextTokenInternal()
268 if (m_nextPos >= m_data.length())
271 char code = peekCurHelper();
273 case '(': case ')': case '[': case ']':
274 case '@': case ',': case '|':
275 return makeTokenAndAdvance(code);
279 case '0': case '1': case '2': case '3': case '4':
280 case '5': case '6': case '7': case '8': case '9':
283 char next = peekAheadHelper();
285 return makeTokenAndAdvance(DOTDOT, 2);
286 if (next >= '0' && next <= '9')
288 return makeTokenAndAdvance('.');
291 if (peekAheadHelper() == '/')
292 return makeTokenAndAdvance(SLASHSLASH, 2);
293 return makeTokenAndAdvance('/');
295 return makeTokenAndAdvance(PLUS);
297 return makeTokenAndAdvance(MINUS);
299 return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
301 if (peekAheadHelper() == '=')
302 return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
303 return Token(XPATH_ERROR);
305 if (peekAheadHelper() == '=')
306 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
307 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
309 if (peekAheadHelper() == '=')
310 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
311 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
313 if (isBinaryOperatorContext())
314 return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
316 return Token(NAMETEST, "*");
317 case '$': { // $ QName
321 return Token(XPATH_ERROR);
322 return Token(VARIABLEREFERENCE, name);
327 if (!lexNCName(name))
328 return Token(XPATH_ERROR);
331 // If we're in an operator context, check for any operator names
332 if (isBinaryOperatorContext()) {
333 if (name == "and") //### hash?
338 return Token(MULOP, NumericOp::OP_Mod);
340 return Token(MULOP, NumericOp::OP_Div);
343 // See whether we are at a :
344 if (peekCurHelper() == ':') {
346 // Any chance it's an axis name?
347 if (peekCurHelper() == ':') {
350 //It might be an axis name.
352 if (isAxisName(name, axis))
353 return Token(AXISNAME, axis);
354 // Ugh, :: is only valid in axis names -> error
355 return Token(XPATH_ERROR);
358 // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
360 if (peekCurHelper() == '*') {
362 return Token(NAMETEST, name + ":*");
365 // Make a full qname.
368 return Token(XPATH_ERROR);
370 name = name + ":" + n2;
374 if (peekCurHelper() == '(') {
375 //note: we don't swallow the (here!
377 //either node type of function name
378 if (isNodeTypeName(name)) {
379 if (name == "processing-instruction")
380 return Token(PI, name);
382 return Token(NODETYPE, name);
384 //must be a function name.
385 return Token(FUNCTIONNAME, name);
388 // At this point, it must be NAMETEST.
389 return Token(NAMETEST, name);
392 Token Parser::nextToken()
394 Token toRet = nextTokenInternal();
395 m_lastTokenType = toRet.type;
408 void Parser::reset(const String& data)
415 m_gotNamespaceError = false;
418 int Parser::lex(void* data)
420 YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
421 Token tok = nextToken();
425 yylval->axis = tok.axis;
428 yylval->numop = tok.numop;
432 yylval->eqop = tok.eqop;
438 case VARIABLEREFERENCE:
441 yylval->str = new String(tok.str);
442 registerString(yylval->str);
449 bool Parser::expandQName(const String& qName, AtomicString& localName, AtomicString& namespaceURI)
451 size_t colon = qName.find(':');
452 if (colon != kNotFound) {
455 namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
456 if (namespaceURI.isNull())
458 localName = AtomicString(qName.substring(colon + 1));
460 localName = AtomicString(qName);
466 Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionState& exceptionState)
470 m_resolver = resolver;
472 Parser* oldParser = currentParser;
473 currentParser = this;
474 int parseError = xpathyyparse(this);
475 currentParser = oldParser;
478 deleteAllValues(m_parseNodes);
479 m_parseNodes.clear();
481 HashSet<Vector<OwnPtr<Predicate> >*>::iterator pend = m_predicateVectors.end();
482 for (HashSet<Vector<OwnPtr<Predicate> >*>::iterator it = m_predicateVectors.begin(); it != pend; ++it)
484 m_predicateVectors.clear();
486 HashSet<Vector<OwnPtr<Expression> >*>::iterator eend = m_expressionVectors.end();
487 for (HashSet<Vector<OwnPtr<Expression> >*>::iterator it = m_expressionVectors.begin(); it != eend; ++it)
489 m_expressionVectors.clear();
491 deleteAllValues(m_strings);
494 deleteAllValues(m_nodeTests);
499 if (m_gotNamespaceError)
500 exceptionState.throwDOMException(NamespaceError, "The string '" + statement + "' contains unresolvable namespaces.");
502 exceptionState.throwDOMException(SyntaxError, "The string '" + statement + "' is not a valid XPath expression.");
506 ASSERT(m_parseNodes.size() == 1);
507 ASSERT(*m_parseNodes.begin() == m_topExpr);
508 ASSERT(m_expressionVectors.size() == 0);
509 ASSERT(m_predicateVectors.size() == 0);
510 ASSERT(m_strings.size() == 0);
511 ASSERT(m_nodeTests.size() == 0);
513 m_parseNodes.clear();
514 Expression* result = m_topExpr;
520 void Parser::registerParseNode(ParseNode* node)
525 ASSERT(!m_parseNodes.contains(node));
527 m_parseNodes.add(node);
530 void Parser::unregisterParseNode(ParseNode* node)
535 ASSERT(m_parseNodes.contains(node));
537 m_parseNodes.remove(node);
540 void Parser::registerPredicateVector(Vector<OwnPtr<Predicate> >* vector)
545 ASSERT(!m_predicateVectors.contains(vector));
547 m_predicateVectors.add(vector);
550 void Parser::deletePredicateVector(Vector<OwnPtr<Predicate> >* vector)
555 ASSERT(m_predicateVectors.contains(vector));
557 m_predicateVectors.remove(vector);
562 void Parser::registerExpressionVector(Vector<OwnPtr<Expression> >* vector)
567 ASSERT(!m_expressionVectors.contains(vector));
569 m_expressionVectors.add(vector);
572 void Parser::deleteExpressionVector(Vector<OwnPtr<Expression> >* vector)
577 ASSERT(m_expressionVectors.contains(vector));
579 m_expressionVectors.remove(vector);
583 void Parser::registerString(String* s)
588 ASSERT(!m_strings.contains(s));
593 void Parser::deleteString(String* s)
598 ASSERT(m_strings.contains(s));
604 void Parser::registerNodeTest(Step::NodeTest* t)
609 ASSERT(!m_nodeTests.contains(t));
614 void Parser::deleteNodeTest(Step::NodeTest* t)
619 ASSERT(m_nodeTests.contains(t));
621 m_nodeTests.remove(t);