1 /****************************************************************************
3 ** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
4 ** All rights reserved.
5 ** Contact: http://www.qt-project.org/
7 ** This file is part of the QtXmlPatterns module of the Qt Toolkit.
9 ** $QT_BEGIN_LICENSE:LGPL$
10 ** GNU Lesser General Public License Usage
11 ** This file may be used under the terms of the GNU Lesser General Public
12 ** License version 2.1 as published by the Free Software Foundation and
13 ** appearing in the file LICENSE.LGPL included in the packaging of this
14 ** file. Please review the following information to ensure the GNU Lesser
15 ** General Public License version 2.1 requirements will be met:
16 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
18 ** In addition, as a special exception, Nokia gives you certain additional
19 ** rights. These rights are described in the Nokia Qt LGPL Exception
20 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
22 ** GNU General Public License Usage
23 ** Alternatively, this file may be used under the terms of the GNU General
24 ** Public License version 3.0 as published by the Free Software Foundation
25 ** and appearing in the file LICENSE.GPL included in the packaging of this
26 ** file. Please review the following information to ensure the GNU General
27 ** Public License version 3.0 requirements will be met:
28 ** http://www.gnu.org/copyleft/gpl.html.
31 ** Alternatively, this file may be used in accordance with the terms and
32 ** conditions contained in a signed written agreement between you and Nokia.
40 ****************************************************************************/
46 // This file is not part of the Qt API. It exists purely as an
47 // implementation detail. This header file may change from version to
48 // version without notice, or even be removed.
51 #ifndef Patternist_XQueryTokenizer_H
52 #define Patternist_XQueryTokenizer_H
60 #include <private/qtokenizer_p.h>
71 * @short A hand-written tokenizer which tokenizes XQuery 1.0 & XPath 2.0,
72 * and delivers tokens to the Bison generated parser.
74 * @author Frans Englich <frans.englich@nokia.com>
76 class XQueryTokenizer : public Tokenizer
80 * Tokenizer states. Organized alphabetically.
99 ProcessingInstructionContent,
100 ProcessingInstructionName,
101 QuotAttributeContent,
109 XQueryTokenizer(const QString &query,
110 const QUrl &location,
111 const State startingState = Default);
113 virtual Token nextToken(YYLTYPE *const sourceLocator);
114 virtual int commenceScanOnly();
115 virtual void resumeTokenizationFrom(const int position);
120 virtual void setParserContext(const ParserContext::Ptr &parseInfo);
125 * Returns the character corresponding to the builtin reference @p
126 * reference. For instance, passing @c gt will give you '>' in return.
128 * If @p reference is an invalid character reference, a null QChar is
131 * @see QChar::isNull()
133 QChar charForReference(const QString &reference);
135 inline Token tokenAndChangeState(const TokenType code,
137 const int advance = 1);
138 inline Token tokenAndChangeState(const TokenType code,
139 const QString &value,
141 inline Token tokenAndAdvance(const TokenType code,
142 const int advance = 1);
143 QString tokenizeCharacterReference();
145 inline Token tokenizeStringLiteral();
146 inline Token tokenizeNumberLiteral();
149 * @returns the character @p length characters from the current
152 inline char peekAhead(const int length = 1) const;
155 * @returns whether the stream, starting from @p offset from the
156 * current position, matches @p chs. The length of @p chs is @p len.
158 inline bool aheadEquals(const char *const chs,
160 const int offset = 1) const;
162 inline Token tokenizeNCName();
163 static inline bool isOperatorKeyword(const TokenType);
165 static inline bool isDigit(const char ch);
166 static inline Token error();
167 inline TokenType consumeWhitespace();
170 * @short Returns the character at the current position, converted to
173 * Equivalent to calling:
176 * current().toAscii();
179 inline char peekCurrent() const;
182 * Disregarding encoding conversion, equivalent to calling:
188 inline const QChar current() const;
191 * @p hadWhitespace is always set to a proper value.
193 * @returns the length of whitespace scanned before reaching "::", or
194 * -1 if something else was found.
196 int peekForColonColon() const;
198 static inline bool isNCNameStart(const QChar ch);
199 static inline bool isNCNameBody(const QChar ch);
200 static inline const TokenMap *lookupKeyword(const QString &keyword);
201 inline void popState();
202 inline void pushState(const State state);
203 inline State state() const;
204 inline void setState(const State s);
205 static bool isTypeToken(const TokenType t);
207 inline Token tokenizeNCNameOrQName();
209 * Advances m_pos until @p content is encountered.
211 * Returned is the length stretching from m_pos when starting, until
212 * @p content is encountered. @p content is not included in the length.
214 int scanUntil(const char *const content);
219 * pushState(currentState());
222 inline void pushState();
225 * Consumes only whitespace, in the traditional sense. The function exits
226 * if non-whitespace is encountered, such as the start of a comment.
228 * @returns @c true if the end was reached, otherwise @c false
230 inline bool consumeRawWhitespace();
233 * @short Parses comments: <tt>(: comment content :)</tt>. It recurses for
234 * parsing nested comments.
236 * It is assumed that the start token for the comment, "(:", has
237 * already been parsed.
239 * Typically, don't call this function, but ignoreWhitespace().
241 * @see <a href="http://www.w3.org/TR/xpath20/#comments">XML Path Language (XPath)
242 * 2.0, 2.6 Comments</a>
244 * - SUCCESS if everything went ok
245 * - ERROR if there was an error in parsing one or more comments
246 * - END_OF_FILE if the end was reached
248 Tokenizer::TokenType consumeComment();
251 * Determines whether @p code is a keyword
252 * that is followed by a second keyword. For instance <tt>declare
255 static inline bool isPhraseKeyword(const TokenType code);
258 * A set of indexes into a QString, the one being passed to
259 * normalizeEOL() whose characters shouldn't be normalized. */
260 typedef QSet<int> CharacterSkips;
263 * Returns @p input, normalized according to
264 * <a href="http://www.w3.org/TR/xquery/#id-eol-handling">XQuery 1.0:
265 * An XML Query Language, A.2.3 End-of-Line Handling</a>
267 static QString normalizeEOL(const QString &input,
268 const CharacterSkips &characterSkips);
/** @returns @c true when m_pos equals m_length, otherwise @c false. */
270 inline bool atEnd() const
272 return m_pos == m_length;
277 * Instead of recognizing and tokenizing embedded expressions in
278 * direct attribute constructors, this function is essentially a mini
279 * recursive-descent parser that has the necessary logic to recognize
280 * embedded expressions and their potentially interfering string literals, in
281 * order to scan to the very end of the attribute value, and return the
284 * There are of course syntax errors this function will not detect, but
285 * that is ok since the attributes will be parsed once more.
287 * An inelegant solution, but which gets the job done.
289 * @see commenceScanOnly(), resumeTokenizationFrom()
291 Token attributeAsRaw(const QChar separator,
294 const bool inLiteral,
297 const QString m_data;
300 QStack<State> m_stateStack;
304 * The current line number.
306 * The line number and column number both start at 1.
311 * The offset into m_length for where
312 * the current column starts. So m_length - m_columnOffset
313 * is the current column.
315 * The line number and column number both start at 1.
319 const NamePool::Ptr m_namePool;
320 QStack<Token> m_tokenStack;
321 QHash<QString, QChar> m_charRefs;
324 Q_DISABLE_COPY(XQueryTokenizer)