1 /****************************************************************************
4 ** Implementation of QXmlSimpleReader and related classes.
8 ** Copyright (C) 1992-2000 Trolltech AS. All rights reserved.
10 ** This file is part of the XML module of the Qt GUI Toolkit.
12 ** This file may be distributed under the terms of the Q Public License
13 ** as defined by Trolltech AS of Norway and appearing in the file
14 ** LICENSE.QPL included in the packaging of this file.
16 ** This file may be distributed and/or modified under the terms of the
17 ** GNU General Public License version 2 as published by the Free Software
18 ** Foundation and appearing in the file LICENSE.GPL included in the
19 ** packaging of this file.
21 ** Licensees holding valid Qt Enterprise Edition licenses may use this
22 ** file in accordance with the Qt Commercial License Agreement provided
25 ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
26 ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
28 ** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
29 ** information about Qt Commercial License Agreements.
30 ** See http://www.trolltech.com/qpl/ for QPL licensing information.
31 ** See http://www.trolltech.com/gpl/ for GPL licensing information.
33 ** Contact info@trolltech.com if any conditions of this licensing are
36 **********************************************************************/
40 #include "qtextcodec.h"
46 // Error strings for the XML reader
// Each macro expands to a plain string literal containing the English
// message text for one reader status or error condition (XMLERR_OK is the
// "no error" case). NOTE(review): the places where these messages are
// reported are outside this excerpt -- confirm before rewording any of them.
47 #define XMLERR_OK "no error occurred"
48 #define XMLERR_TAGMISMATCH "tag mismatch"
49 #define XMLERR_UNEXPECTEDEOF "unexpected end of file"
50 #define XMLERR_FINISHEDPARSINGWHILENOTEOF "parsing is finished but end of file is not reached"
51 #define XMLERR_LETTEREXPECTED "letter is expected"
52 #define XMLERR_ERRORPARSINGELEMENT "error while parsing element"
53 #define XMLERR_ERRORPARSINGPROLOG "error while parsing prolog"
54 #define XMLERR_ERRORPARSINGMAINELEMENT "error while parsing main element"
55 #define XMLERR_ERRORPARSINGCONTENT "error while parsing content"
56 #define XMLERR_ERRORPARSINGNAME "error while parsing name"
57 #define XMLERR_ERRORPARSINGNMTOKEN "error while parsing Nmtoken"
58 #define XMLERR_ERRORPARSINGATTRIBUTE "error while parsing attribute"
59 #define XMLERR_ERRORPARSINGMISC "error while parsing misc"
60 #define XMLERR_ERRORPARSINGCHOICE "error while parsing choice or seq"
61 #define XMLERR_ERRORBYCONSUMER "error triggered by consumer"
62 #define XMLERR_UNEXPECTEDCHARACTER "unexpected character"
63 #define XMLERR_EQUALSIGNEXPECTED "expected '=' but not found"
64 #define XMLERR_QUOTATIONEXPECTED "expected \" or ' but not found"
65 #define XMLERR_ERRORPARSINGREFERENCE "error while parsing reference"
66 #define XMLERR_ERRORPARSINGPI "error while parsing processing instruction"
67 #define XMLERR_ERRORPARSINGATTLISTDECL "error while parsing attribute list declaration"
68 #define XMLERR_ERRORPARSINGATTTYPE "error while parsing attribute type declaration"
69 #define XMLERR_ERRORPARSINGATTVALUE "error while parsing attribute value declaration"
70 #define XMLERR_ERRORPARSINGELEMENTDECL "error while parsing element declaration"
71 #define XMLERR_ERRORPARSINGENTITYDECL "error while parsing entity declaration"
72 #define XMLERR_ERRORPARSINGNOTATIONDECL "error while parsing notation declaration"
73 #define XMLERR_ERRORPARSINGEXTERNALID "error while parsing external id"
74 #define XMLERR_ERRORPARSINGCOMMENT "error while parsing comment"
75 #define XMLERR_ERRORPARSINGENTITYVALUE "error while parsing entity value declaration"
76 #define XMLERR_CDSECTHEADEREXPECTED "expected the header for a cdata section"
77 #define XMLERR_MORETHANONEDOCTYPE "more than one document type definition"
78 #define XMLERR_ERRORPARSINGDOCTYPE "error while parsing document type definition"
79 #define XMLERR_INVALIDNAMEFORPI "invalid name for processing instruction"
80 #define XMLERR_VERSIONEXPECTED "version expected while reading the XML declaration"
81 #define XMLERR_EDECLORSDDECLEXPECTED "EDecl or SDDecl expected while reading the XML declaration"
82 #define XMLERR_SDDECLEXPECTED "SDDecl expected while reading the XML declaration"
83 #define XMLERR_WRONGVALUEFORSDECL "wrong value for standalone declaration"
84 #define XMLERR_UNPARSEDENTITYREFERENCE "unparsed entity reference in wrong context"
85 #define XMLERR_INTERNALGENERALENTITYINDTD "internal general entity reference not allowed in DTD"
86 #define XMLERR_EXTERNALGENERALENTITYINDTD "external parsed general entity reference not allowed in DTD"
87 #define XMLERR_EXTERNALGENERALENTITYINAV "external parsed general entity reference not allowed in attribute value"
90 // the constants for the lookup table
// Each clt* constant names one character class; the trailing comment on each
// line shows the character that class stands for. cltUnknown has no
// associated character and is the value of every table entry in the rows
// visible below.
91 static const signed char cltWS = 0; // white space
92 static const signed char cltPer = 1; // %
93 static const signed char cltAmp = 2; // &
94 static const signed char cltGt = 3; // >
95 static const signed char cltLt = 4; // <
96 static const signed char cltSlash = 5; // /
97 static const signed char cltQm = 6; // ?
98 static const signed char cltEm = 7; // !
99 static const signed char cltDash = 8; // -
100 static const signed char cltCB = 9; // ]
101 static const signed char cltOB = 10; // [
102 static const signed char cltEq = 11; // =
103 static const signed char cltDq = 12; // "
104 static const signed char cltSq = 13; // '
105 static const signed char cltUnknown = 14;
107 // character lookup table
// 256 signed-char entries laid out eight per row; the trailing comment on
// each row gives the range of codes that row covers.
// NOTE(review): presumably indexed by 8-bit character code to obtain the
// clt* class of that character -- confirm at the use site.
108 static const signed char charLookupTable[256]={
109 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0x00 - 0x07
118 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0x10 - 0x17
119 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0x18 - 0x1F
136 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0x30 - 0x37
145 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0x40 - 0x47
146 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0x48 - 0x4F
147 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0x50 - 0x57
156 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0x60 - 0x67
157 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0x68 - 0x6F
158 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0x70 - 0x77
159 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0x78 - 0x7F
160 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0x80 - 0x87
161 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0x88 - 0x8F
162 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0x90 - 0x97
163 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0x98 - 0x9F
164 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0xA0 - 0xA7
165 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0xA8 - 0xAF
166 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0xB0 - 0xB7
167 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0xB8 - 0xBF
168 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0xC0 - 0xC7
169 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0xC8 - 0xCF
170 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0xD0 - 0xD7
171 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0xD8 - 0xDF
172 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0xE0 - 0xE7
173 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0xE8 - 0xEF
174 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, // 0xF0 - 0xF7
175 cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown, cltUnknown // 0xF8 - 0xFF
179 class QXmlNamespaceSupportPrivate
182 class QXmlAttributesPrivate
185 class QXmlInputSourcePrivate
188 class QXmlParseExceptionPrivate
191 class QXmlLocatorPrivate
194 class QXmlDefaultHandlerPrivate
198 #if defined(Q_FULL_TEMPLATE_INSTANTIATION)
199 bool operator==( const QMap<QString, QString>, const QMap<QString, QString> )
206 \class QXmlParseException qxml.h
207 \brief The QXmlParseException class is used to report errors with the
208 QXmlErrorHandler interface.
215 \fn QXmlParseException::QXmlParseException( const QString& name, int c, int l, const QString& p, const QString& s )
217 Constructs a parse exception with the error string \a name in the column
218 \a c and line \a l for the public identifier \a p and the system identifier
222 Returns the error message.
224 QString QXmlParseException::message() const
229 Returns the column number where the error occurred.
231 int QXmlParseException::columnNumber() const
236 Returns the line number where the error occurred.
238 int QXmlParseException::lineNumber() const
243 Returns the public identifier for which the error occurred.
245 QString QXmlParseException::publicId() const
250 Returns the system identifier for which the error occurred.
252 QString QXmlParseException::systemId() const
259 \class QXmlLocator qxml.h
260 \brief The QXmlLocator class provides the XML handler classes with
261 information about the current parsing position.
265 The reader reports a QXmlLocator to the content handler before it starts to
266 parse the document. This is done with the
267 QXmlContentHandler::setDocumentLocator() function. The handler classes can
268 now use this locator to get the current position the reader is at.
271 \fn QXmlLocator::QXmlLocator( QXmlSimpleReader* parent )
276 \fn QXmlLocator::~QXmlLocator()
281 Gets the column number (starting with 1) or -1 if there is no column number
284 int QXmlLocator::columnNumber()
286 return ( reader->columnNr == -1 ? -1 : reader->columnNr + 1 );
289 Gets the line number (starting with 1) or -1 if there is no line number
292 int QXmlLocator::lineNumber()
294 return ( reader->lineNr == -1 ? -1 : reader->lineNr + 1 );
298 /*********************************************
300 * QXmlNamespaceSupport
302 *********************************************/
305 \class QXmlNamespaceSupport qxml.h
306 \brief The QXmlNamespaceSupport class is a helper class for XML readers which
307 want to include namespace support.
311 It provides some functions that make it easy to handle namespaces. Its main
312 use is for subclasses of QXmlReader which want to provide namespace
315 See also the <a href="xml-sax.html#namespaces">namespace description</a>.
319 Constructs a QXmlNamespaceSupport.
321 QXmlNamespaceSupport::QXmlNamespaceSupport()
327 Destructs a QXmlNamespaceSupport.
329 QXmlNamespaceSupport::~QXmlNamespaceSupport()
334 This function declares a prefix in the current namespace context; the prefix
335 will remain in force until this context is popped, unless it is shadowed in a
338 Note that there is an asymmetry in this library: prefix() will not
339 return the default "" prefix, even if you have declared one; to check for a
340 default prefix, you have to look it up explicitly using uri(). This
341 asymmetry exists to make it easier to look up prefixes for attribute names,
342 where the default prefix is not allowed.
344 void QXmlNamespaceSupport::setPrefix( const QString& pre, const QString& uri )
347 ns.insert( "", uri );
349 ns.insert( pre, uri );
354 Returns one of the prefixes mapped to a namespace URI.
356 If more than one prefix is currently mapped to the same URI, this function
357 will make an arbitrary selection; if you want all of the prefixes, use the
358 prefixes() function instead.
360 Note: this will never return the empty (default) prefix; to check for a
361 default prefix, use the uri() function with an argument of "".
363 QString QXmlNamespaceSupport::prefix( const QString& uri ) const
365 QMap<QString, QString>::ConstIterator itc, it = ns.begin();
366 while ( (itc=it) != ns.end() ) {
368 if ( itc.data() == uri && !itc.key().isEmpty() )
375 Looks up a prefix in the current context and returns the currently-mapped
376 namespace URI. Use the empty string ("") for the default namespace.
378 QString QXmlNamespaceSupport::uri( const QString& prefix ) const
380 const QString& returi = ns[ prefix ];
385 Splits the name at the ':' and returns the prefix and the local name.
387 void QXmlNamespaceSupport::splitName( const QString& qname,
388 QString& prefix, QString& localname ) const
392 for( pos=0; pos<qname.length(); pos++ ) {
393 if ( qname.at(pos) == ':' )
397 prefix = qname.left( pos );
398 localname = qname.mid( pos+1 );
402 Processes a raw XML 1.0 name in the current context by removing the prefix
403 and looking it up among the prefixes currently declared.
405 First parameter is the raw XML 1.0 name to be processed. The second parameter
406 is a flag whether the name is the name of an attribute (TRUE) or not (FALSE).
408 The return values will be stored in the last two parameters as follows:
410 <li> The namespace URI, or an empty string if none is in use.
411 <li> The local name (without prefix).
414 If the raw name has a prefix that has not been declared, then the return
417 Note that attribute names are processed differently than element names: an
418 unprefixed element name will receive the default namespace (if any), while
419 an unprefixed attribute name will not.
421 void QXmlNamespaceSupport::processName( const QString& qname,
423 QString& nsuri, QString& localname ) const
427 for( pos=0; pos<qname.length(); pos++ ) {
428 if ( qname.at(pos) == ':' )
431 if ( pos < qname.length() ) {
433 nsuri = uri( qname.left( pos ) );
434 localname = qname.mid( pos+1 );
438 nsuri = ""; // attributes don't take default namespace
440 nsuri = uri( "" ); // get default namespace
447 Returns an enumeration of all prefixes currently declared.
449 Note: if there is a default prefix, it will not be returned in this
450 enumeration; check for the default prefix using uri() with an argument
453 QStringList QXmlNamespaceSupport::prefixes() const
457 QMap<QString, QString>::ConstIterator itc, it = ns.begin();
458 while ( (itc=it) != ns.end() ) {
460 if ( !itc.key().isEmpty() )
461 list.append( itc.key() );
467 Returns a list of all prefixes currently declared for a URI.
469 The xml: prefix will be included. If you want only one prefix that's
470 mapped to the namespace URI, and you don't care which one you get, use the
471 prefix() function instead.
473 Note: the empty (default) prefix is never included in this enumeration; to
474 check for the presence of a default namespace, use uri() with an
477 QStringList QXmlNamespaceSupport::prefixes( const QString& uri ) const
481 QMap<QString, QString>::ConstIterator itc, it = ns.begin();
482 while ( (itc=it) != ns.end() ) {
484 if ( itc.data() == uri && !itc.key().isEmpty() )
485 list.append( itc.key() );
491 Starts a new namespace context.
493 Normally, you should push a new context at the beginning of each XML element:
494 the new context will automatically inherit the declarations of its parent
495 context, but it will also keep track of which declarations were made within
498 void QXmlNamespaceSupport::pushContext()
504 Reverts to the previous namespace context.
506 Normally, you should pop the context at the end of each XML element. After
507 popping the context, all namespace prefix mappings that were previously in
510 void QXmlNamespaceSupport::popContext()
512 if( !nsStack.isEmpty() )
517 Resets this namespace support object for reuse.
519 void QXmlNamespaceSupport::reset()
523 ns.insert( "xml", "http://www.w3.org/XML/1998/namespace" ); // the XML namespace
528 /*********************************************
532 *********************************************/
535 \class QXmlAttributes qxml.h
536 \brief The QXmlAttributes class provides XML attributes.
540 If attributes are reported by QXmlContentHandler::startElement() this
541 class is used to pass the attribute values. It provides you with different
542 functions to access the attribute names and values.
545 \fn QXmlAttributes::QXmlAttributes()
547 Constructs an empty attribute list.
550 \fn QXmlAttributes::~QXmlAttributes()
552 Destructs attributes.
556 Looks up the index of an attribute by an XML 1.0 qualified name.
558 Returns the index of the attribute (starting with 0) or -1 if it wasn't
561 See also the <a href="xml-sax.html#namespaces">namespace description</a>.
563 int QXmlAttributes::index( const QString& qName ) const
565 return qnameList.findIndex( qName );
569 Looks up the index of an attribute by a namespace name.
571 \a uri specifies the namespace URI, or the empty string if the name has no
572 namespace URI. \a localPart specifies the attribute's local name.
574 Returns the index of the attribute (starting with 0) or -1 if it wasn't
577 See also the <a href="xml-sax.html#namespaces">namespace description</a>.
579 int QXmlAttributes::index( const QString& uri, const QString& localPart ) const
581 uint count = uriList.count();
582 for ( uint i=0; i<count; i++ ) {
583 if ( uriList[i] == uri && localnameList[i] == localPart )
590 Returns the number of attributes in the list.
592 int QXmlAttributes::length() const
594 return valueList.count();
598 Looks up an attribute's local name by index (starting with 0).
600 See also the <a href="xml-sax.html#namespaces">namespace description</a>.
602 QString QXmlAttributes::localName( int index ) const
604 return localnameList[index];
608 Looks up an attribute's XML 1.0 qualified name by index (starting with 0).
610 See also the <a href="xml-sax.html#namespaces">namespace description</a>.
612 QString QXmlAttributes::qName( int index ) const
614 return qnameList[index];
618 Looks up an attribute's namespace URI by index (starting with 0).
620 See also the <a href="xml-sax.html#namespaces">namespace description</a>.
622 QString QXmlAttributes::uri( int index ) const
624 return uriList[index];
628 Looks up an attribute's type by index (starting with 0).
630 At the moment only 'CDATA' is returned.
632 QString QXmlAttributes::type( int ) const
638 Looks up an attribute's type by XML 1.0 qualified name.
640 At the moment only 'CDATA' is returned.
642 QString QXmlAttributes::type( const QString& ) const
648 Looks up an attribute's type by namespace name.
650 The first parameter specifies the namespace URI, or the empty string if
651 the name has no namespace URI. The second parameter specifies the
652 attribute's local name.
654 At the moment only 'CDATA' is returned.
656 QString QXmlAttributes::type( const QString&, const QString& ) const
662 Looks up an attribute's value by index (starting with 0).
664 QString QXmlAttributes::value( int index ) const
666 return valueList[index];
670 Looks up an attribute's value by XML 1.0 qualified name.
672 See also the <a href="xml-sax.html#namespaces">namespace description</a>.
674 QString QXmlAttributes::value( const QString& qName ) const
676 int i = index( qName );
678 return QString::null;
679 return valueList[ i ];
683 Looks up an attribute's value by namespace name.
685 \a uri specifies the namespace URI, or the empty string if the name has no
686 namespace URI. \a localName specifies the attribute's local name.
688 See also the <a href="xml-sax.html#namespaces">namespace description</a>.
690 QString QXmlAttributes::value( const QString& uri, const QString& localName ) const
692 int i = index( uri, localName );
694 return QString::null;
695 return valueList[ i ];
699 /*********************************************
703 *********************************************/
706 \class QXmlInputSource qxml.h
707 \brief The QXmlInputSource class is the source where XML data is read from.
711 All subclasses of QXmlReader read the input from this class.
715 Returns all the data this input source contains.
717 const QString& QXmlInputSource::data() const
723 Constructs an input source which contains no data.
725 QXmlInputSource::QXmlInputSource( )
731 Constructs an input source and gets the data from the text stream.
733 QXmlInputSource::QXmlInputSource( QTextStream& stream )
736 if ( stream.device()->isDirectAccess() ) {
737 rawData = stream.device()->readAll();
740 const int bufsize = 512;
741 while ( !stream.device()->atEnd() ) {
742 rawData.resize( nread + bufsize );
743 nread += stream.device()->readBlock( rawData.data()+nread, bufsize );
745 rawData.resize( nread );
747 readInput( rawData );
751 Constructs an input source and gets the data from a file. If the file cannot be
752 read, the input source is empty.
754 QXmlInputSource::QXmlInputSource( QFile& file )
756 if ( !file.open(IO_ReadOnly) ) {
760 QByteArray rawData = file.readAll();
761 readInput( rawData );
768 QXmlInputSource::~QXmlInputSource()
773 Sets the data of the input source to \a dat.
775 void QXmlInputSource::setData( const QString& dat )
781 Reads the XML file from the byte array; tries to recognize the encoding.
783 // ### The input source should not do the encoding detection!
784 void QXmlInputSource::readInput( QByteArray& rawData )
786 QBuffer buf( rawData );
787 buf.open( IO_ReadOnly );
788 QTextStream *stream = new QTextStream( &buf );
790 // assume UTF8 or UTF16 at first
791 stream->setEncoding( QTextStream::UnicodeUTF8 );
793 // read the first 5 characters
794 for ( int i=0; i<5; i++ ) {
798 // starts the document with an XML declaration?
799 if ( input == "<?xml" ) {
800 // read the whole XML declaration
804 } while( tmp != '>' );
805 // and try to find out if there is an encoding
806 int pos = input.find( "encoding" );
811 if ( pos > (int)input.length() )
813 } while( input[pos] != '"' && input[pos] != '\'' );
815 while( input[pos] != '"' && input[pos] != '\'' ) {
816 encoding += input[pos];
818 if ( pos > (int)input.length() )
822 stream = new QTextStream( &buf );
823 stream->setCodec( QTextCodec::codecForName( encoding.utf8() ) );
829 input += stream->read();
835 /*********************************************
839 *********************************************/
842 \class QXmlContentHandler qxml.h
843 \brief The QXmlContentHandler class provides an interface to report logical
848 If the application needs to be informed of basic parsing events, it
849 implements this interface and sets it with QXmlReader::setContentHandler().
850 The reader reports basic document-related events like the start and end of
851 elements and character data through this interface.
853 The order of events in this interface is very important, and mirrors the
854 order of information in the document itself. For example, all of an element's
855 content (character data, processing instructions, and/or subelements) will
856 appear, in order, between the startElement() event and the corresponding
859 The class QXmlDefaultHandler gives a default implementation for this
860 interface; subclassing from this class is very convenient if you only want to
861 be informed of some parsing events.
863 See also the <a href="xml.html#introSAX2">Introduction to SAX2</a>.
865 \sa QXmlDTDHandler QXmlDeclHandler QXmlEntityResolver QXmlErrorHandler
869 \fn void QXmlContentHandler::setDocumentLocator( QXmlLocator* locator )
871 The reader calls this function before it starts parsing the document. The
872 argument \a locator is a pointer to a QXmlLocator which allows the
873 application to get the current parsing position in the document.
875 Do not destroy the \a locator; it is destroyed when the reader is destroyed
876 (do not use the \a locator after the reader has been destroyed).
879 \fn bool QXmlContentHandler::startDocument()
881 The reader calls this function when it starts parsing the document.
882 The reader will call this function only once before any other functions in
883 this class or in the QXmlDTDHandler class are called (except
884 QXmlContentHandler::setDocumentLocator()).
886 If this function returns FALSE the reader will stop parsing and will report
887 an error. The reader will use the function errorString() to get the error
888 message that will be used for reporting the error.
893 \fn bool QXmlContentHandler::endDocument()
895 The reader calls this function after it has finished parsing. It
896 is only called once. It is the last function of all handler functions that is
897 called. It is called after the reader has read all input or has abandoned
898 parsing because of a fatal error.
900 If this function returns FALSE the reader will stop parsing and will report
901 an error. The reader will use the function errorString() to get the error
902 message that will be used for reporting the error.
907 \fn bool QXmlContentHandler::startPrefixMapping( const QString& prefix, const QString& uri )
909 The reader calls this function to signal the beginning of a prefix-URI
910 namespace mapping scope. This information is not necessary for normal
911 namespace processing since the reader automatically replaces prefixes for
912 element and attribute names.
914 Note that startPrefixMapping and endPrefixMapping calls are not guaranteed to
915 be properly nested relative to each other: all startPrefixMapping events will
916 occur before the corresponding startElement event, and all endPrefixMapping
917 events will occur after the corresponding endElement event, but their order
918 is not otherwise guaranteed.
920 The argument \a prefix is the namespace prefix being declared and the
921 argument \a uri is the namespace URI the prefix is mapped to.
923 If this function returns FALSE the reader will stop parsing and will report
924 an error. The reader will use the function errorString() to get the error
925 message that will be used for reporting the error.
927 See also the <a href="xml-sax.html#namespaces">namespace description</a>.
929 \sa endPrefixMapping()
932 \fn bool QXmlContentHandler::endPrefixMapping( const QString& prefix )
934 The reader calls this function to signal the end of a prefix mapping.
936 If this function returns FALSE the reader will stop parsing and will report
937 an error. The reader will use the function errorString() to get the error
938 message that will be used for reporting the error.
940 See also the <a href="xml-sax.html#namespaces">namespace description</a>.
942 \sa startPrefixMapping()
945 \fn bool QXmlContentHandler::startElement( const QString& namespaceURI, const QString& localName, const QString& qName, const QXmlAttributes& atts )
947 The reader calls this function when it has parsed a start element tag.
949 There will be a corresponding endElement() call when the corresponding end
950 element tag was read. The startElement() and endElement() calls are always
951 nested correctly. Empty element tags (e.g. <a/>) are reported by
952 startElement() directly followed by a call to endElement().
954 The attribute list provided will contain only attributes with explicit
955 values. The attribute list will contain attributes used for namespace
956 declaration (i.e. attributes starting with xmlns) only if the
957 namespace-prefix property of the reader is TRUE.
959 The argument \a namespaceURI is the namespace URI, or the empty string if the element
960 has no namespace URI or if namespace processing is not being performed, \a
961 localName is the local name (without prefix), or the empty string if
962 namespace processing is not being performed, \a qName is the qualified name
963 (with prefix), or the empty string if qualified names are not available and
964 \a atts are the attributes attached to the element. If there are no
965 attributes, \a atts is an empty attributes object.
967 If this function returns FALSE the reader will stop parsing and will report
968 an error. The reader will use the function errorString() to get the error
969 message that will be used for reporting the error.
971 See also the <a href="xml-sax.html#namespaces">namespace description</a>.
976 \fn bool QXmlContentHandler::endElement( const QString& namespaceURI, const QString& localName, const QString& qName )
978 The reader calls this function when it has parsed an end element tag.
980 If this function returns FALSE the reader will stop parsing and will report
981 an error. The reader will use the function errorString() to get the error
982 message that will be used for reporting the error.
984 See also the <a href="xml-sax.html#namespaces">namespace description</a>.
989 \fn bool QXmlContentHandler::characters( const QString& ch )
991 The reader calls this function when it has parsed a chunk of character
992 data (either normal character data or character data inside a CDATA section;
993 if you have to distinguish between those two types you have to use
994 QXmlLexicalHandler::startCDATA() and QXmlLexicalHandler::endCDATA() in
997 Some readers will report whitespace in element content using the
998 ignorableWhitespace() function rather than this one (QXmlSimpleReader will
1001 A reader is allowed to report the character data of an element in more than
1002 one chunk; e.g. a reader might want to report "a &lt; b" in three
1003 characters() events ("a ", "<" and " b").
1005 If this function returns FALSE the reader will stop parsing and will report
1006 an error. The reader will use the function errorString() to get the error
1007 message that will be used for reporting the error.
1010 \fn bool QXmlContentHandler::ignorableWhitespace( const QString& ch )
1012 Some readers may use this function to report each chunk of whitespace in
1013 element content (QXmlSimpleReader does not though).
1015 If this function returns FALSE the reader will stop parsing and will report
1016 an error. The reader will use the function errorString() to get the error
1017 message that will be used for reporting the error.
1020 \fn bool QXmlContentHandler::processingInstruction( const QString& target, const QString& data )
1022 The reader calls this function when it has parsed a processing
1025 \a target is the target name of the processing instruction and \a data is the
1026 data of the processing instruction.
1028 If this function returns FALSE the reader will stop parsing and will report
1029 an error. The reader will use the function errorString() to get the error
1030 message that will be used for reporting the error.
1033 \fn bool QXmlContentHandler::skippedEntity( const QString& name )
1035 Some readers may skip entities if they have not seen the declarations (e.g.
1036 because they are in an external DTD). If they do so they will report it by
1037 calling this function.
1039 If this function returns FALSE the reader will stop parsing and will report
1040 an error. The reader will use the function errorString() to get the error
1041 message that will be used for reporting the error.
1044 \fn QString QXmlContentHandler::errorString()
1046 The reader calls this function to get an error string if any of the handler
1047 functions returns FALSE.
1052 \class QXmlErrorHandler qxml.h
1053 \brief The QXmlErrorHandler class provides an interface to report errors in
1058 If the application is interested in reporting errors to the user or any other
1059 customized error handling, you should subclass this class.
1061 You can set the error handler with QXmlReader::setErrorHandler().
1063 See also the <a href="xml.html#introSAX2">Introduction to SAX2</a>.
1065 \sa QXmlDTDHandler QXmlDeclHandler QXmlContentHandler QXmlEntityResolver
1069 \fn bool QXmlErrorHandler::warning( const QXmlParseException& exception )
1071 A reader might use this function to report a warning. Warnings are conditions
1072 that are not errors or fatal errors as defined by the XML 1.0 specification.
1074 If this function returns FALSE the reader will stop parsing and will report
1075 an error. The reader will use the function errorString() to get the error
1076 message that will be used for reporting the error.
1079 \fn bool QXmlErrorHandler::error( const QXmlParseException& exception )
1081 A reader might use this function to report a recoverable error. A recoverable
1082 error corresponds to the definition of "error" in section 1.2 of the XML 1.0
1085 The reader must continue to provide normal parsing events after invoking this
1088 If this function returns FALSE the reader will stop parsing and will report
1089 an error. The reader will use the function errorString() to get the error
1090 message that will be used for reporting the error.
1093 \fn bool QXmlErrorHandler::fatalError( const QXmlParseException& exception )
1095 A reader must use this function to report a non-recoverable error.
1097 If this function returns TRUE the reader might try to go on parsing and
1098 reporting further errors; but no regular parsing events are reported.
1101 \fn QString QXmlErrorHandler::errorString()
1103 The reader calls this function to get an error string if any of the handler
1104 functions returns FALSE.
1109 \class QXmlDTDHandler qxml.h
1110 \brief The QXmlDTDHandler class provides an interface to report DTD content
1115 If an application needs information about notations and unparsed entities,
1116 then the application implements this interface and registers an instance with
1117 QXmlReader::setDTDHandler().
1119 Note that this interface includes only those DTD events that the XML
1120 recommendation requires processors to report: notation and unparsed entity
1123 See also the <a href="xml.html#introSAX2">Introduction to SAX2</a>.
1125 \sa QXmlDeclHandler QXmlContentHandler QXmlEntityResolver QXmlErrorHandler
1129 \fn bool QXmlDTDHandler::notationDecl( const QString& name, const QString& publicId, const QString& systemId )
1131 The reader calls this function when it has parsed a notation
1134 The argument \a name is the notation name, \a publicId is the notation's
1135 public identifier and \a systemId is the notation's system identifier.
1137 If this function returns FALSE the reader will stop parsing and will report
1138 an error. The reader will use the function errorString() to get the error
1139 message that will be used for reporting the error.
1142 \fn bool QXmlDTDHandler::unparsedEntityDecl( const QString& name, const QString& publicId, const QString& systemId, const QString& notationName )
1144 The reader calls this function when it finds an unparsed entity declaration.
1146 The argument \a name is the unparsed entity's name, \a publicId is the
1147 entity's public identifier, \a systemId is the entity's system identifier and
1148 \a notationName is the name of the associated notation.
1150 If this function returns FALSE the reader will stop parsing and will report
1151 an error. The reader will use the function errorString() to get the error
1152 message that will be used for reporting the error.
1155 \fn QString QXmlDTDHandler::errorString()
1157 The reader calls this function to get an error string if any of the handler
1158 functions returns FALSE.
1163 \class QXmlEntityResolver qxml.h
1164 \brief The QXmlEntityResolver class provides an interface to resolve external
1165 entities contained in XML data.
1169 If an application needs to implement customized handling for external
1170 entities, it must implement this interface and register it with
1171 QXmlReader::setEntityResolver().
1173 See also the <a href="xml.html#introSAX2">Introduction to SAX2</a>.
1175 \sa QXmlDTDHandler QXmlDeclHandler QXmlContentHandler QXmlErrorHandler
1179 \fn bool QXmlEntityResolver::resolveEntity( const QString& publicId, const QString& systemId, QXmlInputSource*& ret )
1181 The reader will call this function before it opens any external entity,
1182 except the top-level document entity. The application may request the reader
1183 to resolve the entity itself (\a ret is 0) or to use an entirely different
1184 input source (\a ret points to the input source).
1186 The reader will delete the input source \a ret when it no longer needs it. So
1187 you should allocate it on the heap with \c new.
1189 The argument \a publicId is the public identifier of the external entity, \a
1190 systemId is the system identifier of the external entity and \a ret is the
1191 return value of this function: if it is 0 the reader should resolve the
1192 entity itself, if it is non-zero it must point to an input source which the
1193 reader will use instead.
1195 If this function returns FALSE the reader will stop parsing and will report
1196 an error. The reader will use the function errorString() to get the error
1197 message that will be used for reporting the error.
1200 \fn QString QXmlEntityResolver::errorString()
1202 The reader calls this function to get an error string if any of the handler
1203 functions returns FALSE.
1208 \class QXmlLexicalHandler qxml.h
1209 \brief The QXmlLexicalHandler class provides an interface to report lexical
1210 content of XML data.
1214 The events in the lexical handler apply to the entire document, not just to
1215 the document element, and all lexical handler events appear between the
1216 content handler's startDocument and endDocument events.
1218 You can set the lexical handler with QXmlReader::setLexicalHandler().
1220 This interface is designed after the SAX2 extension LexicalHandler. The
1221 functions startEntity() and endEntity() are not included though.
1223 See also the <a href="xml.html#introSAX2">Introduction to SAX2</a>.
1225 \sa QXmlDTDHandler QXmlDeclHandler QXmlContentHandler QXmlEntityResolver
1229 \fn bool QXmlLexicalHandler::startDTD( const QString& name, const QString& publicId, const QString& systemId )
1231 The reader calls this function to report the start of a DTD declaration, if
1234 All declarations reported through QXmlDTDHandler or QXmlDeclHandler appear
1235 between the startDTD() and endDTD() calls.
1237 If this function returns FALSE the reader will stop parsing and will report
1238 an error. The reader will use the function errorString() to get the error
1239 message that will be used for reporting the error.
1244 \fn bool QXmlLexicalHandler::endDTD()
1246 The reader calls this function to report the end of a DTD declaration, if
1249 If this function returns FALSE the reader will stop parsing and will report
1250 an error. The reader will use the function errorString() to get the error
1251 message that will be used for reporting the error.
1256 \fn bool QXmlLexicalHandler::startCDATA()
1258 The reader calls this function to report the start of a CDATA section. The
1259 content of the CDATA section will be reported through the regular
1260 QXmlContentHandler::characters(). This function is intended only to report
1263 If this function returns FALSE the reader will stop parsing and will report
1264 an error. The reader will use the function errorString() to get the error
1265 message that will be used for reporting the error.
1270 \fn bool QXmlLexicalHandler::endCDATA()
1272 The reader calls this function to report the end of a CDATA section.
1274 If this function returns FALSE the reader will stop parsing and will report
1275 an error. The reader will use the function errorString() to get the error
1276 message that will be used for reporting the error.
1281 \fn bool QXmlLexicalHandler::comment( const QString& ch )
1283 The reader calls this function to report an XML comment anywhere in the
1286 If this function returns FALSE the reader will stop parsing and will report
1287 an error. The reader will use the function errorString() to get the error
1288 message that will be used for reporting the error.
1291 \fn QString QXmlLexicalHandler::errorString()
1293 The reader calls this function to get an error string if any of the handler
1294 functions returns FALSE.
1299 \class QXmlDeclHandler qxml.h
1300 \brief The QXmlDeclHandler class provides an interface to report declaration
1301 content of XML data.
1305 You can set the declaration handler with QXmlReader::setDeclHandler().
1307 This interface is designed after the SAX2 extension DeclHandler.
1309 See also the <a href="xml.html#introSAX2">Introduction to SAX2</a>.
1311 \sa QXmlDTDHandler QXmlContentHandler QXmlEntityResolver QXmlErrorHandler
1315 \fn bool QXmlDeclHandler::attributeDecl( const QString& eName, const QString& aName, const QString& type, const QString& valueDefault, const QString& value )
1317 The reader calls this function to report an attribute type declaration. Only
1318 the effective (first) declaration for an attribute will be reported.
1320 If this function returns FALSE the reader will stop parsing and will report
1321 an error. The reader will use the function errorString() to get the error
1322 message that will be used for reporting the error.
1325 \fn bool QXmlDeclHandler::internalEntityDecl( const QString& name, const QString& value )
1327 The reader calls this function to report an internal entity declaration. Only
1328 the effective (first) declaration will be reported.
1330 If this function returns FALSE the reader will stop parsing and will report
1331 an error. The reader will use the function errorString() to get the error
1332 message that will be used for reporting the error.
1335 \fn bool QXmlDeclHandler::externalEntityDecl( const QString& name, const QString& publicId, const QString& systemId )
1337 The reader calls this function to report a parsed external entity
1338 declaration. Only the effective (first) declaration for each entity will be
1341 If this function returns FALSE the reader will stop parsing and will report
1342 an error. The reader will use the function errorString() to get the error
1343 message that will be used for reporting the error.
1346 \fn QString QXmlDeclHandler::errorString()
1348 The reader calls this function to get an error string if any of the handler
1349 functions returns FALSE.
1354 \class QXmlDefaultHandler qxml.h
1355 \brief The QXmlDefaultHandler class provides a default implementation of all
1356 XML handler classes.
1360 Very often you are only interested in parts of the things that the
1361 reader reports to you. This class simply implements a default behaviour of
1362 the handler classes (most of the time: do nothing). Normally this is the
1363 class you subclass for implementing your customized handler.
1365 See also the <a href="xml.html#introSAX2">Introduction to SAX2</a>.
1367 \sa QXmlDTDHandler QXmlDeclHandler QXmlContentHandler QXmlEntityResolver
1368 QXmlErrorHandler QXmlLexicalHandler
1371 \fn QXmlDefaultHandler::QXmlDefaultHandler()
1376 \fn QXmlDefaultHandler::~QXmlDefaultHandler()
1384 void QXmlDefaultHandler::setDocumentLocator( QXmlLocator* )
1391 bool QXmlDefaultHandler::startDocument()
1399 bool QXmlDefaultHandler::endDocument()
1407 bool QXmlDefaultHandler::startPrefixMapping( const QString&, const QString& )
1415 bool QXmlDefaultHandler::endPrefixMapping( const QString& )
1423 bool QXmlDefaultHandler::startElement( const QString&, const QString&,
1424 const QString&, const QXmlAttributes& )
1432 bool QXmlDefaultHandler::endElement( const QString&, const QString&,
1441 bool QXmlDefaultHandler::characters( const QString& )
1449 bool QXmlDefaultHandler::ignorableWhitespace( const QString& )
1457 bool QXmlDefaultHandler::processingInstruction( const QString&,
1466 bool QXmlDefaultHandler::skippedEntity( const QString& )
1474 bool QXmlDefaultHandler::warning( const QXmlParseException& )
1482 bool QXmlDefaultHandler::error( const QXmlParseException& )
1490 bool QXmlDefaultHandler::fatalError( const QXmlParseException& )
1498 bool QXmlDefaultHandler::notationDecl( const QString&, const QString&,
1507 bool QXmlDefaultHandler::unparsedEntityDecl( const QString&, const QString&,
1508 const QString&, const QString& )
1514 Always sets \a ret to 0, so that the reader will use the system identifier
1515 provided in the XML document.
1517 bool QXmlDefaultHandler::resolveEntity( const QString&, const QString&,
1518 QXmlInputSource* &ret )
1525 Returns the default error string.
1527 QString QXmlDefaultHandler::errorString()
1529 return QString( XMLERR_ERRORBYCONSUMER );
1535 bool QXmlDefaultHandler::startDTD( const QString&, const QString&, const QString& )
1543 bool QXmlDefaultHandler::endDTD()
1552 bool QXmlDefaultHandler::startEntity( const QString& )
1560 bool QXmlDefaultHandler::endEntity( const QString& )
1569 bool QXmlDefaultHandler::startCDATA()
1577 bool QXmlDefaultHandler::endCDATA()
1585 bool QXmlDefaultHandler::comment( const QString& )
1593 bool QXmlDefaultHandler::attributeDecl( const QString&, const QString&, const QString&, const QString&, const QString& )
1601 bool QXmlDefaultHandler::internalEntityDecl( const QString&, const QString& )
1609 bool QXmlDefaultHandler::externalEntityDecl( const QString&, const QString&, const QString& )
1615 /*********************************************
1617 * QXmlSimpleReaderPrivate
1619 *********************************************/
1621 class QXmlSimpleReaderPrivate
1625 QXmlSimpleReaderPrivate()
1629 // used for entity declarations
1630 struct ExternParameterEntity
1632 ExternParameterEntity( ) {}
1633 ExternParameterEntity( const QString &p, const QString &s )
1634 : publicId(p), systemId(s) {}
1641 ExternEntity( const QString &p, const QString &s, const QString &n )
1642 : publicId(p), systemId(s), notation(n) {}
1647 QMap<QString,ExternParameterEntity> externParameterEntities;
1648 QMap<QString,QString> parameterEntities;
1649 QMap<QString,ExternEntity> externEntities;
1650 QMap<QString,QString> entities;
1652 // used for standalone declaration
1653 enum Standalone { Yes, No, Unknown };
1655 QString doctype; // only used for the doctype
1656 QString xmlVersion; // only used to store the version information
1657 QString encoding; // only used to store the encoding
1658 Standalone standalone; // used to store the value of the standalone declaration
1660 QString publicId; // used by parseExternalID() to store the public ID
1661 QString systemId; // used by parseExternalID() to store the system ID
1662 QString attDeclEName; // use by parseAttlistDecl()
1663 QString attDeclAName; // use by parseAttlistDecl()
1665 // flags for some features support
1667 bool useNamespacePrefixes;
1668 bool reportWhitespaceCharData;
1670 // used to build the attribute list
1671 QXmlAttributes attList;
1674 QXmlLocator *locator;
1675 QXmlNamespaceSupport namespaceSupport;
1680 // friend declarations
1681 friend class QXmlSimpleReader;
1685 /*********************************************
1689 *********************************************/
1692 \class QXmlReader qxml.h
1693 \brief The QXmlReader class provides an interface for XML readers (i.e.
1698 This abstract class describes an interface for all XML readers in Qt. At the
1699 moment there is only one implementation of a reader included in the XML
1700 module of Qt (QXmlSimpleReader). In future releases there might be more
1701 readers with different properties available (e.g. a validating parser).
1703 The design of the XML classes follows the
1704 <a href="http://www.megginson.com/SAX/">SAX2 java interface</a>.
1705 It was adapted to fit into the Qt naming conventions; so it should be very
1706 easy for anybody who has worked with SAX2 to get started with the Qt XML
1709 All readers use the class QXmlInputSource to read the input document from.
1710 Since you are normally interested in certain contents of the XML document,
1711 the reader reports those contents through special handler classes
1712 (QXmlDTDHandler, QXmlDeclHandler, QXmlContentHandler, QXmlEntityResolver,
1713 QXmlErrorHandler and QXmlLexicalHandler).
1715 You have to subclass these classes. Since the handler classes describe only
1716 interfaces you must implement all functions; there is a class
1717 (QXmlDefaultHandler) to make this easier; it implements a default behaviour
1718 (do nothing) for all functions.
1720 For getting started see also the
1721 <a href="xml-sax.html#quickStart">Quick start</a>.
1723 \sa QXmlSimpleReader
1726 \fn bool QXmlReader::feature( const QString& name, bool *ok ) const
1728 If the reader has the feature \a name, this function returns the value of the
1731 If the reader does not have the feature \a name, the return value may be anything.
1733 If \a ok is not 0, then \a ok is set to TRUE if the reader has the feature
1734 \a name, otherwise \a ok is set to FALSE.
1736 \sa setFeature() hasFeature()
1739 \fn void QXmlReader::setFeature( const QString& name, bool value )
1741 Sets the feature \a name to \a value. If the reader does not have the
1742 feature \a name, this value is ignored.
1744 \sa feature() hasFeature()
1747 \fn bool QXmlReader::hasFeature( const QString& name ) const
1749 Returns \c TRUE if the reader has the feature \a name, otherwise FALSE.
1751 \sa feature() setFeature()
1754 \fn void* QXmlReader::property( const QString& name, bool *ok ) const
1756 If the reader has the property \a name, this function returns the value of
1759 If the reader does not have the property \a name, the return value is 0.
1761 If \a ok is not 0, then \a ok is set to TRUE if the reader has the property
1762 \a name, otherwise \a ok is set to FALSE.
1764 \sa setProperty() hasProperty()
1767 \fn void QXmlReader::setProperty( const QString& name, void* value )
1769 Sets the property \a name to \a value. If the reader does not have the
1770 property \a name, this value is ignored.
1772 \sa property() hasProperty()
1775 \fn bool QXmlReader::hasProperty( const QString& name ) const
1777 Returns TRUE if the reader has the property \a name, otherwise FALSE.
1779 \sa property() setProperty()
1782 \fn void QXmlReader::setEntityResolver( QXmlEntityResolver* handler )
1784 Sets the entity resolver to \a handler.
1786 \sa entityResolver()
1789 \fn QXmlEntityResolver* QXmlReader::entityResolver() const
1791 Returns the entity resolver or 0 if none was set.
1793 \sa setEntityResolver()
1796 \fn void QXmlReader::setDTDHandler( QXmlDTDHandler* handler )
1798 Sets the DTD handler to \a handler.
1803 \fn QXmlDTDHandler* QXmlReader::DTDHandler() const
1805 Returns the DTD handler or 0 if none was set.
1810 \fn void QXmlReader::setContentHandler( QXmlContentHandler* handler )
1812 Sets the content handler to \a handler.
1814 \sa contentHandler()
1817 \fn QXmlContentHandler* QXmlReader::contentHandler() const
1819 Returns the content handler or 0 if none was set.
1821 \sa setContentHandler()
1824 \fn void QXmlReader::setErrorHandler( QXmlErrorHandler* handler )
1826 Sets the error handler to \a handler.
1831 \fn QXmlErrorHandler* QXmlReader::errorHandler() const
1833 Returns the error handler or 0 if none was set.
1835 \sa setErrorHandler()
1838 \fn void QXmlReader::setLexicalHandler( QXmlLexicalHandler* handler )
1840 Sets the lexical handler to \a handler.
1842 \sa lexicalHandler()
1845 \fn QXmlLexicalHandler* QXmlReader::lexicalHandler() const
1847 Returns the lexical handler or 0 if none was set.
1849 \sa setLexicalHandler()
1852 \fn void QXmlReader::setDeclHandler( QXmlDeclHandler* handler )
1854 Sets the declaration handler to \a handler.
1859 \fn QXmlDeclHandler* QXmlReader::declHandler() const
1861 Returns the declaration handler or 0 if none was set.
1863 \sa setDeclHandler()
1866 \fn bool QXmlReader::parse( const QXmlInputSource& input )
1868 Parses the XML document \a input. Returns TRUE if the parsing was successful,
1872 \fn bool QXmlReader::parse( const QString& systemId )
1874 Parses the XML document at the location \a systemId. Returns TRUE if the
1875 parsing was successful, otherwise FALSE.
1880 \class QXmlSimpleReader qxml.h
1881 \brief The QXmlSimpleReader class provides an implementation of a simple XML
1882 reader (i.e. parser).
1886 This XML reader is sufficient for simple parsing tasks. Here is a short list
1887 of the properties of this reader:
1889 <li> well-formed parser
1890 <li> does not parse any external entities
1891 <li> can do namespace processing
1894 For getting started see also the
1895 <a href="xml-sax.html#quickStart">Quick start</a>.
1898 // guaranteed not to be a character
1899 const QChar QXmlSimpleReader::QEOF = QChar((ushort)0xffff);
1902 Constructs a simple XML reader.
1904 QXmlSimpleReader::QXmlSimpleReader()
1906 d = new QXmlSimpleReaderPrivate();
1907 d->locator = new QXmlLocator( this );
1916 // default feature settings
1917 d->useNamespaces = TRUE;
1918 d->useNamespacePrefixes = FALSE;
1919 d->reportWhitespaceCharData = TRUE;
1923 Destroys a simple XML reader.
1925 QXmlSimpleReader::~QXmlSimpleReader()
1932 Gets the state of a feature.
1934 \sa setFeature() hasFeature()
1936 bool QXmlSimpleReader::feature( const QString& name, bool *ok ) const
1940 if ( name == "http://xml.org/sax/features/namespaces" ) {
1941 return d->useNamespaces;
1942 } else if ( name == "http://xml.org/sax/features/namespace-prefixes" ) {
1943 return d->useNamespacePrefixes;
1944 } else if ( name == "http://trolltech.com/xml/features/report-whitespace-only-CharData" ) {
1945 return d->reportWhitespaceCharData;
1947 qWarning( "Unknown feature %s", name.ascii() );
1955 Sets the state of a feature.
1957 Supported features are:
1959 <li> http://xml.org/sax/features/namespaces:
1960 if this feature is TRUE, namespace processing is performed
1961 <li> http://xml.org/sax/features/namespace-prefixes:
1962 if this feature is TRUE, the original prefixed names and attributes
1963 used for namespace declarations are reported
1964 <li> http://trolltech.com/xml/features/report-whitespace-only-CharData:
1965 if this feature is TRUE, CharData that consists only of whitespace (and
1966 no other characters) is reported via
1967 QXmlContentHandler::characters(); if it is FALSE, such CharData is not reported
1970 \sa feature() hasFeature()
1972 void QXmlSimpleReader::setFeature( const QString& name, bool value )
1974 if ( name == "http://xml.org/sax/features/namespaces" ) {
1975 d->useNamespaces = value;
1976 } else if ( name == "http://xml.org/sax/features/namespace-prefixes" ) {
1977 d->useNamespacePrefixes = value;
1978 } else if ( name == "http://trolltech.com/xml/features/report-whitespace-only-CharData" ) {
1979 d->reportWhitespaceCharData = value;
1981 qWarning( "Unknown feature %s", name.ascii() );
1986 Returns TRUE if the class has a feature named \a name, otherwise FALSE.
1988 \sa setFeature() feature()
1990 bool QXmlSimpleReader::hasFeature( const QString& name ) const
1992 if ( name == "http://xml.org/sax/features/namespaces" ||
1993 name == "http://xml.org/sax/features/namespace-prefixes" ||
1994 name == "http://trolltech.com/xml/features/report-whitespace-only-CharData" ) {
2002 Returns 0 since this class does not support any properties.
2004 void* QXmlSimpleReader::property( const QString&, bool *ok ) const
2012 Does nothing since this class does not support any properties.
2014 void QXmlSimpleReader::setProperty( const QString&, void* )
2019 Returns FALSE since this class does not support any properties.
2021 bool QXmlSimpleReader::hasProperty( const QString& ) const
2027 void QXmlSimpleReader::setEntityResolver( QXmlEntityResolver* handler )
2028 { entityRes = handler; }
2031 QXmlEntityResolver* QXmlSimpleReader::entityResolver() const
2032 { return entityRes; }
2035 void QXmlSimpleReader::setDTDHandler( QXmlDTDHandler* handler )
2036 { dtdHnd = handler; }
2039 QXmlDTDHandler* QXmlSimpleReader::DTDHandler() const
2043 void QXmlSimpleReader::setContentHandler( QXmlContentHandler* handler )
2044 { contentHnd = handler; }
2047 QXmlContentHandler* QXmlSimpleReader::contentHandler() const
2048 { return contentHnd; }
2051 void QXmlSimpleReader::setErrorHandler( QXmlErrorHandler* handler )
2052 { errorHnd = handler; }
2055 QXmlErrorHandler* QXmlSimpleReader::errorHandler() const
2056 { return errorHnd; }
2059 void QXmlSimpleReader::setLexicalHandler( QXmlLexicalHandler* handler )
2060 { lexicalHnd = handler; }
2063 QXmlLexicalHandler* QXmlSimpleReader::lexicalHandler() const
2064 { return lexicalHnd; }
2067 void QXmlSimpleReader::setDeclHandler( QXmlDeclHandler* handler )
2068 { declHnd = handler; }
2071 QXmlDeclHandler* QXmlSimpleReader::declHandler() const
2077 bool QXmlSimpleReader::parse( const QXmlInputSource& input )
2082 contentHnd->setDocumentLocator( d->locator );
2083 if ( !contentHnd->startDocument() ) {
2084 d->error = contentHnd->errorString();
2089 if ( !parseProlog() ) {
2090 d->error = XMLERR_ERRORPARSINGPROLOG;
2094 if ( !parseElement() ) {
2095 d->error = XMLERR_ERRORPARSINGMAINELEMENT;
2099 while ( !atEnd() ) {
2100 if ( !parseMisc() ) {
2101 d->error = XMLERR_ERRORPARSINGMISC;
2106 if ( !tags.isEmpty() ) {
2107 d->error = XMLERR_UNEXPECTEDEOF;
2112 if ( !contentHnd->endDocument() ) {
2113 d->error = contentHnd->errorString();
2129 Parses the prolog [22].
2131 bool QXmlSimpleReader::parseProlog()
2133 bool xmldecl_possible = TRUE;
2134 bool doctype_read = FALSE;
2136 const signed char Init = 0;
2137 const signed char EatWS = 1; // eat white spaces
2138 const signed char Lt = 2; // '<' read
2139 const signed char Em = 3; // '!' read
2140 const signed char DocType = 4; // read doctype
2141 const signed char Comment = 5; // read comment
2142 const signed char PI = 6; // read PI
2143 const signed char Done = 7;
2145 const signed char InpWs = 0;
2146 const signed char InpLt = 1; // <
2147 const signed char InpQm = 2; // ?
2148 const signed char InpEm = 3; // !
2149 const signed char InpD = 4; // D
2150 const signed char InpDash = 5; // -
2151 const signed char InpUnknown = 6;
2153 // use some kind of state machine for parsing
2154 static signed char table[7][7] = {
2155 /* InpWs InpLt InpQm InpEm InpD InpDash InpUnknown */
2156 { EatWS, Lt, -1, -1, -1, -1, -1 }, // Init
2157 { -1, Lt, -1, -1, -1, -1, -1 }, // EatWS
2158 { -1, -1, PI, Em, Done, -1, Done }, // Lt
2159 { -1, -1, -1, -1, DocType, Comment, -1 }, // Em
2160 { EatWS, Lt, -1, -1, -1, -1, -1 }, // DocType
2161 { EatWS, Lt, -1, -1, -1, -1, -1 }, // Comment
2162 { EatWS, Lt, -1, -1, -1, -1, -1 } // PI
2164 signed char state = Init;
2166 bool parseOk = TRUE;
2172 d->error = XMLERR_UNEXPECTEDEOF;
2177 } else if ( c == '<' ) {
2179 } else if ( c == '?' ) {
2181 } else if ( c == '!' ) {
2183 } else if ( c == 'D' ) {
2185 } else if ( c == '-' ) {
2191 state = table[state][input];
2193 // in some cases do special actions depending on state
2196 // XML declaration only on first position possible
2197 xmldecl_possible = FALSE;
2206 // XML declaration only on first position possible
2207 xmldecl_possible = FALSE;
2212 parseOk = parseDoctype();
2215 parseOk = parseComment();
2218 parseOk = parsePI( xmldecl_possible );
2221 // no input is read after this
2225 d->error = XMLERR_ERRORPARSINGPROLOG;
2228 if ( doctype_read ) {
2229 d->error = XMLERR_MORETHANONEDOCTYPE;
2232 doctype_read = FALSE;
2237 d->error = XMLERR_ERRORPARSINGPROLOG;
2241 if ( !lexicalHnd->comment( string() ) ) {
2242 d->error = lexicalHnd->errorString();
2249 d->error = XMLERR_ERRORPARSINGPROLOG;
2254 if ( xmldecl_possible && !d->xmlVersion.isEmpty() ) {
2255 QString value( "version = '" );
2256 value += d->xmlVersion;
2258 if ( !d->encoding.isEmpty() ) {
2259 value += " encoding = '";
2260 value += d->encoding;
2263 if ( d->standalone == QXmlSimpleReaderPrivate::Yes ) {
2264 value += " standalone = 'yes'";
2265 } else if ( d->standalone == QXmlSimpleReaderPrivate::No ) {
2266 value += " standalone = 'no'";
2268 if ( !contentHnd->processingInstruction( "xml", value ) ) {
2269 d->error = contentHnd->errorString();
2273 if ( !contentHnd->processingInstruction( name(), string() ) ) {
2274 d->error = contentHnd->errorString();
2279 // XML declaration only on first position possible
2280 xmldecl_possible = FALSE;
2285 d->error = XMLERR_ERRORPARSINGELEMENT;
2299 Parse an element [39].
2301 Precondition: the opening '<' is already read.
2303 bool QXmlSimpleReader::parseElement()
2305 static QString uri, lname, prefix;
2308 const signed char Init = 0;
2309 const signed char ReadName = 1;
2310 const signed char Ws1 = 2;
2311 const signed char STagEnd = 3;
2312 const signed char STagEnd2 = 4;
2313 const signed char ETagBegin = 5;
2314 const signed char ETagBegin2 = 6;
2315 const signed char Ws2 = 7;
2316 const signed char EmptyTag = 8;
2317 const signed char Attribute = 9;
2318 const signed char Ws3 = 10;
2319 const signed char Done = 11;
2321 const signed char InpWs = 0; // whitespace
2322 const signed char InpNameBe = 1; // is_NameBeginning()
2323 const signed char InpGt = 2; // >
2324 const signed char InpSlash = 3; // /
2325 const signed char InpUnknown = 4;
2327 // use some kind of state machine for parsing
2328 static signed char table[11][5] = {
2329 /* InpWs InpNameBe InpGt InpSlash InpUnknown */
2330 { -1, ReadName, -1, -1, -1 }, // Init
2331 { Ws1, Attribute, STagEnd, EmptyTag, -1 }, // ReadName
2332 { -1, Attribute, STagEnd, EmptyTag, -1 }, // Ws1
2333 { STagEnd2, STagEnd2, STagEnd2, STagEnd2, STagEnd2 }, // STagEnd
2334 { -1, -1, -1, ETagBegin, -1 }, // STagEnd2
2335 { -1, ETagBegin2, -1, -1, -1 }, // ETagBegin
2336 { Ws2, -1, Done, -1, -1 }, // ETagBegin2
2337 { -1, -1, Done, -1, -1 }, // Ws2
2338 { -1, -1, Done, -1, -1 }, // EmptyTag
2339 { Ws3, Attribute, STagEnd, EmptyTag, -1 }, // Attribute
2340 { -1, Attribute, STagEnd, EmptyTag, -1 } // Ws3
2342 signed char state = Init;
2344 bool parseOk = TRUE;
2350 d->error = XMLERR_UNEXPECTEDEOF;
2355 } else if ( is_NameBeginning(c) ) {
2357 } else if ( c == '>' ) {
2359 } else if ( c == '/' ) {
2365 //qDebug( "%d -%d(%c)-> %d", state, input, c.latin1(), table[state][input] );
2366 state = table[state][input];
2368 // in some cases do special actions depending on state
2371 parseOk = parseName();
2381 if ( d->useNamespaces ) {
2382 d->namespaceSupport.processName( tags.top(), FALSE, uri, lname );
2383 t = contentHnd->startElement( uri, lname, tags.top(), d->attList );
2385 t = contentHnd->startElement( "", "", tags.top(), d->attList );
2388 d->error = contentHnd->errorString();
2395 parseOk = parseContent();
2401 // get the name of the tag
2402 parseOk = parseName();
2405 if ( tags.isEmpty() ) {
2406 d->error = XMLERR_TAGMISMATCH;
2409 if ( !parseElementEmptyTag( t, uri, lname ) )
2415 // get name and value of attribute
2416 parseOk = parseAttribute();
2422 // no input is read after this
2426 d->error = XMLERR_ERRORPARSINGNAME;
2429 // store it on the stack
2430 tags.push( name() );
2431 // empty the attributes
2432 d->attList.qnameList.clear();
2433 d->attList.uriList.clear();
2434 d->attList.localnameList.clear();
2435 d->attList.valueList.clear();
2436 // namespace support?
2437 if ( d->useNamespaces ) {
2438 d->namespaceSupport.pushContext();
2443 d->error = XMLERR_ERRORPARSINGCONTENT;
2449 d->error = XMLERR_ERRORPARSINGNAME;
2452 if ( !parseElementETagBegin2( uri, lname ) )
2457 d->error = XMLERR_ERRORPARSINGATTRIBUTE;
2460 if ( !parseElementAttribute( prefix, uri, lname ) )
2466 d->error = XMLERR_ERRORPARSINGELEMENT;
2479 Helper to break down the size of the code in the case statement.
2480 Return FALSE on error, otherwise TRUE.
// Reports an empty-element tag to the content handler: startElement() is
// called first, then endElement(), both for the qualified name on top of the
// tag stack (the name is popped by the endElement() call).
//
// t     - receives the result of contentHnd->startElement().
// uri   - filled in by namespaceSupport.processName() when namespaces are used.
// lname - filled in by namespaceSupport.processName() when namespaces are used.
//
// Whenever a handler call fails, the handler's errorString() is copied into
// d->error.
2482 bool QXmlSimpleReader::parseElementEmptyTag( bool &t, QString &uri, QString &lname )
2484 // pop the stack and call the handler
2486 // report startElement first...
2487 if ( d->useNamespaces ) {
// resolve the qualified name into namespace URI and local name
2488 d->namespaceSupport.processName( tags.top(), FALSE, uri, lname );
2489 t = contentHnd->startElement( uri, lname, tags.top(), d->attList );
// without namespace processing, URI and local name are reported as empty strings
2491 t = contentHnd->startElement( "", "", tags.top(), d->attList );
2494 d->error = contentHnd->errorString();
2497 // ... followed by endElement
2498 if ( d->useNamespaces ) {
2499 if ( !contentHnd->endElement( uri, lname,tags.pop() ) ) {
2500 d->error = contentHnd->errorString();
2505 if ( !contentHnd->endElement( "","",tags.pop() ) ) {
2506 d->error = contentHnd->errorString();
2510 // namespace support?
2511 if ( d->useNamespaces ) {
2512 QStringList prefixesBefore, prefixesAfter;
// remember which prefixes were known inside the element
2514 prefixesBefore = d->namespaceSupport.prefixes();
2516 d->namespaceSupport.popContext();
2517 // call the handler for prefix mapping
2519 prefixesAfter = d->namespaceSupport.prefixes();
// every prefix that disappeared with the popped context is reported as ended
2520 for ( QStringList::Iterator it = prefixesBefore.begin(); it != prefixesBefore.end(); ++it ) {
2521 if ( prefixesAfter.contains(*it) == 0 ) {
2522 if ( !contentHnd->endPrefixMapping( *it ) ) {
2523 d->error = contentHnd->errorString();
2536 Helper to break down the size of the code in the case statement.
2537 Return FALSE on error, otherwise TRUE.
// Handles an end tag whose name has just been parsed (available via name()).
// The name is compared with the one popped from the tag stack; a difference is
// reported as XMLERR_TAGMISMATCH. Otherwise endElement() is called on the
// content handler and, when namespaces are used, the namespace context is
// popped and endPrefixMapping() is called for every prefix that went away.
//
// uri   - filled in by namespaceSupport.processName() when namespaces are used.
// lname - filled in by namespaceSupport.processName() when namespaces are used.
2539 bool QXmlSimpleReader::parseElementETagBegin2( QString &uri, QString &lname )
2542 // pop the stack and compare it with the name
2543 if ( tags.pop() != name() ) {
2544 d->error = XMLERR_TAGMISMATCH;
2549 if ( d->useNamespaces ) {
// resolve the qualified name into namespace URI and local name
2550 d->namespaceSupport.processName( name(), FALSE, uri, lname );
2551 if ( !contentHnd->endElement(uri,lname,name()) ) {
2552 d->error = contentHnd->errorString();
// without namespace processing, URI and local name are reported as empty strings
2557 if ( !contentHnd->endElement("","",name()) ) {
2558 d->error = contentHnd->errorString();
2563 // namespace support?
2564 if ( d->useNamespaces ) {
2565 QStringList prefixesBefore, prefixesAfter;
// remember which prefixes were known inside the element
2567 prefixesBefore = d->namespaceSupport.prefixes();
2569 d->namespaceSupport.popContext();
2570 // call the handler for prefix mapping
2572 prefixesAfter = d->namespaceSupport.prefixes();
// every prefix that disappeared with the popped context is reported as ended
2573 for ( QStringList::Iterator it = prefixesBefore.begin(); it != prefixesBefore.end(); ++it ) {
2574 if ( prefixesAfter.contains(*it) == 0 ) {
2575 if ( !contentHnd->endPrefixMapping( *it ) ) {
2576 d->error = contentHnd->errorString();
2586 Helper to break down the size of the code in the case statement.
2587 Return FALSE on error, otherwise TRUE.
// Adds the attribute that was just parsed (name() / string()) to d->attList,
// which consists of four parallel lists: qualified names, URIs, local names
// and values.
//
// With namespace processing enabled, an attribute whose prefix is "xmlns" is
// treated as a namespace declaration: it is registered with namespaceSupport,
// reported through contentHnd->startPrefixMapping(), and only added to the
// attribute list when d->useNamespacePrefixes is set.
//
// prefix - receives the prefix part of the attribute name (namespaces only).
// uri    - receives the namespace URI of an ordinary attribute (namespaces only).
// lname  - receives the local part of the attribute name (namespaces only).
2589 bool QXmlSimpleReader::parseElementAttribute( QString &prefix, QString &uri, QString &lname )
2591 // add the attribute to the list
2592 if ( d->useNamespaces ) {
2593 // is it a namespace declaration?
2594 d->namespaceSupport.splitName( name(), prefix, lname );
2595 if ( prefix == "xmlns" ) {
2596 // namespace declaration
// the local part is the declared prefix, the attribute value is its URI
2597 d->namespaceSupport.setPrefix( lname, string() );
2598 if ( d->useNamespacePrefixes ) {
// xmlns attributes are listed with empty URI and empty local name
2599 d->attList.qnameList.append( name() );
2600 d->attList.uriList.append( "" );
2601 d->attList.localnameList.append( "" );
2602 d->attList.valueList.append( string() );
2604 // call the handler for prefix mapping
2606 if ( !contentHnd->startPrefixMapping( lname, string() ) ) {
2607 d->error = contentHnd->errorString();
2612 // no namespace declaration
2613 d->namespaceSupport.processName( name(), TRUE, uri, lname );
2614 d->attList.qnameList.append( name() );
2615 d->attList.uriList.append( uri );
2616 d->attList.localnameList.append( lname );
2617 d->attList.valueList.append( string() );
2620 // no namespace support
// URI and local name are stored as empty strings in this mode
2621 d->attList.qnameList.append( name() );
2622 d->attList.uriList.append( "" );
2623 d->attList.localnameList.append( "" );
2624 d->attList.valueList.append( string() );
2630 Parse a content [43].
2632 A content is only used between tags. If an end tag is found, the < is already
2633 read and the head stands on the '/' of the end tag '</name>'.
// Parses element content with a table-driven state machine. Character data is
// accumulated and handed to contentHnd->characters(); references, processing
// instructions, child elements, comments and CDATA sections are delegated to
// parseReference(), parsePI(), parseElement(), parseComment() and the CDS*
// states respectively. Failures of sub-parsers are mapped to the matching
// XMLERR_* string in d->error; failures of handler calls copy the handler's
// errorString() into d->error.
2635 bool QXmlSimpleReader::parseContent()
// TRUE while character data has been collected but not yet reported
2637 bool charDataRead = FALSE;
// states of the state machine
2639 const signed char Init = 0;
2640 const signed char ChD = 1; // CharData
2641 const signed char ChD1 = 2; // CharData help state
2642 const signed char ChD2 = 3; // CharData help state
2643 const signed char Ref = 4; // Reference
2644 const signed char Lt = 5; // '<' read
2645 const signed char PI = 6; // PI
2646 const signed char Elem = 7; // Element
2647 const signed char Em = 8; // '!' read
2648 const signed char Com = 9; // Comment
2649 const signed char CDS = 10; // CDSect
2650 const signed char CDS1 = 11; // read a CDSect
2651 const signed char CDS2 = 12; // read a CDSect (help state)
2652 const signed char CDS3 = 13; // read a CDSect (help state)
2653 const signed char Done = 14; // finished reading content
// input classes (column index into the transition table)
2655 const signed char InpLt = 0; // <
2656 const signed char InpGt = 1; // >
2657 const signed char InpSlash = 2; // /
2658 const signed char InpQMark = 3; // ?
2659 const signed char InpEMark = 4; // !
2660 const signed char InpAmp = 5; // &
2661 const signed char InpDash = 6; // -
2662 const signed char InpOpenB = 7; // [
2663 const signed char InpCloseB = 8; // ]
2664 const signed char InpUnknown = 9;
// maps a charLookupTable class to one of the input classes above
2666 static signed char mapCLT2FSMChar[] = {
2667 InpUnknown, // white space
2681 InpUnknown // unknown
2684 // use some kind of state machine for parsing
// One row per state Init..CDS3 (Done has no row), one column per input class.
// NOTE(review): -1 presumably marks an input that is not allowed in that
// state - confirm against the state handling further down.
2685 static signed char const table[14][10] = {
2686 /* InpLt InpGt InpSlash InpQMark InpEMark InpAmp InpDash InpOpenB InpCloseB InpUnknown */
2687 { Lt, ChD, ChD, ChD, ChD, Ref, ChD, ChD, ChD1, ChD }, // Init
2688 { Lt, ChD, ChD, ChD, ChD, Ref, ChD, ChD, ChD1, ChD }, // ChD
2689 { Lt, ChD, ChD, ChD, ChD, Ref, ChD, ChD, ChD2, ChD }, // ChD1
2690 { Lt, -1, ChD, ChD, ChD, Ref, ChD, ChD, ChD2, ChD }, // ChD2
2691 { Lt, ChD, ChD, ChD, ChD, Ref, ChD, ChD, ChD, ChD }, // Ref (same as Init)
2692 { -1, -1, Done, PI, Em, -1, -1, -1, -1, Elem }, // Lt
2693 { Lt, ChD, ChD, ChD, ChD, Ref, ChD, ChD, ChD, ChD }, // PI (same as Init)
2694 { Lt, ChD, ChD, ChD, ChD, Ref, ChD, ChD, ChD, ChD }, // Elem (same as Init)
2695 { -1, -1, -1, -1, -1, -1, Com, CDS, -1, -1 }, // Em
2696 { Lt, ChD, ChD, ChD, ChD, Ref, ChD, ChD, ChD, ChD }, // Com (same as Init)
2697 { CDS1, CDS1, CDS1, CDS1, CDS1, CDS1, CDS1, CDS1, CDS2, CDS1 }, // CDS
2698 { CDS1, CDS1, CDS1, CDS1, CDS1, CDS1, CDS1, CDS1, CDS2, CDS1 }, // CDS1
2699 { CDS1, CDS1, CDS1, CDS1, CDS1, CDS1, CDS1, CDS1, CDS3, CDS1 }, // CDS2
2700 { CDS1, Init, CDS1, CDS1, CDS1, CDS1, CDS1, CDS1, CDS3, CDS1 } // CDS3
2702 signed char state = Init;
2704 bool parseOk = TRUE;
2708 // get input (use lookup-table instead of nested ifs for performance
2711 d->error = XMLERR_UNEXPECTEDEOF;
// classify the current character via its low byte (cell)
2717 input = mapCLT2FSMChar[ charLookupTable[ c.cell() ] ];
2720 // set state according to input
2721 state = table[state][input];
2723 // do some actions according to state
2730 // on first call: clear string
2731 if ( !charDataRead ) {
2732 charDataRead = TRUE;
2739 // on first call: clear string
2740 if ( !charDataRead ) {
2741 charDataRead = TRUE;
2752 if ( !charDataRead) {
2753 // reference may be CharData; so clear string to be safe
2755 parseOk = parseReference( charDataRead, InContent );
2758 parseOk = parseReference( tmp, InContent );
2762 // call the handler for CharData
2764 if ( charDataRead ) {
// whitespace-only character data is reported only if reportWhitespaceCharData is set
2765 if ( d->reportWhitespaceCharData || !string().simplifyWhiteSpace().isEmpty() ) {
2766 if ( !contentHnd->characters( string() ) ) {
2767 d->error = contentHnd->errorString();
2773 charDataRead = FALSE;
2778 parseOk = parsePI();
2781 parseOk = parseElement();
2788 parseOk = parseComment();
2791 parseOk = parseString( "[CDATA[" );
2794 // read one character and add it
2807 // no input is read after this
2811 d->error = XMLERR_ERRORPARSINGREFERENCE;
2817 d->error = XMLERR_ERRORPARSINGPI;
// a successfully parsed PI is reported with its target (name()) and data (string())
2822 if ( !contentHnd->processingInstruction(name(),string()) ) {
2823 d->error = contentHnd->errorString();
2830 d->error = XMLERR_ERRORPARSINGELEMENT;
2836 d->error = XMLERR_ERRORPARSINGCOMMENT;
2840 if ( !lexicalHnd->comment( string() ) ) {
2841 d->error = lexicalHnd->errorString();
2848 d->error = XMLERR_CDSECTHEADEREXPECTED;
2860 // test if this skipping was legal
2862 // the end of the CDSect
// a finished CDATA section is reported as startCDATA(), characters(), endCDATA()
2864 if ( !lexicalHnd->startCDATA() ) {
2865 d->error = lexicalHnd->errorString();
2870 if ( !contentHnd->characters( string() ) ) {
2871 d->error = contentHnd->errorString();
2876 if ( !lexicalHnd->endCDATA() ) {
2877 d->error = lexicalHnd->errorString();
2881 } else if (c == ']') {
2882 // three or more ']'
2885 // after ']]' comes another character
2891 // call the handler for CharData
2893 if ( charDataRead ) {
// whitespace-only character data is reported only if reportWhitespaceCharData is set
2894 if ( d->reportWhitespaceCharData || !string().simplifyWhiteSpace().isEmpty() ) {
2895 if ( !contentHnd->characters( string() ) ) {
2896 d->error = contentHnd->errorString();
2906 d->error = XMLERR_ERRORPARSINGCONTENT;
// Parses one "Misc" item with a small state machine: whitespace, a processing
// instruction (via parsePI(), reported through
// contentHnd->processingInstruction()) or a comment (via parseComment(),
// reported through lexicalHnd->comment()). Errors are stored in d->error.
2922 bool QXmlSimpleReader::parseMisc()
// states of the state machine
2924 const signed char Init = 0;
2925 const signed char Lt = 1; // '<' was read
2926 const signed char Comment = 2; // read comment
2927 const signed char eatWS = 3; // eat whitespaces
2928 const signed char PI = 4; // read PI
2929 const signed char Comment2 = 5; // read comment
// input classes (column index into the transition table)
2931 const signed char InpWs = 0; // S
2932 const signed char InpLt = 1; // <
2933 const signed char InpQm = 2; // ?
2934 const signed char InpEm = 3; // !
2935 const signed char InpUnknown = 4;
2937 // use some kind of state machine for parsing
// Only Init, Lt and Comment have a row; eatWS, PI and Comment2 have none.
// NOTE(review): -1 presumably marks an input that is not allowed in that
// state - confirm against the state handling further down.
2938 static signed char table[3][5] = {
2939 /* InpWs InpLt InpQm InpEm InpUnknown */
2940 { eatWS, Lt, -1, -1, -1 }, // Init
2941 { -1, -1, PI, Comment, -1 }, // Lt
2942 { -1, -1, -1, -1, Comment2 } // Comment
2944 signed char state = Init;
2946 bool parseOk = TRUE;
2952 d->error = XMLERR_UNEXPECTEDEOF;
2957 } else if ( c == '<' ) {
2959 } else if ( c == '?' ) {
2961 } else if ( c == '!' ) {
2967 // set state according to input
2968 state = table[state][input];
2970 // do some actions according to state
2979 parseOk = parsePI();
2985 parseOk = parseComment();
2988 // no input is read after this
2994 d->error = XMLERR_ERRORPARSINGPI;
// a successfully parsed PI is reported with its target (name()) and data (string())
2998 if ( !contentHnd->processingInstruction(name(),string()) ) {
2999 d->error = contentHnd->errorString();
3006 d->error = XMLERR_ERRORPARSINGCOMMENT;
3010 if ( !lexicalHnd->comment( string() ) ) {
3011 d->error = lexicalHnd->errorString();
3018 d->error = XMLERR_UNEXPECTEDCHARACTER;
3032 Parse a processing instruction [16].
3034 If xmldecl is TRUE, it tries to parse a PI or an XML declaration [23].
3036 Precondition: the beginning '<' of the PI is already read and the head stands
3039 If this function was successful, the head-position is on the first
3040 character after the PI.
// Parses a processing instruction or, when xmldecl is TRUE, an XML
// declaration, using a table-driven state machine.
//
// For an XML declaration the pseudo-attributes are read with parseAttribute():
// "version" is stored in d->xmlVersion, "encoding" in d->encoding and
// "standalone" ("yes"/"no") in d->standalone. Violations are reported through
// the matching XMLERR_* string in d->error.
3042 bool QXmlSimpleReader::parsePI( bool xmldecl )
// states of the state machine
3044 const signed char Init = 0;
3045 const signed char QmI = 1; // ? was read
3046 const signed char Name = 2; // read Name
3047 const signed char XMLDecl = 3; // read XMLDecl
3048 const signed char Ws1 = 4; // eat ws after "xml" of XMLDecl
3049 const signed char PI = 5; // read PI
3050 const signed char Ws2 = 6; // eat ws after Name of PI
3051 const signed char Version = 7; // read versionInfo
3052 const signed char Ws3 = 8; // eat ws after versionInfo
3053 const signed char EorSD = 9; // read EDecl or SDDecl
3054 const signed char Ws4 = 10; // eat ws after EDecl or SDDecl
3055 const signed char SD = 11; // read SDDecl
3056 const signed char Ws5 = 12; // eat ws after SDDecl
3057 const signed char ADone = 13; // almost done
3058 const signed char Char = 14; // Char was read
3059 const signed char Qm = 15; // Qm was read
3060 const signed char Done = 16; // finished reading content
// input classes (column index into the transition table)
3062 const signed char InpWs = 0; // whitespace
3063 const signed char InpNameBe = 1; // is_nameBeginning()
3064 const signed char InpGt = 2; // >
3065 const signed char InpQm = 3; // ?
3066 const signed char InpUnknown = 4;
3068 // use some kind of state machine for parsing
// One row per state Init..Qm (Done has no row).
// NOTE(review): -1 presumably marks an input that is not allowed in that
// state - confirm against the state handling further down.
3069 static signed char table[16][5] = {
3070 /* InpWs, InpNameBe InpGt InpQm InpUnknown */
3071 { -1, -1, -1, QmI, -1 }, // Init
3072 { -1, Name, -1, -1, -1 }, // QmI
3073 { -1, -1, -1, -1, -1 }, // Name (this state is left not through input)
3074 { Ws1, -1, -1, -1, -1 }, // XMLDecl
3075 { -1, Version, -1, -1, -1 }, // Ws1
3076 { Ws2, -1, -1, Qm, -1 }, // PI
3077 { Char, Char, Char, Qm, Char }, // Ws2
3078 { Ws3, -1, -1, ADone, -1 }, // Version
3079 { -1, EorSD, -1, ADone, -1 }, // Ws3
3080 { Ws4, -1, -1, ADone, -1 }, // EorSD
3081 { -1, SD, -1, ADone, -1 }, // Ws4
3082 { Ws5, -1, -1, ADone, -1 }, // SD
3083 { -1, -1, -1, ADone, -1 }, // Ws5
3084 { -1, -1, Done, -1, -1 }, // ADone
3085 { Char, Char, Char, Qm, Char }, // Char
3086 { Char, Char, Done, Qm, Char }, // Qm
3088 signed char state = Init;
3090 bool parseOk = TRUE;
3096 d->error = XMLERR_UNEXPECTEDEOF;
3101 } else if ( is_NameBeginning(c) ) {
3103 } else if ( c == '>' ) {
3105 } else if ( c == '?' ) {
3111 // set state according to input
3112 state = table[state][input];
3114 // do some actions according to state
3120 parseOk = parseName();
3130 parseOk = parseAttribute();
3133 parseOk = parseAttribute();
3136 // get the SDDecl (syntax like an attribute)
3137 if ( d->standalone != QXmlSimpleReaderPrivate::Unknown ) {
3138 // already parsed the standalone declaration
3139 d->error = XMLERR_UNEXPECTEDCHARACTER;
3142 parseOk = parseAttribute();
3159 // no input is read after this
3163 d->error = XMLERR_ERRORPARSINGNAME;
3166 // test what name was read and determine the next state
3167 // (not very beautiful, I admit)
// any capitalisation of "xml" is caught here; only the exact lower-case form
// together with xmldecl passes the inner test
3168 if ( name().lower() == "xml" ) {
3169 if ( xmldecl && name()=="xml" ) {
3172 d->error = XMLERR_INVALIDNAMEFORPI;
3181 // get version (syntax like an attribute)
3183 d->error = XMLERR_VERSIONEXPECTED;
3186 if ( name() != "version" ) {
3187 d->error = XMLERR_VERSIONEXPECTED;
3190 d->xmlVersion = string();
3193 // get the EDecl or SDDecl (syntax like an attribute)
3195 d->error = XMLERR_EDECLORSDDECLEXPECTED;
3198 if ( name() == "standalone" ) {
3199 if ( string()=="yes" ) {
3200 d->standalone = QXmlSimpleReaderPrivate::Yes;
3201 } else if ( string()=="no" ) {
3202 d->standalone = QXmlSimpleReaderPrivate::No;
3204 d->error = XMLERR_WRONGVALUEFORSDECL;
3207 } else if ( name() == "encoding" ) {
3208 d->encoding = string();
3210 d->error = XMLERR_EDECLORSDDECLEXPECTED;
3216 d->error = XMLERR_SDDECLEXPECTED;
3219 if ( name() != "standalone" ) {
3220 d->error = XMLERR_SDDECLEXPECTED;
3223 if ( string()=="yes" ) {
3224 d->standalone = QXmlSimpleReaderPrivate::Yes;
3225 } else if ( string()=="no" ) {
3226 d->standalone = QXmlSimpleReaderPrivate::No;
3228 d->error = XMLERR_WRONGVALUEFORSDECL;
3233 // test if the skipping was legal
3242 d->error = XMLERR_UNEXPECTEDCHARACTER;
3256 Parse a document type definition (doctypedecl [28]).
3258 Precondition: the beginning '<!' of the doctype is already read and the head
3259 stands on the 'D' of '<!DOCTYPE'.
3261 If this function was successful, the head-position is on the first
3262 character after the document type definition.
// Parses a document type declaration with a table-driven state machine.
// The doctype name is stored in d->doctype and reported together with
// d->publicId / d->systemId through lexicalHnd->startDTD(); the end of the
// declaration is reported through lexicalHnd->endDTD(). The external ID,
// parameter entity references and markup declarations are delegated to
// parseExternalID(), parsePEReference( InDTD ) and parseMarkupdecl().
3264 bool QXmlSimpleReader::parseDoctype()
// forget the identifiers left over from an earlier parse
3267 d->systemId = QString::null;
3268 d->publicId = QString::null;
// states of the state machine
3270 const signed char Init = 0;
3271 const signed char Doctype = 1; // read the doctype
3272 const signed char Ws1 = 2; // eat_ws
3273 const signed char Doctype2 = 3; // read the doctype, part 2
3274 const signed char Ws2 = 4; // eat_ws
3275 const signed char Sys = 5; // read SYSTEM
3276 const signed char Ws3 = 6; // eat_ws
3277 const signed char MP = 7; // markupdecl or PEReference
3278 const signed char PER = 8; // PERReference
3279 const signed char Mup = 9; // markupdecl
3280 const signed char Ws4 = 10; // eat_ws
3281 const signed char MPE = 11; // end of markupdecl or PEReference
3282 const signed char Done = 12;
// input classes (column index into the transition table)
3284 const signed char InpWs = 0;
3285 const signed char InpD = 1; // 'D'
3286 const signed char InpS = 2; // 'S' or 'P'
3287 const signed char InpOB = 3; // [
3288 const signed char InpCB = 4; // ]
3289 const signed char InpPer = 5; // %
3290 const signed char InpGt = 6; // >
3291 const signed char InpUnknown = 7;
3293 // use some kind of state machine for parsing
// One row per state Init..MPE (Done has no row).
// NOTE(review): -1 presumably marks an input that is not allowed in that
// state - confirm against the state handling further down.
3294 static signed char table[12][8] = {
3295 /* InpWs, InpD InpS InpOB InpCB InpPer InpGt InpUnknown */
3296 { -1, Doctype, -1, -1, -1, -1, -1, -1 }, // Init
3297 { Ws1, Doctype2, Doctype2, -1, -1, -1, -1, Doctype2 }, // Doctype
3298 { -1, Doctype2, Doctype2, -1, -1, -1, -1, Doctype2 }, // Ws1
3299 { Ws2, -1, Sys, MP, -1, -1, Done, -1 }, // Doctype2
3300 { -1, -1, Sys, MP, -1, -1, Done, -1 }, // Ws2
3301 { Ws3, -1, -1, MP, -1, -1, Done, -1 }, // Sys
3302 { -1, -1, -1, MP, -1, -1, Done, -1 }, // Ws3
3303 { -1, -1, -1, -1, MPE, PER, -1, Mup }, // MP
3304 { Ws4, -1, -1, -1, MPE, PER, -1, Mup }, // PER
3305 { Ws4, -1, -1, -1, MPE, PER, -1, Mup }, // Mup
3306 { -1, -1, -1, -1, MPE, PER, -1, Mup }, // Ws4
3307 { -1, -1, -1, -1, -1, -1, Done, -1 } // MPE
3309 signed char state = Init;
3311 bool parseOk = TRUE;
3317 d->error = XMLERR_UNEXPECTEDEOF;
3322 } else if ( c == 'D' ) {
3324 } else if ( c == 'S' ) {
3326 } else if ( c == 'P' ) {
3328 } else if ( c == '[' ) {
3330 } else if ( c == ']' ) {
3332 } else if ( c == '%' ) {
3334 } else if ( c == '>' ) {
3340 // set state according to input
3341 state = table[state][input];
3343 // do some actions according to state
3346 parseOk = parseString( "DOCTYPE" );
3358 parseOk = parseExternalID();
3364 parseOk = parsePEReference( InDTD );
3367 parseOk = parseMarkupdecl();
3374 if ( !lexicalHnd->endDTD() ) {
3375 d->error = lexicalHnd->errorString();
3382 // no input is read after this
3386 d->error = XMLERR_ERRORPARSINGDOCTYPE;
3390 d->error = XMLERR_ERRORPARSINGDOCTYPE;
// the name that was just parsed is the document type name
3395 d->doctype = name();
3397 if ( !lexicalHnd->startDTD( d->doctype, d->publicId, d->systemId ) ) {
3398 d->error = lexicalHnd->errorString();
3405 d->error = XMLERR_ERRORPARSINGDOCTYPE;
3411 d->error = XMLERR_ERRORPARSINGDOCTYPE;
3417 d->error = XMLERR_ERRORPARSINGDOCTYPE;
3425 d->error = XMLERR_ERRORPARSINGDOCTYPE;
3439 Parse an ExternalID [75].
3441 If allowPublicID is TRUE parse ExternalID [75] or PublicID [83].
// Parses an external identifier ("SYSTEM" or "PUBLIC" followed by quoted
// literals) with a table-driven state machine. The literals that were read
// end up in d->publicId and d->systemId, both of which are reset first.
//
// allowPublicID - when TRUE, a public identifier without a following system
//                 literal is accepted (state PDone); otherwise that case sets
//                 XMLERR_UNEXPECTEDCHARACTER.
3443 bool QXmlSimpleReader::parseExternalID( bool allowPublicID )
// forget the identifiers left over from an earlier parse
3446 d->systemId = QString::null;
3447 d->publicId = QString::null;
// states of the state machine
3449 const signed char Init = 0;
3450 const signed char Sys = 1; // parse 'SYSTEM'
3451 const signed char SysWS = 2; // parse the whitespace after 'SYSTEM'
3452 const signed char SysSQ = 3; // parse SystemLiteral with '
3453 const signed char SysSQ2 = 4; // parse SystemLiteral with '
3454 const signed char SysDQ = 5; // parse SystemLiteral with "
3455 const signed char SysDQ2 = 6; // parse SystemLiteral with "
3456 const signed char Pub = 7; // parse 'PUBLIC'
3457 const signed char PubWS = 8; // parse the whitespace after 'PUBLIC'
3458 const signed char PubSQ = 9; // parse PubidLiteral with '
3459 const signed char PubSQ2 = 10; // parse PubidLiteral with '
3460 const signed char PubDQ = 11; // parse PubidLiteral with "
3461 const signed char PubDQ2 = 12; // parse PubidLiteral with "
3462 const signed char PubE = 13; // finished parsing the PubidLiteral
3463 const signed char PubWS2 = 14; // parse the whitespace after the PubidLiteral
3464 const signed char PDone = 15; // done if allowPublicID is TRUE
3465 const signed char Done = 16;
// input classes (column index into the transition table)
3467 const signed char InpSQ = 0; // '
3468 const signed char InpDQ = 1; // "
3469 const signed char InpS = 2; // S
3470 const signed char InpP = 3; // P
3471 const signed char InpWs = 4; // white space
3472 const signed char InpUnknown = 5;
3474 // use some kind of state machine for parsing
// One row per state Init..PubWS2 (PDone and Done have no row).
// NOTE(review): -1 presumably marks an input that is not allowed in that
// state - confirm against the state handling further down.
3475 static signed char table[15][6] = {
3476 /* InpSQ InpDQ InpS InpP InpWs InpUnknown */
3477 { -1, -1, Sys, Pub, -1, -1 }, // Init
3478 { -1, -1, -1, -1, SysWS, -1 }, // Sys
3479 { SysSQ, SysDQ, -1, -1, -1, -1 }, // SysWS
3480 { Done, SysSQ2, SysSQ2, SysSQ2, SysSQ2, SysSQ2 }, // SysSQ
3481 { Done, SysSQ2, SysSQ2, SysSQ2, SysSQ2, SysSQ2 }, // SysSQ2
3482 { SysDQ2, Done, SysDQ2, SysDQ2, SysDQ2, SysDQ2 }, // SysDQ
3483 { SysDQ2, Done, SysDQ2, SysDQ2, SysDQ2, SysDQ2 }, // SysDQ2
3484 { -1, -1, -1, -1, PubWS, -1 }, // Pub
3485 { PubSQ, PubDQ, -1, -1, -1, -1 }, // PubWS
3486 { PubE, -1, PubSQ2, PubSQ2, PubSQ2, PubSQ2 }, // PubSQ
3487 { PubE, -1, PubSQ2, PubSQ2, PubSQ2, PubSQ2 }, // PubSQ2
3488 { -1, PubE, PubDQ2, PubDQ2, PubDQ2, PubDQ2 }, // PubDQ
3489 { -1, PubE, PubDQ2, PubDQ2, PubDQ2, PubDQ2 }, // PubDQ2
3490 { PDone, PDone, PDone, PDone, PubWS2, PDone }, // PubE
3491 { SysSQ, SysDQ, PDone, PDone, PDone, PDone } // PubWS2
3493 signed char state = Init;
3495 bool parseOk = TRUE;
3501 d->error = XMLERR_UNEXPECTEDEOF;
3506 } else if ( c == '\'' ) {
3508 } else if ( c == '"' ) {
3510 } else if ( c == 'S' ) {
3512 } else if ( c == 'P' ) {
3518 // set state according to input
3519 state = table[state][input];
3521 // do some actions according to state
3524 parseOk = parseString( "SYSTEM" );
3540 parseOk = parseString( "PUBLIC" );
3559 d->publicId = string();
3563 d->systemId = string();
3567 // no input is read after this
3571 d->error = XMLERR_UNEXPECTEDCHARACTER;
3577 d->error = XMLERR_UNEXPECTEDCHARACTER;
// a public identifier on its own is only acceptable when the caller allows it
3582 if ( allowPublicID ) {
3583 d->publicId = string();
3586 d->error = XMLERR_UNEXPECTEDCHARACTER;
3594 d->error = XMLERR_UNEXPECTEDCHARACTER;
3608 Parse a markupdecl [29].
// Parses one markup declaration inside the DTD. A small state machine looks
// at the first characters after '<' and dispatches to parsePI(),
// parseComment(), parseAttlistDecl(), parseElementDecl(), parseEntityDecl()
// or parseNotationDecl(). Processing instructions and comments are reported
// through contentHnd->processingInstruction() and lexicalHnd->comment();
// failures are stored in d->error.
3610 bool QXmlSimpleReader::parseMarkupdecl()
// states of the state machine
3612 const signed char Init = 0;
3613 const signed char Lt = 1; // < was read
3614 const signed char Em = 2; // ! was read
3615 const signed char CE = 3; // E was read
3616 const signed char Qm = 4; // ? was read
3617 const signed char Dash = 5; // - was read
3618 const signed char CA = 6; // A was read
3619 const signed char CEL = 7; // EL was read
3620 const signed char CEN = 8; // EN was read
3621 const signed char CN = 9; // N was read
3622 const signed char Done = 10;
// input classes (column index into the transition table)
3624 const signed char InpLt = 0; // <
3625 const signed char InpQm = 1; // ?
3626 const signed char InpEm = 2; // !
3627 const signed char InpDash = 3; // -
3628 const signed char InpA = 4; // A
3629 const signed char InpE = 5; // E
3630 const signed char InpL = 6; // L
3631 const signed char InpN = 7; // N
3632 const signed char InpUnknown = 8;
3634 // use some kind of state machine for parsing
// Only Init, Lt, Em and CE have a row; the remaining states have none.
// NOTE(review): -1 presumably marks an input that is not allowed in that
// state - confirm against the state handling further down.
3635 static signed char table[4][9] = {
3636 /* InpLt InpQm InpEm InpDash InpA InpE InpL InpN InpUnknown */
3637 { Lt, -1, -1, -1, -1, -1, -1, -1, -1 }, // Init
3638 { -1, Qm, Em, -1, -1, -1, -1, -1, -1 }, // Lt
3639 { -1, -1, -1, Dash, CA, CE, -1, CN, -1 }, // Em
3640 { -1, -1, -1, -1, -1, -1, CEL, CEN, -1 } // CE
3642 signed char state = Init;
3644 bool parseOk = TRUE;
3650 d->error = XMLERR_UNEXPECTEDEOF;
3655 } else if ( c == '?' ) {
3657 } else if ( c == '!' ) {
3659 } else if ( c == '-' ) {
3661 } else if ( c == 'A' ) {
3663 } else if ( c == 'E' ) {
3665 } else if ( c == 'L' ) {
3667 } else if ( c == 'N' ) {
3673 // set state according to input
3674 state = table[state][input];
3676 // do some actions according to state
3688 parseOk = parsePI();
3691 parseOk = parseComment();
3694 parseOk = parseAttlistDecl();
3697 parseOk = parseElementDecl();
3700 parseOk = parseEntityDecl();
3703 parseOk = parseNotationDecl();
3706 // no input is read after this
3710 d->error = XMLERR_ERRORPARSINGPI;
// a successfully parsed PI is reported with its target (name()) and data (string())
3714 if ( !contentHnd->processingInstruction(name(),string()) ) {
3715 d->error = contentHnd->errorString();
3722 d->error = XMLERR_ERRORPARSINGCOMMENT;
3726 if ( !lexicalHnd->comment( string() ) ) {
3727 d->error = lexicalHnd->errorString();
3734 d->error = XMLERR_ERRORPARSINGATTLISTDECL;
3740 d->error = XMLERR_ERRORPARSINGELEMENTDECL;
3746 d->error = XMLERR_ERRORPARSINGENTITYDECL;
3752 d->error = XMLERR_ERRORPARSINGNOTATIONDECL;
3760 d->error = XMLERR_LETTEREXPECTED;
3774 Parse a PEReference [69]
// Parses a parameter entity reference ('%' Name ';') with a small state
// machine and splices the entity's replacement text in front of xmlRef.
//
// context - where the reference occurs:
//           InEntityValue: the replacement text is included in a literal, so
//                          quote characters are turned into the &quot; and
//                          &apos; entity references before it is inserted;
//           InDTD:         the replacement text is inserted with one space
//                          added on each side.
//
// A reference to an entity that is not in d->parameterEntities is reported
// through contentHnd->skippedEntity() with a leading "%". Errors are stored
// in d->error.
3776 bool QXmlSimpleReader::parsePEReference( EntityRecognitionContext context )
// states of the state machine
3778 const signed char Init = 0;
3779 const signed char Next = 1;
3780 const signed char Name = 2;
3781 const signed char Done = 3;
// input classes (column index into the transition table)
3783 const signed char InpSemi = 0; // ;
3784 const signed char InpPer = 1; // %
3785 const signed char InpUnknown = 2;
3787 // use some kind of state machine for parsing
// One row per state Init..Name (Done has no row).
3788 static signed char table[3][3] = {
3789 /* InpSemi InpPer InpUnknown */
3790 { -1, Next, -1 }, // Init
3791 { -1, -1, Name }, // Next
3792 { Done, -1, -1 } // Name
3794 signed char state = Init;
3796 bool parseOk = TRUE;
3802 d->error = XMLERR_UNEXPECTEDEOF;
3807 } else if ( c == '%' ) {
3813 // set state according to input
3814 state = table[state][input];
3816 // do some actions according to state
3822 parseOk = parseName( TRUE );
3828 // no input is read after this
3832 d->error = XMLERR_ERRORPARSINGNAME;
// unknown parameter entity: report it as skipped instead of expanding it
3835 if ( d->parameterEntities.find( ref() ) == d->parameterEntities.end() ) {
3838 if ( !contentHnd->skippedEntity( QString("%") + ref() ) ) {
3839 d->error = contentHnd->errorString();
3844 if ( context == InEntityValue ) {
3845 // Included in literal
// quotes in the replacement text must not end the surrounding literal, so
// they are replaced by the predefined entity references
3846 xmlRef = d->parameterEntities.find( ref() )
3847 .data().replace( QRegExp("\""), "&quot;" ).replace( QRegExp("'"), "&apos;" )
3849 } else if ( context == InDTD ) {
// the replacement text is surrounded by one space on each side
3851 xmlRef = QString(" ") +
3852 d->parameterEntities.find( ref() ).data() +
3853 QString(" ") + xmlRef;
3861 d->error = XMLERR_LETTEREXPECTED;
3875 Parse an AttlistDecl [52].
3877 Precondition: the beginning '<!' is already read and the head
3878 stands on the 'A' of '<!ATTLIST'
// Parses an attribute-list declaration with a table-driven state machine.
// The element name is stored in d->attDeclEName and each attribute name in
// d->attDeclAName; the attribute type is parsed by parseAttType(), the default
// declaration by parseString( "REQUIRED" / "IMPLIED" / "FIXED" ) and
// parseAttValue(). Each attribute is reported through declHnd->attributeDecl()
// with empty strings for the remaining three arguments (see the TODO below).
// Errors are stored in d->error.
3880 bool QXmlSimpleReader::parseAttlistDecl()
// states of the state machine
3882 const signed char Init = 0;
3883 const signed char Attlist = 1; // parse the string "ATTLIST"
3884 const signed char Ws = 2; // whitespace read
3885 const signed char Name = 3; // parse name
3886 const signed char Ws1 = 4; // whitespace read
3887 const signed char Attdef = 5; // parse the AttDef
3888 const signed char Ws2 = 6; // whitespace read
3889 const signed char Atttype = 7; // parse the AttType
3890 const signed char Ws3 = 8; // whitespace read
3891 const signed char DDecH = 9; // DefaultDecl with #
3892 const signed char DefReq = 10; // parse the string "REQUIRED"
3893 const signed char DefImp = 11; // parse the string "IMPLIED"
3894 const signed char DefFix = 12; // parse the string "FIXED"
3895 const signed char Attval = 13; // parse the AttValue
3896 const signed char Ws4 = 14; // whitespace read
3897 const signed char Done = 15;
// input classes (column index into the transition table)
3899 const signed char InpWs = 0; // white space
3900 const signed char InpGt = 1; // >
3901 const signed char InpHash = 2; // #
3902 const signed char InpA = 3; // A
3903 const signed char InpI = 4; // I
3904 const signed char InpF = 5; // F
3905 const signed char InpR = 6; // R
3906 const signed char InpUnknown = 7;
3908 // use some kind of state machine for parsing
// One row per state Init..Ws4 (Done has no row).
// NOTE(review): -1 presumably marks an input that is not allowed in that
// state - confirm against the state handling further down.
3909 static signed char table[15][8] = {
3910 /* InpWs InpGt InpHash InpA InpI InpF InpR InpUnknown */
3911 { -1, -1, -1, Attlist, -1, -1, -1, -1 }, // Init
3912 { Ws, -1, -1, -1, -1, -1, -1, -1 }, // Attlist
3913 { -1, -1, -1, Name, Name, Name, Name, Name }, // Ws
3914 { Ws1, Done, Attdef, Attdef, Attdef, Attdef, Attdef, Attdef }, // Name
3915 { -1, Done, Attdef, Attdef, Attdef, Attdef, Attdef, Attdef }, // Ws1
3916 { Ws2, -1, -1, -1, -1, -1, -1, -1 }, // Attdef
3917 { -1, Atttype, Atttype, Atttype, Atttype, Atttype, Atttype, Atttype }, // Ws2
3918 { Ws3, -1, -1, -1, -1, -1, -1, -1 }, // Atttype
3919 { -1, Attval, DDecH, Attval, Attval, Attval, Attval, Attval }, // Ws3
3920 { -1, -1, -1, -1, DefImp, DefFix, DefReq, -1 }, // DDecH
3921 { Ws4, Ws4, -1, -1, -1, -1, -1, -1 }, // DefReq
3922 { Ws4, Ws4, -1, -1, -1, -1, -1, -1 }, // DefImp
3923 { Ws3, -1, -1, -1, -1, -1, -1, -1 }, // DefFix
3924 { Ws4, Ws4, -1, -1, -1, -1, -1, -1 }, // Attval
3925 { -1, Done, Attdef, Attdef, Attdef, Attdef, Attdef, Attdef } // Ws4
3927 signed char state = Init;
3929 bool parseOk = TRUE;
3935 d->error = XMLERR_UNEXPECTEDEOF;
3940 } else if ( c == '>' ) {
3942 } else if ( c == '#' ) {
3944 } else if ( c == 'A' ) {
3946 } else if ( c == 'I' ) {
3948 } else if ( c == 'F' ) {
3950 } else if ( c == 'R' ) {
3956 // set state according to input
3957 state = table[state][input];
3959 // do some actions according to state
3962 parseOk = parseString( "ATTLIST" );
3971 parseOk = parseName();
3974 parseOk = parseName();
3977 parseOk = parseAttType();
3983 parseOk = parseString( "REQUIRED" );
3986 parseOk = parseString( "IMPLIED" );
3989 parseOk = parseString( "FIXED" );
3992 parseOk = parseAttValue();
3996 // TODO: not all values are computed yet...
3997 if ( !declHnd->attributeDecl( d->attDeclEName, d->attDeclAName, "", "", "" ) ) {
3998 d->error = declHnd->errorString();
4008 // no input is read after this
4012 d->error = XMLERR_UNEXPECTEDCHARACTER;
4018 d->error = XMLERR_ERRORPARSINGNAME;
// the first name of the declaration is the element the attributes belong to
4021 d->attDeclEName = name();
4025 d->error = XMLERR_ERRORPARSINGNAME;
// names read in the Attdef state are attribute names
4028 d->attDeclAName = name();
4032 d->error = XMLERR_ERRORPARSINGATTTYPE;
4038 d->error = XMLERR_UNEXPECTEDCHARACTER;
4044 d->error = XMLERR_UNEXPECTEDCHARACTER;
4050 d->error = XMLERR_UNEXPECTEDCHARACTER;
4056 d->error = XMLERR_ERRORPARSINGATTVALUE;
4064 d->error = XMLERR_LETTEREXPECTED;
4078 Parse an AttType [54]
// Parses an attribute type with a table-driven state machine. The keyword
// types are recognised piecewise through parseString() ("CDATA", "ID", "REF",
// "ENTIT", "IES", "MTOKEN", "OTATION"); the names of a notation type are read
// with parseName() and the tokens of an enumeration with parseNmtoken().
// Errors are stored in d->error.
4080 bool QXmlSimpleReader::parseAttType()
// states of the state machine
4082 const signed char Init = 0;
4083 const signed char ST = 1; // StringType
4084 const signed char TTI = 2; // TokenizedType starting with 'I'
4085 const signed char TTI2 = 3; // TokenizedType helpstate
4086 const signed char TTI3 = 4; // TokenizedType helpstate
4087 const signed char TTE = 5; // TokenizedType starting with 'E'
4088 const signed char TTEY = 6; // TokenizedType starting with 'ENTITY'
4089 const signed char TTEI = 7; // TokenizedType starting with 'ENTITI'
4090 const signed char N = 8; // N read (TokenizedType or Notation)
4091 const signed char TTNM = 9; // TokenizedType starting with 'NM'
4092 const signed char TTNM2 = 10; // TokenizedType helpstate
4093 const signed char NO = 11; // Notation
4094 const signed char NO2 = 12; // Notation helpstate
4095 const signed char NO3 = 13; // Notation helpstate
4096 const signed char NOName = 14; // Notation, read name
4097 const signed char NO4 = 15; // Notation helpstate
4098 const signed char EN = 16; // Enumeration
4099 const signed char ENNmt = 17; // Enumeration, read Nmtoken
4100 const signed char EN2 = 18; // Enumeration helpstate
4101 const signed char ADone = 19; // almost done (make next and accept)
4102 const signed char Done = 20;
// input classes (column index into the transition table)
4104 const signed char InpWs = 0; // whitespace
4105 const signed char InpOp = 1; // (
4106 const signed char InpCp = 2; // )
4107 const signed char InpPipe = 3; // |
4108 const signed char InpC = 4; // C
4109 const signed char InpE = 5; // E
4110 const signed char InpI = 6; // I
4111 const signed char InpM = 7; // M
4112 const signed char InpN = 8; // N
4113 const signed char InpO = 9; // O
4114 const signed char InpR = 10; // R
4115 const signed char InpS = 11; // S
4116 const signed char InpY = 12; // Y
4117 const signed char InpUnknown = 13;
4119 // use some kind of state machine for parsing
// One row per state Init..EN2 (ADone and Done have no row).
// NOTE(review): -1 presumably marks an input that is not allowed in that
// state - confirm against the state handling further down.
4120 static signed char table[19][14] = {
4121 /* InpWs InpOp InpCp InpPipe InpC InpE InpI InpM InpN InpO InpR InpS InpY InpUnknown */
4122 { -1, EN, -1, -1, ST, TTE, TTI, -1, N, -1, -1, -1, -1, -1 }, // Init
4123 { Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done }, // ST
4124 { Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, TTI2, Done, Done, Done }, // TTI
4125 { Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, TTI3, Done, Done }, // TTI2
4126 { Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done }, // TTI3
4127 { -1, -1, -1, -1, -1, -1, TTEI, -1, -1, -1, -1, -1, TTEY, -1 }, // TTE
4128 { Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done }, // TTEY
4129 { Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done }, // TTEI
4130 { -1, -1, -1, -1, -1, -1, -1, TTNM, -1, NO, -1, -1, -1, -1 }, // N
4131 { Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, TTNM2, Done, Done }, // TTNM
4132 { Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done, Done }, // TTNM2
4133 { NO2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, // NO
4134 { -1, NO3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, // NO2
4135 { NOName, NOName, NOName, NOName, NOName, NOName, NOName, NOName, NOName, NOName, NOName, NOName, NOName, NOName }, // NO3
4136 { NO4, -1, ADone, NO3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, // NOName
4137 { -1, -1, ADone, NO3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, // NO4
4138 { -1, -1, ENNmt, -1, ENNmt, ENNmt, ENNmt, ENNmt, ENNmt, ENNmt, ENNmt, ENNmt, ENNmt, ENNmt }, // EN
4139 { EN2, -1, ADone, EN, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, // ENNmt
4140 { -1, -1, ADone, EN, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } // EN2
4142 signed char state = Init;
4144 bool parseOk = TRUE;
4150 d->error = XMLERR_UNEXPECTEDEOF;
4155 } else if ( c == '(' ) {
4157 } else if ( c == ')' ) {
4159 } else if ( c == '|' ) {
4161 } else if ( c == 'C' ) {
4163 } else if ( c == 'E' ) {
4165 } else if ( c == 'I' ) {
4167 } else if ( c == 'M' ) {
4169 } else if ( c == 'N' ) {
4171 } else if ( c == 'O' ) {
4173 } else if ( c == 'R' ) {
4175 } else if ( c == 'S' ) {
4177 } else if ( c == 'Y' ) {
4183 // set state according to input
4184 state = table[state][input];
4186 // do some actions according to state
// the keywords are matched in pieces so that shared beginnings
// (e.g. "ID" / "IDREF" / "IDREFS") can be told apart by the state machine
4189 parseOk = parseString( "CDATA" );
4192 parseOk = parseString( "ID" );
4195 parseOk = parseString( "REF" );
4201 parseOk = parseString( "ENTIT" );
4207 parseOk = parseString( "IES" );
4213 parseOk = parseString( "MTOKEN" );
4219 parseOk = parseString( "OTATION" );
4228 parseOk = parseName();
4237 parseOk = parseNmtoken();
4246 // no input is read after this
4250 d->error = XMLERR_UNEXPECTEDCHARACTER;
4256 d->error = XMLERR_UNEXPECTEDCHARACTER;
4262 d->error = XMLERR_UNEXPECTEDCHARACTER;
4268 d->error = XMLERR_UNEXPECTEDCHARACTER;
4274 d->error = XMLERR_UNEXPECTEDCHARACTER;
4280 d->error = XMLERR_UNEXPECTEDCHARACTER;
4286 d->error = XMLERR_UNEXPECTEDCHARACTER;
4292 d->error = XMLERR_ERRORPARSINGNAME;
4298 d->error = XMLERR_ERRORPARSINGNMTOKEN;
4308 d->error = XMLERR_LETTEREXPECTED;
4322 Parse an AttValue [10]
4324 Precondition: the head stands on the beginning " or '
4326 If this function was successful, the head stands on the first
4327 character after the closing " or ' and the value of the attribute
// Parses an attribute value with a table-driven state machine: the current
// character is classified into one of the Inp* classes below and the next
// state is looked up in table[state][input]. References are delegated to
// parseReference() with the InAttributeValue context.
// Error codes assigned to d->error in this function: XMLERR_UNEXPECTEDEOF,
// XMLERR_ERRORPARSINGREFERENCE and XMLERR_UNEXPECTEDCHARACTER.
4330 bool QXmlSimpleReader::parseAttValue()
// states of the machine (row index into table)
4334 const signed char Init = 0;
4335 const signed char Dq = 1; // double quotes were read
4336 const signed char DqRef = 2; // read references in double quotes
4337 const signed char DqC = 3; // character read in double quotes
4338 const signed char Sq = 4; // single quotes were read
4339 const signed char SqRef = 5; // read references in single quotes
4340 const signed char SqC = 6; // character read in single quotes
4341 const signed char Done = 7;
// input classes (column index into table)
4343 const signed char InpDq = 0; // "
4344 const signed char InpSq = 1; // '
4345 const signed char InpAmp = 2; // &
4346 const signed char InpLt = 3; // <
4347 const signed char InpUnknown = 4;
4349 // use some kind of state machine for parsing
// NOTE(review): -1 entries presumably mark transitions that are not allowed;
// the code that handles them is not visible in this excerpt.
4350 static signed char table[7][5] = {
4351 /* InpDq InpSq InpAmp InpLt InpUnknown */
4352 { Dq, Sq, -1, -1, -1 }, // Init
4353 { Done, DqC, DqRef, -1, DqC }, // Dq
4354 { Done, DqC, DqRef, -1, DqC }, // DqRef
4355 { Done, DqC, DqRef, -1, DqC }, // DqC
4356 { SqC, Done, SqRef, -1, SqC }, // Sq
4357 { SqC, Done, SqRef, -1, SqC }, // SqRef
4358 { SqC, Done, SqRef, -1, SqC } // SqC
4360 signed char state = Init;
4362 bool parseOk = TRUE;
4368 d->error = XMLERR_UNEXPECTEDEOF;
4373 } else if ( c == '\'' ) {
4375 } else if ( c == '&' ) {
4377 } else if ( c == '<' ) {
4383 // set state according to input
4384 state = table[state][input];
4386 // do some actions according to state
4395 parseOk = parseReference( tmp, InAttributeValue );
4406 // no input is read after this
4411 d->error = XMLERR_ERRORPARSINGREFERENCE;
4419 d->error = XMLERR_UNEXPECTEDCHARACTER;
4433 Parse an elementdecl [45].
4435 Precondition: the beginning '<!E' is already read and the head
4436 stands on the 'L' of '<!ELEMENT'
// Parses an element declaration with a table-driven state machine: the
// current character is classified into one of the Inp* classes below and the
// next state is looked up in table[state][input]. Fixed keywords are consumed
// with parseString(), names with parseName() and content particles with
// parseChoiceSeq().
// Error codes assigned to d->error in this function: XMLERR_UNEXPECTEDEOF,
// XMLERR_UNEXPECTEDCHARACTER, XMLERR_ERRORPARSINGNAME and
// XMLERR_ERRORPARSINGCHOICE.
4438 bool QXmlSimpleReader::parseElementDecl()
// states of the machine (row index into table)
4440 const signed char Init = 0;
4441 const signed char Elem = 1; // parse the beginning string
4442 const signed char Ws1 = 2; // whitespace required
4443 const signed char Nam = 3; // parse Name
4444 const signed char Ws2 = 4; // whitespace required
4445 const signed char Empty = 5; // read EMPTY
4446 const signed char Any = 6; // read ANY
4447 const signed char Cont = 7; // read contentspec (except ANY or EMPTY)
4448 const signed char Mix = 8; // read Mixed
4449 const signed char Mix2 = 9; //
4450 const signed char Mix3 = 10; //
4451 const signed char MixN1 = 11; //
4452 const signed char MixN2 = 12; //
4453 const signed char MixN3 = 13; //
4454 const signed char MixN4 = 14; //
4455 const signed char Cp = 15; // parse cp
4456 const signed char Cp2 = 16; //
4457 const signed char WsD = 17; // eat whitespace before Done
4458 const signed char Done = 18;
// input classes (column index into table)
4460 const signed char InpWs = 0;
4461 const signed char InpGt = 1; // >
4462 const signed char InpPipe = 2; // |
4463 const signed char InpOp = 3; // (
4464 const signed char InpCp = 4; // )
4465 const signed char InpHash = 5; // #
4466 const signed char InpQm = 6; // ?
4467 const signed char InpAst = 7; // *
4468 const signed char InpPlus = 8; // +
4469 const signed char InpA = 9; // A
4470 const signed char InpE = 10; // E
4471 const signed char InpL = 11; // L
4472 const signed char InpUnknown = 12;
4474 // use some kind of state machine for parsing
// NOTE(review): -1 entries presumably mark transitions that are not allowed;
// the code that handles them is not visible in this excerpt.
4475 static signed char table[18][13] = {
4476 /* InpWs InpGt InpPipe InpOp InpCp InpHash InpQm InpAst InpPlus InpA InpE InpL InpUnknown */
4477 { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, Elem, -1 }, // Init
4478 { Ws1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, // Elem
4479 { -1, -1, -1, -1, -1, -1, -1, -1, -1, Nam, Nam, Nam, Nam }, // Ws1
4480 { Ws2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, // Nam
4481 { -1, -1, -1, Cont, -1, -1, -1, -1, -1, Any, Empty, -1, -1 }, // Ws2
4482 { WsD, Done, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, // Empty
4483 { WsD, Done, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, // Any
4484 { -1, -1, -1, Cp, Cp, Mix, -1, -1, -1, Cp, Cp, Cp, Cp }, // Cont
4485 { Mix2, -1, MixN1, -1, Mix3, -1, -1, -1, -1, -1, -1, -1, -1 }, // Mix
4486 { -1, -1, MixN1, -1, Mix3, -1, -1, -1, -1, -1, -1, -1, -1 }, // Mix2
4487 { WsD, Done, -1, -1, -1, -1, -1, WsD, -1, -1, -1, -1, -1 }, // Mix3
4488 { -1, -1, -1, -1, -1, -1, -1, -1, -1, MixN2, MixN2, MixN2, MixN2 }, // MixN1
4489 { MixN3, -1, MixN1, -1, MixN4, -1, -1, -1, -1, -1, -1, -1, -1 }, // MixN2
4490 { -1, -1, MixN1, -1, MixN4, -1, -1, -1, -1, -1, -1, -1, -1 }, // MixN3
4491 { -1, -1, -1, -1, -1, -1, -1, WsD, -1, -1, -1, -1, -1 }, // MixN4
4492 { WsD, Done, -1, -1, -1, -1, Cp2, Cp2, Cp2, -1, -1, -1, -1 }, // Cp
4493 { WsD, Done, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, // Cp2
4494 { -1, Done, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } // WsD
4496 signed char state = Init;
4498 bool parseOk = TRUE;
4504 d->error = XMLERR_UNEXPECTEDEOF;
4509 } else if ( c == '>' ) {
4511 } else if ( c == '|' ) {
4513 } else if ( c == '(' ) {
4515 } else if ( c == ')' ) {
4517 } else if ( c == '#' ) {
4519 } else if ( c == '?' ) {
4521 } else if ( c == '*' ) {
4523 } else if ( c == '+' ) {
4525 } else if ( c == 'A' ) {
4527 } else if ( c == 'E' ) {
4529 } else if ( c == 'L' ) {
4535 //qDebug( "%d -%d(%c)-> %d", state, input, c.latin1(), table[state][input] );
4536 state = table[state][input];
4538 // in some cases do special actions depending on state
// the leading 'E' was consumed by the caller (see precondition), so only the
// remainder of the keyword is matched here
4541 parseOk = parseString( "LEMENT" );
4547 parseOk = parseName();
4553 parseOk = parseString( "EMPTY" );
4556 parseOk = parseString( "ANY" );
4562 parseOk = parseString( "#PCDATA" );
4574 parseOk = parseName();
4583 parseOk = parseChoiceSeq();
4595 // no input is read after this
4599 d->error = XMLERR_UNEXPECTEDCHARACTER;
4605 d->error = XMLERR_ERRORPARSINGNAME;
4611 d->error = XMLERR_UNEXPECTEDCHARACTER;
4617 d->error = XMLERR_UNEXPECTEDCHARACTER;
4623 d->error = XMLERR_UNEXPECTEDCHARACTER;
4629 d->error = XMLERR_ERRORPARSINGNAME;
4635 d->error = XMLERR_ERRORPARSINGCHOICE;
4642 d->error = XMLERR_UNEXPECTEDCHARACTER;
4656 Parse a NotationDecl [82].
4658 Precondition: the beginning '<!' is already read and the head
4659 stands on the 'N' of '<!NOTATION'
// Parses a notation declaration with a table-driven state machine: the
// keyword is matched with parseString( "NOTATION" ), the notation name with
// parseName() and the identifier with parseExternalID( TRUE ).
// The parsed declaration is reported through dtdHnd->notationDecl(); if that
// call fails, d->error is taken from dtdHnd->errorString().
// Other error codes assigned to d->error in this function:
// XMLERR_UNEXPECTEDEOF, XMLERR_UNEXPECTEDCHARACTER, XMLERR_ERRORPARSINGNAME
// and XMLERR_ERRORPARSINGEXTERNALID.
4661 bool QXmlSimpleReader::parseNotationDecl()
// states of the machine (row index into table)
4663 const signed char Init = 0;
4664 const signed char Not = 1; // read NOTATION
4665 const signed char Ws1 = 2; // eat whitespaces
4666 const signed char Nam = 3; // read Name
4667 const signed char Ws2 = 4; // eat whitespaces
4668 const signed char ExtID = 5; // parse ExternalID
4669 const signed char Ws3 = 6; // eat whitespaces
4670 const signed char Done = 7;
// input classes (column index into table)
4672 const signed char InpWs = 0;
4673 const signed char InpGt = 1; // >
4674 const signed char InpN = 2; // N
4675 const signed char InpUnknown = 3;
4677 // use some kind of state machine for parsing
// NOTE(review): -1 entries presumably mark transitions that are not allowed;
// the code that handles them is not visible in this excerpt.
4678 static signed char table[7][4] = {
4679 /* InpWs InpGt InpN InpUnknown */
4680 { -1, -1, Not, -1 }, // Init
4681 { Ws1, -1, -1, -1 }, // Not
4682 { -1, -1, Nam, Nam }, // Ws1
4683 { Ws2, Done, -1, -1 }, // Nam
4684 { -1, Done, ExtID, ExtID }, // Ws2
4685 { Ws3, Done, -1, -1 }, // ExtID
4686 { -1, Done, -1, -1 } // Ws3
4688 signed char state = Init;
4690 bool parseOk = TRUE;
4696 d->error = XMLERR_UNEXPECTEDEOF;
4701 } else if ( c == '>' ) {
4703 } else if ( c == 'N' ) {
4709 // set state according to input
4710 state = table[state][input];
4712 // do some actions according to state
4715 parseOk = parseString( "NOTATION" );
4721 parseOk = parseName();
4727 parseOk = parseExternalID( TRUE );
4736 // no input is read after this
4740 d->error = XMLERR_UNEXPECTEDCHARACTER;
4746 d->error = XMLERR_ERRORPARSINGNAME;
4752 d->error = XMLERR_ERRORPARSINGEXTERNALID;
// hand the finished declaration to the DTD handler
4757 if ( !dtdHnd->notationDecl( name(), d->publicId, d->systemId ) ) {
4758 d->error = dtdHnd->errorString();
4767 d->error = XMLERR_UNEXPECTEDCHARACTER;
4781 Parse choice [49] or seq [50].
4783 Precondition: the beginning '('S? is already read and the head
4784 stands on the first non-whitespace character after it.
// Parses a choice or seq content particle list with a table-driven state
// machine. Nested groups are handled by calling parseChoiceSeq() recursively;
// plain names are read with parseName().
// Error codes assigned to d->error in this function: XMLERR_UNEXPECTEDEOF,
// XMLERR_ERRORPARSINGCHOICE, XMLERR_ERRORPARSINGNAME and
// XMLERR_UNEXPECTEDCHARACTER.
4786 bool QXmlSimpleReader::parseChoiceSeq()
// states of the machine (row index into table)
4788 const signed char Init = 0;
4789 const signed char Ws1 = 1; // eat whitespace
4790 const signed char CS_ = 2; // choice or seq
4791 const signed char Ws2 = 3; // eat whitespace
4792 const signed char More = 4; // more cp to read
4793 const signed char Name = 5; // read name
4794 const signed char Done = 6; //
// input classes (column index into table)
4796 const signed char InpWs = 0; // S
4797 const signed char InpOp = 1; // (
4798 const signed char InpCp = 2; // )
4799 const signed char InpQm = 3; // ?
4800 const signed char InpAst = 4; // *
4801 const signed char InpPlus = 5; // +
4802 const signed char InpPipe = 6; // |
4803 const signed char InpComm = 7; // ,
4804 const signed char InpUnknown = 8;
4806 // use some kind of state machine for parsing
// NOTE(review): -1 entries presumably mark transitions that are not allowed;
// the code that handles them is not visible in this excerpt.
4807 static signed char table[6][9] = {
4808 /* InpWs InpOp InpCp InpQm InpAst InpPlus InpPipe InpComm InpUnknown */
4809 { -1, Ws1, -1, -1, -1, -1, -1, -1, Name }, // Init
4810 { -1, CS_, -1, -1, -1, -1, -1, -1, CS_ }, // Ws1
4811 { Ws2, -1, Done, Ws2, Ws2, Ws2, More, More, -1 }, // CS_
4812 { -1, -1, Done, -1, -1, -1, More, More, -1 }, // Ws2
4813 { -1, Ws1, -1, -1, -1, -1, -1, -1, Name }, // More (same as Init)
4814 { Ws2, -1, Done, Ws2, Ws2, Ws2, More, More, -1 } // Name (same as CS_)
4816 signed char state = Init;
4818 bool parseOk = TRUE;
4824 d->error = XMLERR_UNEXPECTEDEOF;
4829 } else if ( c == '(' ) {
4831 } else if ( c == ')' ) {
4833 } else if ( c == '?' ) {
4835 } else if ( c == '*' ) {
4837 } else if ( c == '+' ) {
4839 } else if ( c == '|' ) {
4841 } else if ( c == ',' ) {
4847 // set state according to input
4848 state = table[state][input];
4850 // do some actions according to state
// a nested group is parsed by recursion
4856 parseOk = parseChoiceSeq();
4865 parseOk = parseName();
4871 // no input is read after this
4875 d->error = XMLERR_ERRORPARSINGCHOICE;
4881 d->error = XMLERR_ERRORPARSINGNAME;
4889 d->error = XMLERR_UNEXPECTEDCHARACTER;
4903 Parse an EntityDecl [70].
4905 Precondition: the beginning '<!E' is already read and the head
4906 stands on the 'N' of '<!ENTITY'
// Parses an entity declaration with a table-driven state machine. Depending
// on the path taken through the table, the declaration is stored in one of
// d->entities, d->externEntities, d->parameterEntities or
// d->externParameterEntities (only when entityExist() reports that the name
// is not known yet) and reported to declHnd or dtdHnd.
// When a handler call fails, d->error is set to the error string of the
// handler that was called.
// Other error codes assigned to d->error in this function:
// XMLERR_UNEXPECTEDEOF, XMLERR_UNEXPECTEDCHARACTER, XMLERR_ERRORPARSINGNAME,
// XMLERR_ERRORPARSINGENTITYVALUE, XMLERR_ERRORPARSINGEXTERNALID and
// XMLERR_LETTEREXPECTED.
4908 bool QXmlSimpleReader::parseEntityDecl()
// states of the machine (row index into table)
4910 const signed char Init = 0;
4911 const signed char Ent = 1; // parse "ENTITY"
4912 const signed char Ws1 = 2; // white space read
4913 const signed char Name = 3; // parse name
4914 const signed char Ws2 = 4; // white space read
4915 const signed char EValue = 5; // parse entity value
4916 const signed char ExtID = 6; // parse ExternalID
4917 const signed char Ws3 = 7; // white space read
4918 const signed char Ndata = 8; // parse "NDATA"
4919 const signed char Ws4 = 9; // white space read
4920 const signed char NNam = 10; // parse name
4921 const signed char PEDec = 11; // parse PEDecl
4922 const signed char Ws6 = 12; // white space read
4923 const signed char PENam = 13; // parse name
4924 const signed char Ws7 = 14; // white space read
4925 const signed char PEVal = 15; // parse entity value
4926 const signed char PEEID = 16; // parse ExternalID
4927 const signed char WsE = 17; // white space read
// EDDone and Done have no row of their own in the 18-row table below
4928 const signed char EDDone = 19; // done, but also report an external, unparsed entity decl
4929 const signed char Done = 18;
// input classes (column index into table)
4931 const signed char InpWs = 0; // white space
4932 const signed char InpPer = 1; // %
4933 const signed char InpQuot = 2; // " or '
4934 const signed char InpGt = 3; // >
4935 const signed char InpN = 4; // N
4936 const signed char InpUnknown = 5;
4938 // use some kind of state machine for parsing
// NOTE(review): -1 entries presumably mark transitions that are not allowed;
// the code that handles them is not visible in this excerpt.
4939 static signed char table[18][6] = {
4940 /* InpWs InpPer InpQuot InpGt InpN InpUnknown */
4941 { -1, -1, -1, -1, Ent, -1 }, // Init
4942 { Ws1, -1, -1, -1, -1, -1 }, // Ent
4943 { -1, PEDec, -1, -1, Name, Name }, // Ws1
4944 { Ws2, -1, -1, -1, -1, -1 }, // Name
4945 { -1, -1, EValue, -1, -1, ExtID }, // Ws2
4946 { WsE, -1, -1, Done, -1, -1 }, // EValue
4947 { Ws3, -1, -1, EDDone,-1, -1 }, // ExtID
4948 { -1, -1, -1, EDDone,Ndata, -1 }, // Ws3
4949 { Ws4, -1, -1, -1, -1, -1 }, // Ndata
4950 { -1, -1, -1, -1, NNam, NNam }, // Ws4
4951 { WsE, -1, -1, Done, -1, -1 }, // NNam
4952 { Ws6, -1, -1, -1, -1, -1 }, // PEDec
4953 { -1, -1, -1, -1, PENam, PENam }, // Ws6
4954 { Ws7, -1, -1, -1, -1, -1 }, // PENam
4955 { -1, -1, PEVal, -1, -1, PEEID }, // Ws7
4956 { WsE, -1, -1, Done, -1, -1 }, // PEVal
4957 { WsE, -1, -1, Done, -1, -1 }, // PEEID
4958 { -1, -1, -1, Done, -1, -1 } // WsE
4960 signed char state = Init;
4962 bool parseOk = TRUE;
4968 d->error = XMLERR_UNEXPECTEDEOF;
4973 } else if ( c == '%' ) {
4975 } else if ( c == '"' || c == '\'' ) {
4977 } else if ( c == '>' ) {
4979 } else if ( c == 'N' ) {
4985 // set state according to input
4986 state = table[state][input];
4988 // do some actions according to state
// the leading 'E' was consumed by the caller (see precondition), so only the
// remainder of the keyword is matched here
4991 parseOk = parseString( "NTITY" );
4997 parseOk = parseName();
5003 parseOk = parseEntityValue();
5006 parseOk = parseExternalID();
5012 parseOk = parseString( "NDATA" );
// the notation name goes into ref() so that name() keeps the entity name
5018 parseOk = parseName( TRUE );
5027 parseOk = parseName();
5033 parseOk = parseEntityValue();
5036 parseOk = parseExternalID();
5048 // no input is read after this
5052 d->error = XMLERR_UNEXPECTEDCHARACTER;
5058 d->error = XMLERR_ERRORPARSINGNAME;
5064 d->error = XMLERR_ERRORPARSINGENTITYVALUE;
// internal general entity
5067 if ( !entityExist( name() ) ) {
5068 d->entities.insert( name(), string() );
5070 if ( !declHnd->internalEntityDecl( name(), string() ) ) {
5071 d->error = declHnd->errorString();
5079 d->error = XMLERR_ERRORPARSINGEXTERNALID;
5085 d->error = XMLERR_UNEXPECTEDCHARACTER;
5091 d->error = XMLERR_ERRORPARSINGNAME;
// external unparsed entity (has a notation name in ref())
5094 if ( !entityExist( name() ) ) {
5095 d->externEntities.insert( name(), QXmlSimpleReaderPrivate::ExternEntity( d->publicId, d->systemId, ref() ) );
5097 if ( !dtdHnd->unparsedEntityDecl( name(), d->publicId, d->systemId, ref() ) ) {
// the failing call went to dtdHnd, so its error string must be reported
// (the original read declHnd->errorString() here)
5098 d->error = dtdHnd->errorString();
5106 d->error = XMLERR_ERRORPARSINGNAME;
5112 d->error = XMLERR_ERRORPARSINGENTITYVALUE;
// internal parameter entity; reported with a leading '%' in the name
5115 if ( !entityExist( name() ) ) {
5116 d->parameterEntities.insert( name(), string() );
5118 if ( !declHnd->internalEntityDecl( QString("%")+name(), string() ) ) {
5119 d->error = declHnd->errorString();
5127 d->error = XMLERR_ERRORPARSINGEXTERNALID;
// external parameter entity; reported with a leading '%' in the name
5130 if ( !entityExist( name() ) ) {
5131 d->externParameterEntities.insert( name(), QXmlSimpleReaderPrivate::ExternParameterEntity( d->publicId, d->systemId ) );
5133 if ( !declHnd->externalEntityDecl( QString("%")+name(), d->publicId, d->systemId ) ) {
5134 d->error = declHnd->errorString();
// external parsed general entity (stored with a null notation)
5141 if ( !entityExist( name() ) ) {
5142 d->externEntities.insert( name(), QXmlSimpleReaderPrivate::ExternEntity( d->publicId, d->systemId, QString::null ) );
5144 if ( !declHnd->externalEntityDecl( name(), d->publicId, d->systemId ) ) {
5145 d->error = declHnd->errorString();
5155 d->error = XMLERR_LETTEREXPECTED;
5169 Parse an EntityValue [9]
// Parses an entity value with a table-driven state machine. Parameter-entity
// references are delegated to parsePEReference( InEntityValue ) and general
// references to parseReference( tmp, InEntityValue ).
// Error codes assigned to d->error in this function: XMLERR_UNEXPECTEDEOF,
// XMLERR_ERRORPARSINGDOCTYPE, XMLERR_ERRORPARSINGREFERENCE and
// XMLERR_LETTEREXPECTED.
5171 bool QXmlSimpleReader::parseEntityValue()
// states of the machine (row index into table)
5175 const signed char Init = 0;
5176 const signed char Dq = 1; // EntityValue is double quoted
5177 const signed char DqC = 2; // character
5178 const signed char DqPER = 3; // PEReference
5179 const signed char DqRef = 4; // Reference
5180 const signed char Sq = 5; // EntityValue is single quoted
5181 const signed char SqC = 6; // character
5182 const signed char SqPER = 7; // PEReference
5183 const signed char SqRef = 8; // Reference
5184 const signed char Done = 9;
// input classes (column index into table)
5186 const signed char InpDq = 0; // "
5187 const signed char InpSq = 1; // '
5188 const signed char InpAmp = 2; // &
5189 const signed char InpPer = 3; // %
5190 const signed char InpUnknown = 4;
5192 // use some kind of state machine for parsing
// NOTE(review): -1 entries presumably mark transitions that are not allowed;
// the code that handles them is not visible in this excerpt.
5193 static signed char table[9][5] = {
5194 /* InpDq InpSq InpAmp InpPer InpUnknown */
5195 { Dq, Sq, -1, -1, -1 }, // Init
5196 { Done, DqC, DqRef, DqPER, DqC }, // Dq
5197 { Done, DqC, DqRef, DqPER, DqC }, // DqC
5198 { Done, DqC, DqRef, DqPER, DqC }, // DqPER
5199 { Done, DqC, DqRef, DqPER, DqC }, // DqRef
5200 { SqC, Done, SqRef, SqPER, SqC }, // Sq
5201 { SqC, Done, SqRef, SqPER, SqC }, // SqC
5202 { SqC, Done, SqRef, SqPER, SqC }, // SqPER
5203 { SqC, Done, SqRef, SqPER, SqC } // SqRef
5205 signed char state = Init;
5207 bool parseOk = TRUE;
5213 d->error = XMLERR_UNEXPECTEDEOF;
5218 } else if ( c == '\'' ) {
5220 } else if ( c == '&' ) {
5222 } else if ( c == '%' ) {
5228 // set state according to input
5229 state = table[state][input];
5231 // do some actions according to state
5245 parseOk = parsePEReference( InEntityValue );
5249 parseOk = parseReference( tmp, InEntityValue );
5255 // no input is read after this
// NOTE(review): a DOCTYPE error code inside the entity-value parser looks
// unusual; confirm which failure this is meant to report.
5260 d->error = XMLERR_ERRORPARSINGDOCTYPE;
5267 d->error = XMLERR_ERRORPARSINGREFERENCE;
5275 d->error = XMLERR_LETTEREXPECTED;
5289 Parse a comment [15].
5291 Precondition: the beginning '<!' of the comment is already read and the head
5292 stands on the first '-' of '<!--'.
5294 If this function was successful, the head-position is on the first
5295 character after the comment.
// Parses a comment with a table-driven state machine: the current character
// is classified as '-', '>' or anything else and the next state is looked up
// in table[state][input].
// Error codes assigned to d->error in this function: XMLERR_UNEXPECTEDEOF
// and XMLERR_ERRORPARSINGCOMMENT.
5297 bool QXmlSimpleReader::parseComment()
// states of the machine (row index into table)
5299 const signed char Init = 0;
5300 const signed char Dash1 = 1; // the first dash was read
5301 const signed char Dash2 = 2; // the second dash was read
5302 const signed char Com = 3; // read comment
5303 const signed char Com2 = 4; // read comment (help state)
5304 const signed char ComE = 5; // finished reading comment
5305 const signed char Done = 6;
// input classes (column index into table)
5307 const signed char InpDash = 0; // -
5308 const signed char InpGt = 1; // >
5309 const signed char InpUnknown = 2;
5311 // use some kind of state machine for parsing
// NOTE(review): -1 entries presumably mark transitions that are not allowed;
// the code that handles them is not visible in this excerpt.
5312 static signed char table[6][3] = {
5313 /* InpDash InpGt InpUnknown */
5314 { Dash1, -1, -1 }, // Init
5315 { Dash2, -1, -1 }, // Dash1
5316 { Com2, Com, Com }, // Dash2
5317 { Com2, Com, Com }, // Com
5318 { ComE, Com, Com }, // Com2
5319 { -1, Done, -1 } // ComE
5321 signed char state = Init;
5328 d->error = XMLERR_UNEXPECTEDEOF;
5333 } else if ( c == '>' ) {
5339 // set state according to input
5340 state = table[state][input];
5342 // do some actions according to state
5364 // no input is read after this
5370 // if next character is not a dash then don't skip it
5379 d->error = XMLERR_ERRORPARSINGCOMMENT;
5393 Parse an Attribute [41].
5395 Precondition: the head stands on the first character of the name of the
5396 attribute (i.e. all whitespaces are already parsed).
5398 The head stands on the next character after the end quotes. The variable name
5399 contains the name of the attribute and the variable string contains the value
// Parses one attribute with a table-driven state machine: the attribute name
// is read with parseName() and the quoted value with parseAttValue().
// Error codes assigned to d->error in this function: XMLERR_UNEXPECTEDEOF,
// XMLERR_ERRORPARSINGNAME, XMLERR_ERRORPARSINGATTVALUE and
// XMLERR_UNEXPECTEDCHARACTER.
5402 bool QXmlSimpleReader::parseAttribute()
// states of the machine (row index into table)
5404 const signed char Init = 0;
5405 const signed char PName = 1; // parse name
5406 const signed char Ws = 2; // eat ws
5407 const signed char Eq = 3; // the '=' was read
5408 const signed char Quotes = 4; // " or ' were read
// input classes (column index into table)
5410 const signed char InpNameBe = 0;
5411 const signed char InpEq = 1; // =
5412 const signed char InpDq = 2; // "
5413 const signed char InpSq = 3; // '
5414 const signed char InpUnknown = 4;
5416 // use some kind of state machine for parsing
// NOTE(review): -1 entries presumably mark transitions that are not allowed;
// the code that handles them is not visible in this excerpt.
// Quotes has no row of its own in the table.
5417 static signed char table[4][5] = {
5418 /* InpNameBe InpEq InpDq InpSq InpUnknown */
5419 { PName, -1, -1, -1, -1 }, // Init
5420 { -1, Eq, -1, -1, Ws }, // PName
5421 { -1, Eq, -1, -1, -1 }, // Ws
5422 { -1, -1, Quotes, Quotes, -1 } // Eq
5424 signed char state = Init;
5426 bool parseOk = TRUE;
5432 d->error = XMLERR_UNEXPECTEDEOF;
5435 if ( is_NameBeginning(c) ) {
5437 } else if ( c == '=' ) {
5439 } else if ( c == '"' ) {
5441 } else if ( c == '\'' ) {
5447 // set state according to input
5448 state = table[state][input];
5450 // do some actions according to state
5453 parseOk = parseName();
5462 parseOk = parseAttValue();
5465 // no input is read after this
5469 d->error = XMLERR_ERRORPARSINGNAME;
5475 d->error = XMLERR_ERRORPARSINGATTVALUE;
5482 d->error = XMLERR_UNEXPECTEDCHARACTER;
5496 Parse a Name [5] and store the name in name or ref (if useRef is TRUE).
// Parses a name with a table-driven state machine. Characters are classified
// with is_NameBeginning() and is_NameChar(); any other character ends the
// name (transition to Done) once at least one name-beginning character was
// read.
// Error codes assigned to d->error in this function: XMLERR_UNEXPECTEDEOF
// and XMLERR_LETTEREXPECTED.
5498 bool QXmlSimpleReader::parseName( bool useRef )
// states of the machine (row index into table)
5500 const signed char Init = 0;
5501 const signed char Name1 = 1; // parse first character of the name
5502 const signed char Name = 2; // parse name
5503 const signed char Done = 3;
// input classes (column index into table)
5505 const signed char InpNameBe = 0; // name beginning characters
5506 const signed char InpNameCh = 1; // NameChar without InpNameBe
5507 const signed char InpUnknown = 2;
5509 // use some kind of state machine for parsing
// NOTE(review): -1 entries presumably mark transitions that are not allowed;
// the code that handles them is not visible in this excerpt.
5510 static signed char table[3][3] = {
5511 /* InpNameBe InpNameCh InpUnknown */
5512 { Name1, -1, -1 }, // Init
5513 { Name, Name, Done }, // Name1
5514 { Name, Name, Done } // Name
5516 signed char state = Init;
5523 d->error = XMLERR_UNEXPECTEDEOF;
// is_NameBeginning() is tested first, so the is_NameChar() branch only sees
// characters that are not name beginnings
5526 if ( is_NameBeginning(c) ) {
5528 } else if ( is_NameChar(c) ) {
5534 // set state according to input
5535 state = table[state][input];
5537 // do some actions according to state
5558 // no input is read after this
5564 d->error = XMLERR_LETTEREXPECTED;
5578 Parse a Nmtoken [7] and store the name in name.
// Parses a name token with a table-driven state machine. Characters are
// classified with is_NameChar(); any other character ends the token
// (transition to Done) once at least one name character was read.
// Error codes assigned to d->error in this function: XMLERR_UNEXPECTEDEOF
// and XMLERR_LETTEREXPECTED.
5580 bool QXmlSimpleReader::parseNmtoken()
// states of the machine (row index into table)
5582 const signed char Init = 0;
5583 const signed char NameF = 1;
5584 const signed char Name = 2;
5585 const signed char Done = 3;
// input classes (column index into table)
5587 const signed char InpNameCh = 0; // NameChar without InpNameBe
5588 const signed char InpUnknown = 1;
5590 // use some kind of state machine for parsing
// NOTE(review): -1 entries presumably mark transitions that are not allowed;
// the code that handles them is not visible in this excerpt.
5591 static signed char table[3][2] = {
5592 /* InpNameCh InpUnknown */
5593 { NameF, -1 }, // Init
5594 { Name, Done }, // NameF
5595 { Name, Done } // Name
5597 signed char state = Init;
5604 d->error = XMLERR_UNEXPECTEDEOF;
5607 if ( is_NameChar(c) ) {
5613 // set state according to input
5614 state = table[state][input];
5616 // do some actions according to state
5628 // no input is read after this
5634 d->error = XMLERR_LETTEREXPECTED;
5648 Parse a Reference [67].
5650 charDataRead is set to TRUE if the reference must not be parsed. The
5651 character(s) which the reference mapped to are appended to string. The
5652 head stands on the first character after the reference.
5654 charDataRead is set to FALSE if the reference must be parsed. The
5655 character(s) which the reference mapped to are inserted at the reference
5656 position. The head stands on the first character of the replacement.
// Parses a character or entity reference with a table-driven state machine.
// Decimal and hexadecimal character references are converted with
// ref().toUInt() (base 10 / base 16) and appended with stringAddC(); in both
// cases charDataRead is set to TRUE. Named references are resolved by
// processReference( charDataRead, context ).
// Error codes assigned to d->error in this function: XMLERR_UNEXPECTEDEOF
// and XMLERR_ERRORPARSINGREFERENCE.
5658 bool QXmlSimpleReader::parseReference( bool &charDataRead, EntityRecognitionContext context )
5660 // temporary variables
// states of the machine (row index into table)
5664 const signed char Init = 0;
5665 const signed char SRef = 1; // start of a reference
5666 const signed char ChRef = 2; // parse CharRef
5667 const signed char ChDec = 3; // parse CharRef decimal
5668 const signed char ChHexS = 4; // start CharRef hexadecimal
5669 const signed char ChHex = 5; // parse CharRef hexadecimal
5670 const signed char Name = 6; // parse name
5671 const signed char DoneD = 7; // done CharRef decimal
5672 const signed char DoneH = 8; // done CharRef hexadecimal
5673 const signed char DoneN = 9; // done EntityRef
// input classes (column index into table)
5675 const signed char InpAmp = 0; // &
5676 const signed char InpSemi = 1; // ;
5677 const signed char InpHash = 2; // #
5678 const signed char InpX = 3; // x
5679 const signed char InpNum = 4; // 0-9
5680 const signed char InpHex = 5; // a-f A-F
5681 const signed char InpUnknown = 6;
5683 // use some kind of state machine for parsing
// NOTE(review): the table is declared with 8 rows but only 7 (Init..Name)
// are initialised below; the remaining row is zero-filled.
// -1 entries presumably mark transitions that are not allowed; the code that
// handles them is not visible in this excerpt.
5684 static signed char table[8][7] = {
5685 /* InpAmp InpSemi InpHash InpX InpNum InpHex InpUnknown */
5686 { SRef, -1, -1, -1, -1, -1, -1 }, // Init
5687 { -1, -1, ChRef, Name, Name, Name, Name }, // SRef
5688 { -1, -1, -1, ChHexS, ChDec, -1, -1 }, // ChRef
5689 { -1, DoneD, -1, -1, ChDec, -1, -1 }, // ChDec
5690 { -1, -1, -1, -1, ChHex, ChHex, -1 }, // ChHexS
5691 { -1, DoneH, -1, -1, ChHex, ChHex, -1 }, // ChHex
5692 { -1, DoneN, -1, -1, -1, -1, -1 } // Name
5694 signed char state = Init;
5701 d->error = XMLERR_UNEXPECTEDEOF;
// classification compares c.cell() against ASCII characters
5706 } else if ( c.cell() == '&' ) {
5708 } else if ( c.cell() == ';' ) {
5710 } else if ( c.cell() == '#' ) {
5712 } else if ( c.cell() == 'x' ) {
5714 } else if ( '0' <= c.cell() && c.cell() <= '9' ) {
5716 } else if ( 'a' <= c.cell() && c.cell() <= 'f' ) {
5718 } else if ( 'A' <= c.cell() && c.cell() <= 'F' ) {
5724 // set state according to input
5725 state = table[state][input];
5727 // do some actions according to state
5748 // read the name into the ref
// decimal character reference: ref() holds the digits
5752 tmp = ref().toUInt( &ok, 10 );
5754 stringAddC( QChar(tmp) );
5756 d->error = XMLERR_ERRORPARSINGREFERENCE;
5759 charDataRead = TRUE;
// hexadecimal character reference: ref() holds the hex digits
5763 tmp = ref().toUInt( &ok, 16 );
5765 stringAddC( QChar(tmp) );
5767 d->error = XMLERR_ERRORPARSINGREFERENCE;
5770 charDataRead = TRUE;
// named entity reference
5774 if ( !processReference( charDataRead, context ) )
5779 // no input is read after this
5789 d->error = XMLERR_ERRORPARSINGREFERENCE;
5803 Helper function for parseReference()
// Resolves the entity reference whose name is in ref().
// The five names "amp", "lt", "gt", "apos" and "quot" are handled first; in
// the InEntityValue context the reference text itself (e.g. "&amp;") is
// appended with stringAddC(), and charDataRead is set to TRUE.
// Any other name is looked up in d->entities and then in d->externEntities;
// what happens depends on the recognition context (see the switch
// statements). Handler failures store the handler's errorString() in d->error
// and return FALSE.
// Returns TRUE when no error occurred.
5805 bool QXmlSimpleReader::processReference( bool &charDataRead, EntityRecognitionContext context )
5807 QString reference = ref();
5808 if ( reference == "amp" ) {
5809 if ( context == InEntityValue ) {
5811 stringAddC( '&' ); stringAddC( 'a' ); stringAddC( 'm' ); stringAddC( 'p' ); stringAddC( ';' );
5813 // Included or Included in literal
5816 charDataRead = TRUE;
5817 } else if ( reference == "lt" ) {
5818 if ( context == InEntityValue ) {
5820 stringAddC( '&' ); stringAddC( 'l' ); stringAddC( 't' ); stringAddC( ';' );
5822 // Included or Included in literal
5825 charDataRead = TRUE;
5826 } else if ( reference == "gt" ) {
5827 if ( context == InEntityValue ) {
5829 stringAddC( '&' ); stringAddC( 'g' ); stringAddC( 't' ); stringAddC( ';' );
5831 // Included or Included in literal
5834 charDataRead = TRUE;
5835 } else if ( reference == "apos" ) {
5836 if ( context == InEntityValue ) {
5838 stringAddC( '&' ); stringAddC( 'a' ); stringAddC( 'p' ); stringAddC( 'o' ); stringAddC( 's' ); stringAddC( ';' );
5840 // Included or Included in literal
5843 charDataRead = TRUE;
5844 } else if ( reference == "quot" ) {
5845 if ( context == InEntityValue ) {
5847 stringAddC( '&' ); stringAddC( 'q' ); stringAddC( 'u' ); stringAddC( 'o' ); stringAddC( 't' ); stringAddC( ';' );
5849 // Included or Included in literal
5852 charDataRead = TRUE;
5854 QMap<QString,QString>::Iterator it;
5855 it = d->entities.find( reference );
5856 if ( it != d->entities.end() ) {
5857 // "Internal General"
5858 switch ( context ) {
// the replacement text is put in front of the remaining input
5861 xmlRef = it.data() + xmlRef;
5862 charDataRead = FALSE;
5864 case InAttributeValue:
5865 // Included in literal
// Quote characters in the replacement text are rewritten as &quot; / &apos;
// references before the text is re-read. The replacement strings had been
// entity-decoded into """ (not valid C++) and "'" (a no-op) and are
// restored here.
5866 xmlRef = it.data().replace( QRegExp("\""), "&quot;" ).replace( QRegExp("'"), "&apos;" )
5868 charDataRead = FALSE;
5874 for ( int i=0; i<(int)reference.length(); i++ ) {
5875 stringAddC( reference[i] );
5878 charDataRead = TRUE;
5883 d->error = XMLERR_INTERNALGENERALENTITYINDTD;
5884 charDataRead = FALSE;
5888 QMap<QString,QXmlSimpleReaderPrivate::ExternEntity>::Iterator itExtern;
5889 itExtern = d->externEntities.find( reference );
5890 if ( itExtern == d->externEntities.end() ) {
5891 // entity not declared
5892 // ### check this case for conformance
5893 if ( context == InEntityValue ) {
5896 for ( int i=0; i<(int)reference.length(); i++ ) {
5897 stringAddC( reference[i] );
5900 charDataRead = TRUE;
5903 if ( !contentHnd->skippedEntity( reference ) ) {
5904 d->error = contentHnd->errorString();
5905 return FALSE; // error
5909 } else if ( (*itExtern).notation.isNull() ) {
5910 // "External Parsed General"
5911 switch ( context ) {
5913 // Included if validating
5915 if ( !contentHnd->skippedEntity( reference ) ) {
5916 d->error = contentHnd->errorString();
5917 return FALSE; // error
5920 charDataRead = FALSE;
5922 case InAttributeValue:
5924 d->error = XMLERR_EXTERNALGENERALENTITYINAV;
5925 charDataRead = FALSE;
5931 for ( int i=0; i<(int)reference.length(); i++ ) {
5932 stringAddC( reference[i] );
5935 charDataRead = TRUE;
5940 d->error = XMLERR_EXTERNALGENERALENTITYINDTD;
5941 charDataRead = FALSE;
5946 // ### notify for "Occurs as Attribute Value" missing (but this is no reference, anyway)
// a non-null notation means the entity is unparsed; referencing it is an error
5948 d->error = XMLERR_UNPARSEDENTITYREFERENCE;
5949 charDataRead = FALSE;
5950 return FALSE; // error
5954 return TRUE; // no error
5959 Parse over a simple string.
5961 After the string was successfully parsed, the head is on the first
5962 character after the string.
// Matches the input against the literal string s, one character per step.
// The state is the index of the next expected character of s; the function
// is finished when the state equals s.length().
// Error codes assigned to d->error in this function: XMLERR_UNEXPECTEDEOF
// and XMLERR_UNEXPECTEDCHARACTER.
5964 bool QXmlSimpleReader::parseString( const QString& s )
// NOTE(review): the length is narrowed to signed char, so this only works
// for strings shorter than 128 characters; the callers visible in this file
// pass short keyword literals.
5966 signed char Done = s.length();
5968 const signed char InpCharExpected = 0; // the character that was expected
5969 const signed char InpUnknown = 1;
5971 signed char state = 0; // state in this function is the position in the string s
5978 d->error = XMLERR_UNEXPECTEDEOF;
5981 if ( c == s[(int)state] ) {
5982 input = InpCharExpected;
5987 // set state according to input
5988 if ( input == InpCharExpected ) {
5992 d->error = XMLERR_UNEXPECTEDCHARACTER;
5996 // do some actions according to state
5998 // no input is read after this
5999 if ( state == Done ) {
6014 Inits the data values.
// Resets the reader state before a parse: caches the input length, empties
// all four entity maps, sets the standalone flag to Unknown and the error
// string to XMLERR_OK.
6016 void QXmlSimpleReader::init( const QXmlInputSource& i )
6019 xmlLength = xml.length();
// forget entity declarations from any previous document
6022 d->externParameterEntities.clear();
6023 d->parameterEntities.clear();
6024 d->externEntities.clear();
6025 d->entities.clear();
6032 d->standalone = QXmlSimpleReaderPrivate::Unknown;
6038 d->error = XMLERR_OK;
6042 Returns TRUE if an entity with the name \a e exists,
6043 otherwise returns FALSE.
// Tests whether an entity named e has already been declared.
// All four entity maps are consulted. The original looked only at the two
// parameter-entity maps, so an already declared general entity was never
// found and a later declaration of the same name replaced the first one
// (callers insert only when this function reports "not found").
// NOTE(review): general and parameter entities share this single name check,
// as they already did for parameter entities before this change.
6045 bool QXmlSimpleReader::entityExist( const QString& e ) const
// the condition is true when e is in none of the maps
6047 if ( d->parameterEntities.find(e) == d->parameterEntities.end() &&
6048 d->externParameterEntities.find(e) == d->externParameterEntities.end() &&
d->externEntities.find(e) == d->externEntities.end() &&
d->entities.find(e) == d->entities.end() ) {
// Reports the current error to the error handler as a fatal error.
6055 void QXmlSimpleReader::reportParseError()
// the exception carries d->error plus columnNr+1 and lineNr+1
6058 errorHnd->fatalError( QXmlParseException( d->error, columnNr+1, lineNr+1 ) );