2 * libxml++ and this file are copyright (C) 2000 by Ari Johnson, and
3 * are covered by the GNU Lesser General Public License, which should be
4 * included with libxml++ as the file COPYING.
6 * 2002/01/05 Valentin Rusu - fixed some potential buffer overruns
7 * 2002/01/21 Valentin Rusu - added CDATA handlers
10 #include "libxml++/parsers/saxparser.h"
11 #include "libxml++/nodes/element.h"
12 #include "libxml++/keepblanks.h"
14 #include <libxml/parser.h>
15 #include <libxml/parserInternals.h> // for xmlCreateFileParserCtxt
17 #include <cstdarg> //For va_list.
// Groups the static callback functions that are placed in the libxml2 SAX
// handler table built by the SaxParser constructor. Every callback receives
// the parser context as an opaque void*; the definitions later in this file
// read the SaxParser from the context's _private field and forward to the
// matching SaxParser::on_*() member.
22 struct SaxParserCallback
24 static xmlEntityPtr get_entity(void* context, const xmlChar* name);
25 static void entity_decl(void* context, const xmlChar* name, int type, const xmlChar* publicId, const xmlChar* systemId, xmlChar* content);
26 static void start_document(void* context);
27 static void end_document(void* context);
28 static void start_element(void* context, const xmlChar* name, const xmlChar** p);
29 static void end_element(void* context, const xmlChar* name);
30 static void characters(void* context, const xmlChar* ch, int len);
31 static void comment(void* context, const xmlChar* value);
// The three reporting callbacks take a printf-style format string plus
// variadic arguments.
32 static void warning(void* context, const char* fmt, ...);
33 static void error(void* context, const char* fmt, ...);
34 static void fatal_error(void* context, const char* fmt, ...);
35 static void cdata_block(void* context, const xmlChar* value, int len);
36 static void internal_subset(void* context, const xmlChar* name, const xmlChar*publicId, const xmlChar*systemId);
// Constructs the parser and fills in a libxml2 SAX handler table.
// sax_handler_ is initialised with a newly allocated _xmlSAXHandler, and a
// local xmlSAXHandler (temp) is populated with the SaxParserCallback
// functions; an entry of 0 means no callback is installed for that slot.
// use_get_entity: when true, SaxParserCallback::get_entity is installed in the
// getEntity slot; when false that slot is 0.
// NOTE(review): the remaining initializers and the step that transfers temp
// into *sax_handler_ are not visible in this listing - confirm against the
// full file.
41 SaxParser::SaxParser(bool use_get_entity)
42 : sax_handler_( new _xmlSAXHandler )
44 xmlSAXHandler temp = {
45 SaxParserCallback::internal_subset, // internalSubset
47 0, // hasInternalSubset
48 0, // hasExternalSubset
50 use_get_entity ? SaxParserCallback::get_entity : 0, // getEntity
51 SaxParserCallback::entity_decl, // entityDecl
55 0, // unparsedEntityDecl
56 0, // setDocumentLocator
57 SaxParserCallback::start_document, // startDocument
58 SaxParserCallback::end_document, // endDocument
59 SaxParserCallback::start_element, // startElement
60 SaxParserCallback::end_element, // endElement
62 SaxParserCallback::characters, // characters
63 0, // ignorableWhitespace
64 0, // processingInstruction
65 SaxParserCallback::comment, // comment
66 SaxParserCallback::warning, // warning
67 SaxParserCallback::error, // error
68 SaxParserCallback::fatal_error, // fatalError
69 0, // getParameterEntity
70 SaxParserCallback::cdata_block, // cdataBlock
// Destructor.
// NOTE(review): the body is not visible in this listing; presumably it frees
// the parser context via release_underlying() - confirm against the full file.
81 SaxParser::~SaxParser()
// Entity-lookup hook, invoked from SaxParserCallback::get_entity().
// This implementation looks the entity up by name in entity_resolver_doc_ and
// returns whatever that lookup returns.
86 xmlEntityPtr SaxParser::on_get_entity(const Glib::ustring& name)
88 return entity_resolver_doc_.get_entity(name);
// Entity-declaration hook, invoked from SaxParserCallback::entity_decl().
// This implementation passes all five values unchanged to
// entity_resolver_doc_.set_entity_declaration(); on_get_entity() queries the
// same entity_resolver_doc_ object.
91 void SaxParser::on_entity_declaration(const Glib::ustring& name, XmlEntityType type, const Glib::ustring& publicId, const Glib::ustring& systemId, const Glib::ustring& content)
93 entity_resolver_doc_.set_entity_declaration(name, type, publicId, systemId, content);
// Start-of-document hook, invoked from SaxParserCallback::start_document().
// NOTE(review): the body is not visible in this listing - presumably empty; confirm.
96 void SaxParser::on_start_document()
// End-of-document hook, invoked from SaxParserCallback::end_document().
// NOTE(review): the body is not visible in this listing - presumably empty; confirm.
100 void SaxParser::on_end_document()
// Start-of-element hook, invoked from SaxParserCallback::start_element() with
// the element name and the attribute list collected there.
// Both parameter names are commented out, so this implementation does not use
// its arguments.
104 void SaxParser::on_start_element(const Glib::ustring& /* name */, const AttributeList& /* attributes */)
// End-of-element hook, invoked from SaxParserCallback::end_element() with the
// element name. The parameter name is commented out, so this implementation
// does not use its argument.
108 void SaxParser::on_end_element(const Glib::ustring& /* name */)
// Character-data hook, invoked from SaxParserCallback::characters() with the
// text of the reported byte range. The parameter name is commented out, so
// this implementation does not use its argument.
112 void SaxParser::on_characters(const Glib::ustring& /* text */)
// Comment hook, invoked from SaxParserCallback::comment() with the comment
// text. The parameter name is commented out, so this implementation does not
// use its argument.
116 void SaxParser::on_comment(const Glib::ustring& /* text */)
// Warning hook, invoked from SaxParserCallback::warning() with the formatted
// message. The parameter name is commented out, so this implementation does
// not use its argument.
120 void SaxParser::on_warning(const Glib::ustring& /* text */)
// Error hook, invoked from SaxParserCallback::error() with the formatted
// message. The parameter name is commented out, so this implementation does
// not use its argument.
124 void SaxParser::on_error(const Glib::ustring& /* text */)
// Fatal-error hook, invoked from SaxParserCallback::fatal_error() with the
// formatted message. This implementation throws a parse_error whose text is
// the message prefixed with "Fatal error: ".
129 void SaxParser::on_fatal_error(const Glib::ustring& text)
131 throw parse_error("Fatal error: " + text);
// CDATA hook, invoked from SaxParserCallback::cdata_block() with the text of
// the reported byte range. The parameter name is commented out, so this
// implementation does not use its argument.
134 void SaxParser::on_cdata_block(const Glib::ustring& /* text */)
// Internal-subset hook, invoked from SaxParserCallback::internal_subset().
// This implementation passes the name, public ID and system ID unchanged to
// entity_resolver_doc_.set_internal_subset().
138 void SaxParser::on_internal_subset(const Glib::ustring& name,
139 const Glib::ustring& publicId,
140 const Glib::ustring& systemId)
142 entity_resolver_doc_.set_internal_subset(name, publicId, systemId);
145 // implementation of this function is inspired by the SAX documentation by James Henstridge.
146 // (http://www.daa.com.au/~james/gnome/xml-sax/implementing.html)
// Runs xmlParseDocument() on context_ with this object's SAX handler table
// installed for the duration of the call, then frees the context and reports
// failures as exceptions.
// Throws internal_error ("Parser context not created.") and parse_error (when
// an error text was produced, or xmlParseDocument() returned -1).
147 void SaxParser::parse()
149 //TODO If this is not the first parsing with this SaxParser, the xmlDoc object
150 // in entity_resolver_doc_ should be deleted and replaced by a new one.
151 // Otherwise entity declarations from a previous parsing may erroneously affect
152 // this parsing. This would be much easier if entity_resolver_doc_ were a
153 // std::auto_ptr<Document>, so the xmlpp::Document could be deleted and a new
154 // one created. A good place for such code would be in an overridden
155 // SaxParser::initialize_context(). It would be an ABI break.
// NOTE(review): the condition guarding this throw is not visible in this
// listing; presumably it checks that context_ has been created - confirm.
159 throw internal_error("Parser context not created.");
// Remember the handler currently attached to the context and install ours.
162 auto old_sax = context_->sax;
163 context_->sax = sax_handler_.get();
166 initialize_context();
168 const int parseError = xmlParseDocument(context_);
// Re-attach the handler that was in place before parsing.
170 context_->sax = old_sax;
// Collect the error text while context_ still exists; if there is none but
// xmlParseDocument() returned -1, substitute a generic message.
172 auto error_str = format_xml_parser_error(context_);
173 if (error_str.empty() && parseError == -1)
174 error_str = "xmlParseDocument() failed.";
176 release_underlying(); // Free context_
// check_for_exception() is defined elsewhere; presumably it rethrows an
// exception recorded through handleException() by a callback - confirm.
// It runs before the parse_error below is considered.
178 check_for_exception();
180 if(!error_str.empty())
182 throw parse_error(error_str);
// Starts a parse of the file named by filename: creates a libxml2 file parser
// context for it and stores it in context_.
// NOTE(review): the guard for the throw below and the statements after the
// context creation are not visible in this listing; going by the message, the
// throw reports that context_ is already in use - confirm.
186 void SaxParser::parse_file(const Glib::ustring& filename)
190 throw parse_error("Attempt to start a second parse while a parse is in progress.");
// KeepBlanks is declared in libxml++/keepblanks.h; k lives until this function returns.
193 KeepBlanks k(KeepBlanks::Default);
195 context_ = xmlCreateFileParserCtxt(filename.c_str());
// Starts a parse of an in-memory buffer: creates a libxml2 memory parser
// context over contents (bytes_count bytes) and stores it in context_.
// NOTE(review): the guard for the throw below and the statements after the
// context creation are not visible in this listing; going by the message, the
// throw reports that context_ is already in use - confirm.
199 void SaxParser::parse_memory_raw(const unsigned char* contents, size_type bytes_count)
203 throw parse_error("Attempt to start a second parse while a parse is in progress.");
// KeepBlanks is declared in libxml++/keepblanks.h; k lives until this function returns.
206 KeepBlanks k(KeepBlanks::Default);
208 context_ = xmlCreateMemoryParserCtxt((const char*)contents, bytes_count);
// Convenience overload for a Glib::ustring: forwards to parse_memory_raw()
// with the string's underlying bytes and contents.bytes() as the length
// (bytes(), not the character count).
212 void SaxParser::parse_memory(const Glib::ustring& contents)
214 parse_memory_raw((const unsigned char*)contents.c_str(), contents.bytes());
// Parses XML read from a std::istream by feeding it line by line to a libxml2
// push parser (xmlParseChunk), then issuing a terminating xmlParseChunk call,
// freeing the context and reporting failures as exceptions.
// Throws parse_error ("Attempt to start a second parse...", or the collected
// error text) and internal_error ("Could not create parser context...").
217 void SaxParser::parse_stream(std::istream& in)
// NOTE(review): the guard for this throw is not visible in this listing.
221 throw parse_error("Attempt to start a second parse while a parse is in progress.");
// KeepBlanks is declared in libxml++/keepblanks.h; k lives until this function returns.
224 KeepBlanks k(KeepBlanks::Default);
// Create the push-parser context (only part of the argument list is visible
// in this listing).
227 context_ = xmlCreatePushParserCtxt(
232 0); // no filename for fetching external entities
// NOTE(review): the guard for this throw is not visible; presumably it fires
// when xmlCreatePushParserCtxt() returned no context - confirm.
236 throw internal_error("Could not create parser context\n" + format_xml_error());
239 initialize_context();
241 //TODO: Shouldn't we use a Glib::ustring here, and some alternative to std::getline()?
242 int firstParseError = XML_ERR_OK;
// Read until the stream is exhausted or exception_ becomes set.
244 while( ( ! exception_ )
245 && std::getline(in, line))
247 // since getline does not get the line separator, we have to add it since the parser cares
248 // about layout in certain cases.
251 const int parseError = xmlParseChunk(context_, line.c_str(),
252 line.size() /* This is a std::string, not a ustring, so this is the number of bytes. */,
253 0 /* don't terminate */);
255 // Save the first error code if any, but read on.
256 // More errors might be reported and then thrown by check_for_exception().
257 if (parseError != XML_ERR_OK && firstParseError == XML_ERR_OK)
258 firstParseError = parseError;
263 //This is called just to terminate parsing.
264 const int parseError = xmlParseChunk(context_, 0 /* chunk */, 0 /* size */, 1 /* terminate (1 or 0) */);
// The terminating call can also be the first one to report an error.
266 if (parseError != XML_ERR_OK && firstParseError == XML_ERR_OK)
267 firstParseError = parseError;
// Collect the error text while context_ still exists; if there is none but an
// error code was returned, report the first such code instead.
270 auto error_str = format_xml_parser_error(context_);
271 if (error_str.empty() && firstParseError != XML_ERR_OK)
272 error_str = "Error code from xmlParseChunk(): " + Glib::ustring::format(firstParseError);
274 release_underlying(); // Free context_
276 check_for_exception();
278 if(!error_str.empty())
280 throw parse_error(error_str);
// Convenience overload for a Glib::ustring: forwards to parse_chunk_raw()
// with the string's underlying bytes and chunk.bytes() as the length
// (bytes(), not the character count).
284 void SaxParser::parse_chunk(const Glib::ustring& chunk)
286 parse_chunk_raw((const unsigned char*)chunk.c_str(), chunk.bytes());
// Feeds one chunk of raw bytes (contents, bytes_count bytes) to the libxml2
// push parser without terminating the document; the terminating call is made
// by finish_chunk_parsing(). No release_underlying() call appears in the
// visible lines of this function.
// Throws internal_error ("Could not create parser context...") and
// parse_error (when an error text or a non-OK return code is produced).
289 void SaxParser::parse_chunk_raw(const unsigned char* contents, size_type bytes_count)
// KeepBlanks is declared in libxml++/keepblanks.h; k lives until this function returns.
291 KeepBlanks k(KeepBlanks::Default);
// NOTE(review): the condition under which the context is created is not
// visible in this listing; presumably it is created only when context_ does
// not exist yet, so later chunks reuse it - confirm.
296 context_ = xmlCreatePushParserCtxt(
301 0); // no filename for fetching external entities
305 throw internal_error("Could not create parser context\n" + format_xml_error());
307 initialize_context();
// Clear the context's last error before parsing; presumably so that the error
// text collected below belongs to this chunk only - confirm.
310 xmlCtxtResetLastError(context_);
312 int parseError = XML_ERR_OK;
314 parseError = xmlParseChunk(context_, (const char*)contents, bytes_count, 0 /* don't terminate */);
316 check_for_exception();
// Prefer the parser's error text; fall back to the numeric return code.
318 auto error_str = format_xml_parser_error(context_);
319 if (error_str.empty() && parseError != XML_ERR_OK)
320 error_str = "Error code from xmlParseChunk(): " + Glib::ustring::format(parseError);
321 if(!error_str.empty())
323 throw parse_error(error_str);
// Delegates to the base-class Parser::release_underlying(). Callers in this
// file annotate the call with "Free context_".
327 void SaxParser::release_underlying()
329 Parser::release_underlying();
// Ends a chunked parse: issues the terminating xmlParseChunk() call, frees
// the context and reports failures as exceptions.
// Throws internal_error ("Could not create parser context...") and
// parse_error (when an error text or a non-OK return code is produced).
332 void SaxParser::finish_chunk_parsing()
// NOTE(review): the condition under which a context is created here is not
// visible in this listing; presumably this covers the case where no chunk was
// parsed beforehand - confirm.
337 context_ = xmlCreatePushParserCtxt(
339 0, // this, // user_data
342 0); // no filename for fetching external entities
346 throw internal_error("Could not create parser context\n" + format_xml_error());
348 initialize_context();
// Clear the context's last error before the terminating call.
351 xmlCtxtResetLastError(context_);
353 int parseError = XML_ERR_OK;
355 //This is called just to terminate parsing.
356 parseError = xmlParseChunk(context_, 0 /* chunk */, 0 /* size */, 1 /* terminate (1 or 0) */);
// Collect the error text while context_ still exists; fall back to the
// numeric return code when there is no text.
358 auto error_str = format_xml_parser_error(context_);
359 if (error_str.empty() && parseError != XML_ERR_OK)
360 error_str = "Error code from xmlParseChunk(): " + Glib::ustring::format(parseError);
362 release_underlying(); // Free context_
364 check_for_exception();
366 if(!error_str.empty())
368 throw parse_error(error_str);
// getEntity callback: reads the SaxParser from the context's _private field
// and asks it for the entity via on_get_entity(). A caught `exception` is
// handed to parser->handleException().
// NOTE(review): the return statement is not visible in this listing;
// presumably result is returned, which stays nullptr if on_get_entity() threw.
373 xmlEntityPtr SaxParserCallback::get_entity(void* context, const xmlChar* name)
375 auto the_context = static_cast<_xmlParserCtxt*>(context);
376 auto parser = static_cast<SaxParser*>(the_context->_private);
377 xmlEntityPtr result = nullptr;
381 result = parser->on_get_entity((const char*)name);
383 catch(const exception& e)
385 parser->handleException(e);
// entityDecl callback: reads the SaxParser from the context's _private field
// and forwards the declaration to on_entity_declaration(). A caught
// `exception` is handed to parser->handleException().
391 void SaxParserCallback::entity_decl(void* context, const xmlChar* name, int type, const xmlChar* publicId, const xmlChar* systemId, xmlChar* content)
393 auto the_context = static_cast<_xmlParserCtxt*>(context);
394 auto parser = static_cast<SaxParser*>(the_context->_private);
// Each string argument may be null; a null pointer is passed on as "".
// The integer type is cast to XmlEntityType.
398 parser->on_entity_declaration(
399 ( name ? Glib::ustring((const char*)name) : ""),
400 static_cast<XmlEntityType>(type),
401 ( publicId ? Glib::ustring((const char*)publicId) : ""),
402 ( systemId ? Glib::ustring((const char*)systemId) : ""),
403 ( content ? Glib::ustring((const char*)content) : "") );
405 catch(const exception& e)
407 parser->handleException(e);
// startDocument callback: reads the SaxParser from the context's _private
// field and calls on_start_document(). A caught `exception` is handed to
// parser->handleException().
411 void SaxParserCallback::start_document(void* context)
413 auto the_context = static_cast<_xmlParserCtxt*>(context);
414 auto parser = static_cast<SaxParser*>(the_context->_private);
418 parser->on_start_document();
420 catch(const exception& e)
422 parser->handleException(e);
// endDocument callback: reads the SaxParser from the context's _private field
// and calls on_end_document(). A caught `exception` is handed to
// parser->handleException().
426 void SaxParserCallback::end_document(void* context)
428 auto the_context = static_cast<_xmlParserCtxt*>(context);
429 auto parser = static_cast<SaxParser*>(the_context->_private);
// NOTE(review): the statement controlled by this check is not visible in this
// listing; presumably the callback returns early when exception_ is already
// set, skipping on_end_document() - confirm.
431 if(parser->exception_)
436 parser->on_end_document();
438 catch(const exception& e)
440 parser->handleException(e);
// startElement callback: reads the SaxParser from the context's _private
// field, collects the attributes from p and calls on_start_element() with the
// element name and that list. A caught `exception` is handed to
// parser->handleException().
// (The rest of the parameter list is not visible in this listing; the
// declaration in SaxParserCallback gives it as name and p.)
444 void SaxParserCallback::start_element(void* context,
448 auto the_context = static_cast<_xmlParserCtxt*>(context);
449 auto parser = static_cast<SaxParser*>(the_context->_private);
451 SaxParser::AttributeList attributes;
// p is walked two entries at a time; each pair of consecutive entries becomes
// one SaxParser::Attribute. The walk stops at a null p or at a null entry in
// the first position of a pair.
454 for(const xmlChar** cur = p; cur && *cur; cur += 2)
455 attributes.push_back(
456 SaxParser::Attribute( (char*)*cur, (char*)*(cur + 1) ));
460 parser->on_start_element(Glib::ustring((const char*) name), attributes);
462 catch(const exception& e)
464 parser->handleException(e);
// endElement callback: reads the SaxParser from the context's _private field
// and calls on_end_element() with the element name. A caught `exception` is
// handed to parser->handleException().
468 void SaxParserCallback::end_element(void* context, const xmlChar* name)
470 auto the_context = static_cast<_xmlParserCtxt*>(context);
471 auto parser = static_cast<SaxParser*>(the_context->_private);
475 parser->on_end_element(Glib::ustring((const char*) name));
477 catch(const exception& e)
479 parser->handleException(e);
// characters callback: reads the SaxParser from the context's _private field
// and calls on_characters() with the text in the byte range [ch, ch + len).
// A caught `exception` is handed to parser->handleException().
483 void SaxParserCallback::characters(void * context, const xmlChar* ch, int len)
485 auto the_context = static_cast<_xmlParserCtxt*>(context);
486 auto parser = static_cast<SaxParser*>(the_context->_private);
490 // Here we force the use of Glib::ustring::ustring( InputIterator begin, InputIterator end )
491 // instead of Glib::ustring::ustring( const char*, size_type ) because it
492 // expects the length of the string in characters, not in bytes.
493 parser->on_characters(
495 reinterpret_cast<const char *>(ch),
496 reinterpret_cast<const char *>(ch + len) ) );
498 catch(const exception& e)
500 parser->handleException(e);
// comment callback: reads the SaxParser from the context's _private field and
// calls on_comment() with the comment text. A caught `exception` is handed to
// parser->handleException().
504 void SaxParserCallback::comment(void* context, const xmlChar* value)
506 auto the_context = static_cast<_xmlParserCtxt*>(context);
507 auto parser = static_cast<SaxParser*>(the_context->_private);
511 parser->on_comment(Glib::ustring((const char*) value));
513 catch(const exception& e)
515 parser->handleException(e);
// warning callback: formats the printf-style message and passes it to
// on_warning() of the SaxParser stored in the context's _private field.
// A caught `exception` is handed to parser->handleException().
// (The va_list handling around vsnprintf is not visible in this listing.)
519 void SaxParserCallback::warning(void* context, const char* fmt, ...)
521 auto the_context = static_cast<_xmlParserCtxt*>(context);
522 auto parser = static_cast<SaxParser*>(the_context->_private);
// Fixed-size buffer: vsnprintf is given its element count as the limit, so a
// longer message is truncated to fit.
525 char buff[1024]; //TODO: Larger/Shared
528 vsnprintf(buff, sizeof(buff)/sizeof(buff[0]), fmt, arg);
533 parser->on_warning(Glib::ustring(buff));
535 catch(const exception& e)
537 parser->handleException(e);
// error callback: formats the printf-style message and passes it to
// on_error() of the SaxParser stored in the context's _private field.
// A caught `exception` is handed to parser->handleException().
// (The va_list handling around vsnprintf is not visible in this listing.)
541 void SaxParserCallback::error(void* context, const char* fmt, ...)
543 auto the_context = static_cast<_xmlParserCtxt*>(context);
544 auto parser = static_cast<SaxParser*>(the_context->_private);
// Fixed-size buffer: vsnprintf is given its element count as the limit, so a
// longer message is truncated to fit.
547 char buff[1024]; //TODO: Larger/Shared
// NOTE(review): the statement controlled by this check is not visible in this
// listing; presumably the callback returns early when exception_ is already
// set, so on_error() is not called - confirm.
549 if(parser->exception_)
553 vsnprintf(buff, sizeof(buff)/sizeof(buff[0]), fmt, arg);
558 parser->on_error(Glib::ustring(buff));
560 catch(const exception& e)
562 parser->handleException(e);
// fatalError callback: formats the printf-style message and passes it to
// on_fatal_error() of the SaxParser stored in the context's _private field.
// A caught `exception` is handed to parser->handleException(); the
// SaxParser::on_fatal_error() defined in this file throws parse_error.
// (The va_list handling around vsnprintf is not visible in this listing.)
566 void SaxParserCallback::fatal_error(void* context, const char* fmt, ...)
568 auto the_context = static_cast<_xmlParserCtxt*>(context);
569 auto parser = static_cast<SaxParser*>(the_context->_private);
// Fixed-size buffer: vsnprintf is given its element count as the limit, so a
// longer message is truncated to fit.
572 char buff[1024]; //TODO: Larger/Shared
575 vsnprintf(buff, sizeof(buff)/sizeof(buff[0]), fmt, arg);
580 parser->on_fatal_error(Glib::ustring(buff));
582 catch(const exception& e)
584 parser->handleException(e);
// cdataBlock callback: reads the SaxParser from the context's _private field
// and calls on_cdata_block() with the text in the byte range
// [value, value + len). A caught `exception` is handed to
// parser->handleException().
588 void SaxParserCallback::cdata_block(void* context, const xmlChar* value, int len)
590 auto the_context = static_cast<_xmlParserCtxt*>(context);
591 auto parser = static_cast<SaxParser*>(the_context->_private);
595 // Here we force the use of Glib::ustring::ustring( InputIterator begin, InputIterator end )
596 // see comments in SaxParserCallback::characters
597 parser->on_cdata_block(
599 reinterpret_cast<const char *>(value),
600 reinterpret_cast<const char *>(value + len) ) );
602 catch(const exception& e)
604 parser->handleException(e);
// internalSubset callback: reads the SaxParser from the context's _private
// field and calls on_internal_subset() with the name, public ID and system ID.
// A caught `exception` is handed to parser->handleException().
608 void SaxParserCallback::internal_subset(void* context, const xmlChar* name,
609 const xmlChar* publicId, const xmlChar* systemId)
611 auto the_context = static_cast<_xmlParserCtxt*>(context);
612 auto parser = static_cast<SaxParser*>(the_context->_private);
// publicId and systemId may be null and are then passed on as "".
// name is converted without a null check.
616 const auto pid = publicId ? Glib::ustring((const char*) publicId) : "";
617 const auto sid = systemId ? Glib::ustring((const char*) systemId) : "";
619 parser->on_internal_subset( Glib::ustring((const char*) name), pid, sid);
621 catch(const exception& e)
623 parser->handleException(e);