2 * libxml++ and this file are copyright (C) 2000 by Ari Johnson, and
3 * are covered by the GNU Lesser General Public License, which should be
4 * included with libxml++ as the file COPYING.
6 * 2002/01/05 Valentin Rusu - fixed some potential buffer overruns
7 * 2002/01/21 Valentin Rusu - added CDATA handlers
10 #include "libxml++/parsers/saxparser.h"
11 #include "libxml++/nodes/element.h"
12 #include "libxml++/keepblanks.h"
14 #include <libxml/parser.h>
15 #include <libxml/parserInternals.h> // for xmlCreateFileParserCtxt
17 #include <cstdarg> //For va_list.
// Groups the static callback functions that the SaxParser constructor installs
// in its xmlSAXHandler table. Every callback receives the opaque parser
// context as its first argument; the definitions further down cast it to
// _xmlParserCtxt* and forward the event to the owning SaxParser.
22 struct SaxParserCallback
24 static xmlEntityPtr get_entity(void* context, const xmlChar* name);
25 static void entity_decl(void* context, const xmlChar* name, int type, const xmlChar* publicId, const xmlChar* systemId, xmlChar* content);
26 static void start_document(void* context);
27 static void end_document(void* context);
// `p` is the attribute array handed over by the parser (see the definition).
28 static void start_element(void* context, const xmlChar* name, const xmlChar** p);
29 static void end_element(void* context, const xmlChar* name);
// `len` is a length counted in xmlChar units (bytes), not in characters.
30 static void characters(void* context, const xmlChar* ch, int len);
31 static void comment(void* context, const xmlChar* value);
// The three diagnostic callbacks take a printf-style format string followed by
// variadic arguments.
32 static void warning(void* context, const char* fmt, ...);
33 static void error(void* context, const char* fmt, ...);
34 static void fatal_error(void* context, const char* fmt, ...);
35 static void cdata_block(void* context, const xmlChar* value, int len);
36 static void internal_subset(void* context, const xmlChar* name, const xmlChar*publicId, const xmlChar*systemId);
// Constructs a SaxParser.
// The member initializers allocate the SAX handler structure and a new
// Document that is used for entity resolution. The body then fills a
// temporary xmlSAXHandler with the SaxParserCallback functions.
//
// use_get_entity: when true, SaxParserCallback::get_entity is placed in the
//   getEntity slot; when false, that slot is left null.
41 SaxParser::SaxParser(bool use_get_entity)
42 : sax_handler_(new _xmlSAXHandler), entity_resolver_doc_(new Document)
// Callback table, listed in xmlSAXHandler member order. A nullptr slot means
// no callback is registered for that event.
44 xmlSAXHandler temp = {
45 SaxParserCallback::internal_subset,
46 nullptr, // isStandalone
47 nullptr, // hasInternalSubset
48 nullptr, // hasExternalSubset
49 nullptr, // resolveEntity
50 use_get_entity ? SaxParserCallback::get_entity : nullptr, // getEntity
51 SaxParserCallback::entity_decl, // entityDecl
52 nullptr, // notationDecl
53 nullptr, // attributeDecl
54 nullptr, // elementDecl
55 nullptr, // unparsedEntityDecl
56 nullptr, // setDocumentLocator
57 SaxParserCallback::start_document, // startDocument
58 SaxParserCallback::end_document, // endDocument
59 SaxParserCallback::start_element, // startElement
60 SaxParserCallback::end_element, // endElement
62 SaxParserCallback::characters, // characters
63 nullptr, // ignorableWhitespace
64 nullptr, // processingInstruction
65 SaxParserCallback::comment, // comment
66 SaxParserCallback::warning, // warning
67 SaxParserCallback::error, // error
68 SaxParserCallback::fatal_error, // fatalError
69 nullptr, // getParameterEntity
70 SaxParserCallback::cdata_block, // cdataBlock
71 nullptr, // externalSubset
74 nullptr, // startElementNs
75 nullptr, // endElementNs
80 // The default action is to call on_warning(), on_error(), on_fatal_error().
81 set_throw_messages(false);
// Destructor of SaxParser.
84 SaxParser::~SaxParser()
// Handler for entity look-ups, invoked from SaxParserCallback::get_entity().
// Looks `name` up in entity_resolver_doc_ and returns whatever that document
// returns for it.
89 xmlEntityPtr SaxParser::on_get_entity(const Glib::ustring& name)
91 return entity_resolver_doc_->get_entity(name);
// Handler for entity declarations, invoked from
// SaxParserCallback::entity_decl(). Records the declaration in
// entity_resolver_doc_, the same document that on_get_entity() queries.
94 void SaxParser::on_entity_declaration(const Glib::ustring& name, XmlEntityType type, const Glib::ustring& publicId, const Glib::ustring& systemId, const Glib::ustring& content)
96 entity_resolver_doc_->set_entity_declaration(name, type, publicId, systemId, content);
// SaxParser's start-of-document handler; invoked from
// SaxParserCallback::start_document().
99 void SaxParser::on_start_document()
// SaxParser's end-of-document handler; invoked from
// SaxParserCallback::end_document().
103 void SaxParser::on_end_document()
// SaxParser's element-start handler; invoked from
// SaxParserCallback::start_element() with the element name and its collected
// attributes. Both parameter names are commented out, so this implementation
// does not reference them.
107 void SaxParser::on_start_element(const Glib::ustring& /* name */, const AttributeList& /* attributes */)
// SaxParser's element-end handler; invoked from
// SaxParserCallback::end_element(). The parameter name is commented out, so
// this implementation does not reference it.
111 void SaxParser::on_end_element(const Glib::ustring& /* name */)
// SaxParser's character-data handler; invoked from
// SaxParserCallback::characters(). The parameter name is commented out, so
// this implementation does not reference it.
115 void SaxParser::on_characters(const Glib::ustring& /* text */)
// SaxParser's comment handler; invoked from SaxParserCallback::comment().
// The parameter name is commented out, so this implementation does not
// reference it.
119 void SaxParser::on_comment(const Glib::ustring& /* text */)
// SaxParser's warning handler; invoked from SaxParserCallback::warning() with
// the formatted message. The parameter name is commented out, so this
// implementation does not reference it.
123 void SaxParser::on_warning(const Glib::ustring& /* text */)
// SaxParser's error handler; invoked from SaxParserCallback::error() with the
// formatted message. The parameter name is commented out, so this
// implementation does not reference it.
127 void SaxParser::on_error(const Glib::ustring& /* text */)
// SaxParser's fatal-error handler; invoked from
// SaxParserCallback::fatal_error() with the formatted message.
// Throws parse_error whose text is "Fatal error: " followed by `text`.
132 void SaxParser::on_fatal_error(const Glib::ustring& text)
134 throw parse_error("Fatal error: " + text);
// SaxParser's CDATA handler; invoked from SaxParserCallback::cdata_block().
// The parameter name is commented out, so this implementation does not
// reference it.
137 void SaxParser::on_cdata_block(const Glib::ustring& /* text */)
// Handler for the internal DTD subset, invoked from
// SaxParserCallback::internal_subset(). Forwards the subset's name, public ID
// and system ID to entity_resolver_doc_.
141 void SaxParser::on_internal_subset(const Glib::ustring& name,
142 const Glib::ustring& publicId,
143 const Glib::ustring& systemId)
145 entity_resolver_doc_->set_internal_subset(name, publicId, systemId);
148 // implementation of this function is inspired by the SAX documentation by James Henstridge.
149 // (http://www.daa.com.au/~james/gnome/xml-sax/implementing.html)
// Runs a complete parse on the already-created context_ and then frees it.
// Throws internal_error("Parser context not created.") and parse_error when
// an error message is available after parsing.
150 void SaxParser::parse()
// NOTE(review): the condition guarding this throw is not visible here;
// presumably it fires when context_ is null -- confirm.
154 throw internal_error("Parser context not created.");
// Temporarily replace the context's SAX handler with ours, remembering the
// previous one so it can be restored after parsing.
157 auto old_sax = context_->sax;
158 context_->sax = sax_handler_.get();
161 initialize_context();
163 const int parseError = xmlParseDocument(context_);
165 context_->sax = old_sax;
// Collect the error text before the context is released; fall back to a
// generic message when the parse returned -1 without any error text.
167 auto error_str = format_xml_parser_error(context_);
168 if (error_str.empty() && parseError == -1)
169 error_str = "xmlParseDocument() failed.";
171 release_underlying(); // Free context_
// check_for_exception() runs before the parse_error below, so anything it
// throws takes precedence over the collected error text.
173 check_for_exception();
175 if(!error_str.empty())
177 throw parse_error(error_str);
// Creates a file parser context for `filename`.
// NOTE(review): the guard for the throw below and the statements following
// the context creation are not visible here; presumably the throw fires when
// a context already exists and the parse is started afterwards -- confirm.
181 void SaxParser::parse_file(const std::string& filename)
185 throw parse_error("Attempt to start a second parse while a parse is in progress.");
// Instantiates a KeepBlanks object with the default setting for this scope.
188 KeepBlanks k(KeepBlanks::Default);
190 context_ = xmlCreateFileParserCtxt(filename.c_str());
// Creates a memory parser context over the `bytes_count` bytes starting at
// `contents`.
// NOTE(review): the guard for the throw below and the statements following
// the context creation are not visible here; presumably the throw fires when
// a context already exists and the parse is started afterwards -- confirm.
194 void SaxParser::parse_memory_raw(const unsigned char* contents, size_type bytes_count)
198 throw parse_error("Attempt to start a second parse while a parse is in progress.");
// Instantiates a KeepBlanks object with the default setting for this scope.
201 KeepBlanks k(KeepBlanks::Default);
203 context_ = xmlCreateMemoryParserCtxt((const char*)contents, bytes_count);
// Convenience overload: parses the contents of a Glib::ustring by forwarding
// its character data and its size in bytes (bytes(), not the character count)
// to parse_memory_raw().
207 void SaxParser::parse_memory(const Glib::ustring& contents)
209 parse_memory_raw((const unsigned char*)contents.c_str(), contents.bytes());
// Parses XML read line by line from `in`, using a push parser context.
// Each line is fed to xmlParseChunk(); a final terminating call ends the
// parse. The first non-OK return code is remembered and used as a fallback
// error message. Throws parse_error and internal_error.
212 void SaxParser::parse_stream(std::istream& in)
// NOTE(review): the guard for this throw is not visible here; presumably it
// fires when a context already exists -- confirm.
216 throw parse_error("Attempt to start a second parse while a parse is in progress.");
// Instantiates a KeepBlanks object with the default setting for this scope.
219 KeepBlanks k(KeepBlanks::Default);
// Only part of the argument list of this call is visible here.
222 context_ = xmlCreatePushParserCtxt(
224 nullptr, // user_data
227 nullptr); // no filename for fetching external entities
// NOTE(review): presumably thrown when context creation returned null; the
// guard is not visible here.
231 throw internal_error("Could not create parser context\n" + format_xml_error());
234 initialize_context();
236 // std::string or Glib::ustring?
237 // Output from the XML parser is UTF-8 encoded.
238 // But the istream "in" is input, i.e. an XML file. It can use any encoding.
239 // If it's not UTF-8, the file itself must contain information about which
240 // encoding it uses. See the XML specification. Thus use std::string.
241 int firstParseError = XML_ERR_OK;
// Stop reading as soon as exception_ is set or the stream runs out of lines.
243 while (!exception_ && std::getline(in, line))
245 // Since getline does not get the line separator, we have to add it since the parser cares
246 // about layout in certain cases.
249 const int parseError = xmlParseChunk(context_, line.c_str(),
250 line.size() /* This is a std::string, not a ustring, so this is the number of bytes. */,
251 0 /* don't terminate */);
253 // Save the first error code if any, but read on.
254 // More errors might be reported and then thrown by check_for_exception().
255 if (parseError != XML_ERR_OK && firstParseError == XML_ERR_OK)
256 firstParseError = parseError;
261 //This is called just to terminate parsing.
262 const int parseError = xmlParseChunk(context_, nullptr /* chunk */, 0 /* size */, 1 /* terminate (1 or 0) */);
264 if (parseError != XML_ERR_OK && firstParseError == XML_ERR_OK)
265 firstParseError = parseError;
// Collect the error text before the context is released; fall back to the
// numeric code of the first failing xmlParseChunk() call.
268 auto error_str = format_xml_parser_error(context_);
269 if (error_str.empty() && firstParseError != XML_ERR_OK)
270 error_str = "Error code from xmlParseChunk(): " + Glib::ustring::format(firstParseError);
272 release_underlying(); // Free context_
// check_for_exception() runs before the parse_error below, so anything it
// throws takes precedence over the collected error text.
274 check_for_exception();
276 if(!error_str.empty())
278 throw parse_error(error_str);
// Convenience overload: feeds a Glib::ustring chunk to the push parser by
// forwarding its character data and its size in bytes (bytes(), not the
// character count) to parse_chunk_raw().
282 void SaxParser::parse_chunk(const Glib::ustring& chunk)
284 parse_chunk_raw((const unsigned char*)chunk.c_str(), chunk.bytes());
// Feeds `bytes_count` bytes starting at `contents` to the push parser without
// terminating the parse. Throws parse_error when an error message is
// available afterwards, and internal_error.
// Unlike parse() and parse_stream(), no release_underlying() call appears
// among the visible lines of this function.
287 void SaxParser::parse_chunk_raw(const unsigned char* contents, size_type bytes_count)
// Instantiates a KeepBlanks object with the default setting for this scope.
289 KeepBlanks k(KeepBlanks::Default);
// NOTE(review): the condition under which the context is created is not
// visible here; presumably only when none exists yet -- confirm. Only part of
// the argument list is visible as well.
294 context_ = xmlCreatePushParserCtxt(
296 nullptr, // user_data
299 nullptr); // no filename for fetching external entities
// NOTE(review): presumably thrown when context creation returned null; the
// guard is not visible here.
303 throw internal_error("Could not create parser context\n" + format_xml_error());
305 initialize_context();
// Clear the context's last error before parsing this chunk.
308 xmlCtxtResetLastError(context_);
310 int parseError = XML_ERR_OK;
312 parseError = xmlParseChunk(context_, (const char*)contents, bytes_count, 0 /* don't terminate */);
// check_for_exception() runs first, so anything it throws takes precedence
// over the parse_error built below.
314 check_for_exception();
// Fall back to the numeric return code when no error text is available.
316 auto error_str = format_xml_parser_error(context_);
317 if (error_str.empty() && parseError != XML_ERR_OK)
318 error_str = "Error code from xmlParseChunk(): " + Glib::ustring::format(parseError);
319 if(!error_str.empty())
321 throw parse_error(error_str);
// Ends a chunked parse: issues a terminating xmlParseChunk() call, frees the
// context, and reports errors. Throws parse_error when an error message is
// available afterwards, and internal_error.
325 void SaxParser::finish_chunk_parsing()
// NOTE(review): the condition under which the context is created is not
// visible here; presumably only when none exists yet -- confirm. Only part of
// the argument list is visible as well.
330 context_ = xmlCreatePushParserCtxt(
332 nullptr, // user_data
335 nullptr); // no filename for fetching external entities
// NOTE(review): presumably thrown when context creation returned null; the
// guard is not visible here.
339 throw internal_error("Could not create parser context\n" + format_xml_error());
341 initialize_context();
// Clear the context's last error before the terminating call.
344 xmlCtxtResetLastError(context_);
346 int parseError = XML_ERR_OK;
348 //This is called just to terminate parsing.
349 parseError = xmlParseChunk(context_, nullptr /* chunk */, 0 /* size */, 1 /* terminate (1 or 0) */);
// Collect the error text before the context is released; fall back to the
// numeric return code when no text is available.
351 auto error_str = format_xml_parser_error(context_);
352 if (error_str.empty() && parseError != XML_ERR_OK)
353 error_str = "Error code from xmlParseChunk(): " + Glib::ustring::format(parseError);
355 release_underlying(); // Free context_
// check_for_exception() runs before the parse_error below, so anything it
// throws takes precedence over the collected error text.
357 check_for_exception();
359 if(!error_str.empty())
361 throw parse_error(error_str);
// Releases the underlying parser resources by delegating to
// Parser::release_underlying(). Callers in this file use it to free context_.
365 void SaxParser::release_underlying()
367 Parser::release_underlying();
// Prepares for a new parse: runs Parser::initialize_context() and then
// replaces entity_resolver_doc_ with a freshly constructed Document.
370 void SaxParser::initialize_context()
372 Parser::initialize_context();
373 // Start with an empty Document for entity resolution.
374 entity_resolver_doc_.reset(new Document);
// getEntity callback: forwards the look-up of `name` to
// SaxParser::on_get_entity() and stores the answer in `result`, which starts
// out as nullptr.
378 xmlEntityPtr SaxParserCallback::get_entity(void* context, const xmlChar* name)
// `context` is the parser context; its _private field holds the SaxParser.
380 auto the_context = static_cast<_xmlParserCtxt*>(context);
381 auto parser = static_cast<SaxParser*>(the_context->_private);
382 xmlEntityPtr result = nullptr;
386 result = parser->on_get_entity((const char*)name);
// NOTE(review): presumably runs inside a catch block (not visible here) so
// exceptions from the handler do not propagate through the caller -- confirm.
390 parser->handle_exception();
// entityDecl callback: converts the C strings to Glib::ustring and forwards
// the declaration to SaxParser::on_entity_declaration().
396 void SaxParserCallback::entity_decl(void* context, const xmlChar* name, int type, const xmlChar* publicId, const xmlChar* systemId, xmlChar* content)
// `context` is the parser context; its _private field holds the SaxParser.
398 auto the_context = static_cast<_xmlParserCtxt*>(context);
399 auto parser = static_cast<SaxParser*>(the_context->_private);
// Each string argument may be null; a null pointer is passed on as "".
403 parser->on_entity_declaration(
404 ( name ? Glib::ustring((const char*)name) : ""),
405 static_cast<XmlEntityType>(type),
406 ( publicId ? Glib::ustring((const char*)publicId) : ""),
407 ( systemId ? Glib::ustring((const char*)systemId) : ""),
408 ( content ? Glib::ustring((const char*)content) : "") );
// NOTE(review): presumably runs inside a catch block (not visible here) so
// exceptions from the handler do not propagate through the caller -- confirm.
412 parser->handle_exception();
// startDocument callback: forwards to SaxParser::on_start_document().
416 void SaxParserCallback::start_document(void* context)
// `context` is the parser context; its _private field holds the SaxParser.
418 auto the_context = static_cast<_xmlParserCtxt*>(context);
419 auto parser = static_cast<SaxParser*>(the_context->_private);
423 parser->on_start_document();
// NOTE(review): presumably runs inside a catch block (not visible here) so
// exceptions from the handler do not propagate through the caller -- confirm.
427 parser->handle_exception();
// endDocument callback: forwards to SaxParser::on_end_document().
431 void SaxParserCallback::end_document(void* context)
// `context` is the parser context; its _private field holds the SaxParser.
433 auto the_context = static_cast<_xmlParserCtxt*>(context);
434 auto parser = static_cast<SaxParser*>(the_context->_private);
// NOTE(review): the statement controlled by this check is not visible here;
// presumably the callback returns early once exception_ is set -- confirm.
436 if (parser->exception_)
441 parser->on_end_document();
// NOTE(review): presumably runs inside a catch block (not visible here) so
// exceptions from the handler do not propagate through the caller -- confirm.
445 parser->handle_exception();
// startElement callback: collects the element's attributes into an
// AttributeList and forwards them, with the element name, to
// SaxParser::on_start_element().
// NOTE(review): the remaining parameter lines of this signature are not
// visible here; the declaration lists (context, name, p).
449 void SaxParserCallback::start_element(void* context,
// `context` is the parser context; its _private field holds the SaxParser.
453 auto the_context = static_cast<_xmlParserCtxt*>(context);
454 auto parser = static_cast<SaxParser*>(the_context->_private);
456 SaxParser::AttributeList attributes;
// `p` is walked two entries at a time: cur[0] is an attribute name and cur[1]
// its value. The loop ends at a null `p` or at the first null name entry.
459 for(const xmlChar** cur = p; cur && *cur; cur += 2)
460 attributes.push_back(
461 SaxParser::Attribute( (char*)*cur, (char*)*(cur + 1) ));
465 parser->on_start_element(Glib::ustring((const char*) name), attributes);
// NOTE(review): presumably runs inside a catch block (not visible here) so
// exceptions from the handler do not propagate through the caller -- confirm.
469 parser->handle_exception();
// endElement callback: forwards the element name to
// SaxParser::on_end_element().
473 void SaxParserCallback::end_element(void* context, const xmlChar* name)
// `context` is the parser context; its _private field holds the SaxParser.
475 auto the_context = static_cast<_xmlParserCtxt*>(context);
476 auto parser = static_cast<SaxParser*>(the_context->_private);
480 parser->on_end_element(Glib::ustring((const char*) name));
// NOTE(review): presumably runs inside a catch block (not visible here) so
// exceptions from the handler do not propagate through the caller -- confirm.
484 parser->handle_exception();
// characters callback: forwards the `len` bytes starting at `ch` to
// SaxParser::on_characters().
488 void SaxParserCallback::characters(void * context, const xmlChar* ch, int len)
// `context` is the parser context; its _private field holds the SaxParser.
490 auto the_context = static_cast<_xmlParserCtxt*>(context);
491 auto parser = static_cast<SaxParser*>(the_context->_private);
495 // Here we force the use of Glib::ustring::ustring( InputIterator begin, InputIterator end )
496 // instead of Glib::ustring::ustring( const char*, size_type ) because it
497 // expects the length of the string in characters, not in bytes.
// The two pointers below delimit the byte range [ch, ch + len).
498 parser->on_characters(
500 reinterpret_cast<const char *>(ch),
501 reinterpret_cast<const char *>(ch + len) ) );
// NOTE(review): presumably runs inside a catch block (not visible here) so
// exceptions from the handler do not propagate through the caller -- confirm.
505 parser->handle_exception();
// comment callback: forwards the comment text to SaxParser::on_comment().
509 void SaxParserCallback::comment(void* context, const xmlChar* value)
// `context` is the parser context; its _private field holds the SaxParser.
511 auto the_context = static_cast<_xmlParserCtxt*>(context);
512 auto parser = static_cast<SaxParser*>(the_context->_private);
516 parser->on_comment(Glib::ustring((const char*) value));
// NOTE(review): presumably runs inside a catch block (not visible here) so
// exceptions from the handler do not propagate through the caller -- confirm.
520 parser->handle_exception();
// warning callback: formats the printf-style message and forwards it to
// SaxParser::on_warning().
524 void SaxParserCallback::warning(void* context, const char* fmt, ...)
// `context` is the parser context; its _private field holds the SaxParser.
526 auto the_context = static_cast<_xmlParserCtxt*>(context);
527 auto parser = static_cast<SaxParser*>(the_context->_private);
// NOTE(review): `arg` is presumably the va_list for the variadic arguments;
// its declaration and setup are not visible here.
531 const Glib::ustring buff = format_printf_message(fmt, arg);
536 parser->on_warning(buff);
// NOTE(review): presumably runs inside a catch block (not visible here) so
// exceptions from the handler do not propagate through the caller -- confirm.
540 parser->handle_exception();
// error callback: formats the printf-style message and forwards it to
// SaxParser::on_error().
544 void SaxParserCallback::error(void* context, const char* fmt, ...)
// `context` is the parser context; its _private field holds the SaxParser.
546 auto the_context = static_cast<_xmlParserCtxt*>(context);
547 auto parser = static_cast<SaxParser*>(the_context->_private);
// NOTE(review): the statement controlled by this check is not visible here;
// presumably the callback returns early once exception_ is set -- confirm.
549 if (parser->exception_)
// NOTE(review): `arg` is presumably the va_list for the variadic arguments;
// its declaration and setup are not visible here.
554 const Glib::ustring buff = format_printf_message(fmt, arg);
559 parser->on_error(buff);
// NOTE(review): presumably runs inside a catch block (not visible here) so
// exceptions from the handler do not propagate through the caller -- confirm.
563 parser->handle_exception();
// fatalError callback: formats the printf-style message and forwards it to
// SaxParser::on_fatal_error(), whose implementation in this file throws
// parse_error.
567 void SaxParserCallback::fatal_error(void* context, const char* fmt, ...)
// `context` is the parser context; its _private field holds the SaxParser.
569 auto the_context = static_cast<_xmlParserCtxt*>(context);
570 auto parser = static_cast<SaxParser*>(the_context->_private);
// NOTE(review): `arg` is presumably the va_list for the variadic arguments;
// its declaration and setup are not visible here.
574 const Glib::ustring buff = format_printf_message(fmt, arg);
579 parser->on_fatal_error(buff);
// NOTE(review): presumably runs inside a catch block (not visible here) so
// exceptions from the handler do not propagate through the caller -- confirm.
583 parser->handle_exception();
// cdataBlock callback: forwards the `len` bytes starting at `value` to
// SaxParser::on_cdata_block().
587 void SaxParserCallback::cdata_block(void* context, const xmlChar* value, int len)
// `context` is the parser context; its _private field holds the SaxParser.
589 auto the_context = static_cast<_xmlParserCtxt*>(context);
590 auto parser = static_cast<SaxParser*>(the_context->_private);
594 // Here we force the use of Glib::ustring::ustring( InputIterator begin, InputIterator end )
595 // see comments in SaxParserCallback::characters
// The two pointers below delimit the byte range [value, value + len).
596 parser->on_cdata_block(
598 reinterpret_cast<const char *>(value),
599 reinterpret_cast<const char *>(value + len) ) );
// NOTE(review): presumably runs inside a catch block (not visible here) so
// exceptions from the handler do not propagate through the caller -- confirm.
603 parser->handle_exception();
// internalSubset callback: converts the C strings to Glib::ustring and
// forwards them to SaxParser::on_internal_subset().
607 void SaxParserCallback::internal_subset(void* context, const xmlChar* name,
608 const xmlChar* publicId, const xmlChar* systemId)
// `context` is the parser context; its _private field holds the SaxParser.
610 auto the_context = static_cast<_xmlParserCtxt*>(context);
611 auto parser = static_cast<SaxParser*>(the_context->_private);
// publicId and systemId may be null; a null pointer is passed on as "".
615 const auto pid = publicId ? Glib::ustring((const char*) publicId) : "";
616 const auto sid = systemId ? Glib::ustring((const char*) systemId) : "";
// Unlike the two IDs, `name` is converted without a null check.
618 parser->on_internal_subset( Glib::ustring((const char*) name), pid, sid);
// NOTE(review): presumably runs inside a catch block (not visible here) so
// exceptions from the handler do not propagate through the caller -- confirm.
622 parser->handle_exception();