From f86ba34b57d1a307688402b495d8b0532770d98d Mon Sep 17 00:00:00 2001 From: DongHun Kwak Date: Wed, 15 Jun 2016 14:06:40 +0900 Subject: [PATCH] Imported Upstream version 2.9.4 Change-Id: Ia77571980e4b0410bb3314b12af5a9e5bf732a38 Signed-off-by: DongHun Kwak --- ChangeLog | 24 +- HTMLparser.c | 140 +- HTMLtree.c | 3 +- INSTALL | 4 +- Makefile.am | 6 +- Makefile.in | 74 +- NEWS | 22 +- SAX2.c | 12 +- aclocal.m4 | 222 +- autogen.sh | 81 + buf.c | 43 +- catalog.c | 10 +- compile | 347 ++ config.guess | 201 +- config.h.in | 3 +- config.sub | 53 +- configure | 2593 ++++++++++----- configure.ac | 136 +- debugXML.c | 14 +- depcomp | 2 +- dict.c | 55 +- doc/APIchunk13.html | 28 - doc/APIchunk26.html | 1 + doc/APIfiles.html | 1 + doc/APIfunctions.html | 2 - doc/APIsymbols.html | 1 + doc/Makefile.in | 31 +- doc/apibuild.py | 1 + doc/devhelp/Makefile.in | 33 +- doc/devhelp/general.html | 2 +- doc/devhelp/libxml2-dict.html | 32 +- doc/devhelp/libxml2-parser.html | 2 +- doc/devhelp/libxml2-tree.html | 5 +- doc/devhelp/libxml2-xmlmemory.html | 2 +- doc/devhelp/libxml2-xmlregexp.html | 2 +- doc/devhelp/libxml2-xmlstring.html | 8 +- doc/devhelp/libxml2.devhelp | 1 + doc/examples/Makefile.in | 36 +- doc/examples/io1.res | 2 +- doc/html/book1.html | 2 +- doc/html/index.html | 2 +- doc/html/libxml-dict.html | 28 +- doc/html/libxml-lib.html | 2 +- doc/html/libxml-parser.html | 2 +- doc/html/libxml-tree.html | 3 +- doc/html/libxml-xmlmemory.html | 2 +- doc/html/libxml-xmlregexp.html | 2 +- doc/html/libxml-xmlstring.html | 8 +- doc/index.py | 6 +- doc/libxml2-api.xml | 54 +- doc/libxml2.xsa | 144 +- doc/news.html | 163 +- doc/search.php | 5 +- doc/xml.html | 165 + encoding.c | 2 +- entities.c | 2 +- error.c | 6 +- example/Makefile.in | 36 +- include/Makefile.in | 31 +- include/libxml/Makefile.in | 33 +- include/libxml/dict.h | 22 +- include/libxml/parser.h | 4 +- include/libxml/parserInternals.h | 2 +- include/libxml/schemasInternals.h | 92 +- include/libxml/tree.h | 3 +- include/libxml/xmlerror.h | 2 +- include/libxml/xmlstring.h | 8 +- include/libxml/xmlversion.h | 12 +- include/libxml/xmlversion.h.in | 2 +- include/libxml/xpathInternals.h | 2 +- install-sh | 366 +-- libxml.h | 5 +- libxml.spec.in | 60 +- libxml2-config.cmake.in | 2 +- libxml2.spec | 66 +- ltmain.sh | 5528 ++++++++++++++++++++------------ m4/libtool.m4 | 2550 ++++++++------- m4/ltoptions.m4 | 127 +- m4/ltsugar.m4 | 7 +- m4/ltversion.m4 | 12 +- m4/lt~obsolete.m4 | 7 +- missing | 6 +- os400/README400 | 22 +- os400/config.h.in | 345 ++ os400/dlfcn/dlfcn.c | 2 +- os400/initscript.sh | 35 +- os400/libxmlmain.c | 102 + os400/libxmlrpg/DOCBparser.rpgle | 18 +- os400/libxmlrpg/HTMLparser.rpgle | 118 +- os400/libxmlrpg/HTMLtree.rpgle | 44 +- os400/libxmlrpg/SAX.rpgle | 40 +- os400/libxmlrpg/SAX2.rpgle | 56 +- os400/libxmlrpg/c14n.rpgle | 42 +- os400/libxmlrpg/catalog.rpgle | 43 +- os400/libxmlrpg/chvalid.rpgle | 58 +- os400/libxmlrpg/debugXML.rpgle | 55 +- os400/libxmlrpg/dict.rpgle | 31 +- os400/libxmlrpg/encoding.rpgle | 46 +- os400/libxmlrpg/entities.rpgle | 16 +- os400/libxmlrpg/globals.rpgle | 184 +- os400/libxmlrpg/hash.rpgle | 36 +- os400/libxmlrpg/list.rpgle | 31 +- os400/libxmlrpg/nanoftp.rpgle | 54 +- os400/libxmlrpg/nanohttp.rpgle | 23 +- os400/libxmlrpg/parser.rpgle | 305 +- os400/libxmlrpg/parserInternals.rpgle | 99 +- os400/libxmlrpg/pattern.rpgle | 43 +- os400/libxmlrpg/relaxng.rpgle | 45 +- os400/libxmlrpg/schemasInternals.rpgle | 53 +- os400/libxmlrpg/schematron.rpgle | 33 +- os400/libxmlrpg/threads.rpgle | 7 +- os400/libxmlrpg/tree.rpgle | 279 +- os400/libxmlrpg/uri.rpgle | 15 +- os400/libxmlrpg/valid.rpgle | 114 +- os400/libxmlrpg/xinclude.rpgle | 35 +- os400/libxmlrpg/xlink.rpgle | 13 +- os400/libxmlrpg/xmlIO.rpgle | 103 +- os400/libxmlrpg/xmlTypesC.rpgle | 26 + os400/libxmlrpg/xmlautomata.rpgle | 38 +- os400/libxmlrpg/xmlerror.rpgle | 27 +- os400/libxmlrpg/xmlmemory.rpgle | 50 +- os400/libxmlrpg/xmlmodule.rpgle | 16 +- os400/libxmlrpg/xmlreader.rpgle | 189 +- os400/libxmlrpg/xmlregexp.rpgle | 70 +- os400/libxmlrpg/xmlsave.rpgle | 37 +- os400/libxmlrpg/xmlschemas.rpgle | 43 +- os400/libxmlrpg/xmlschemastypes.rpgle | 63 +- os400/libxmlrpg/xmlstdarg.rpgle | 11 +- os400/libxmlrpg/xmlstring.rpgle | 84 +- os400/libxmlrpg/xmlunicode.rpgle | 832 +++-- os400/libxmlrpg/xmlversion.rpgle.in | 3 +- os400/libxmlrpg/xmlwriter.rpgle | 260 +- os400/libxmlrpg/xpath.rpgle | 182 +- os400/libxmlrpg/xpathInternals.rpgle | 133 +- os400/libxmlrpg/xpointer.rpgle | 13 +- os400/make-rpg.sh | 13 +- os400/make-src.sh | 126 +- os400/make.sh | 11 +- os400/rpgsupport.c | 14 +- os400/rpgsupport.h | 14 +- os400/xmlcatalog.cmd | 112 + os400/xmlcatlgcl.c | 288 ++ os400/xmllint.cmd | 146 + os400/xmllintcl.c | 216 ++ parser.c | 308 +- parserInternals.c | 191 +- python/Makefile.in | 39 +- python/drv_libxml2.py | 18 +- python/libxml.c | 2 +- python/libxml2.py | 1 + python/libxml2class.py | 1 + python/setup.py | 2 +- python/tests/Makefile.in | 33 +- relaxng.c | 15 +- result/HTML/758605.html | 3 + result/HTML/758605.html.err | 3 + result/HTML/758605.html.sax | 14 + result/HTML/758606.html | 2 + result/HTML/758606.html.err | 16 + result/HTML/758606.html.sax | 10 + result/HTML/758606_2.html | 2 + result/HTML/758606_2.html.err | 16 + result/HTML/758606_2.html.sax | 17 + result/XPath/expr/base | 8 + result/XPath/tests/chaptersbase | 10 + result/XPath/tests/nssimple | 24 + result/XPath/tests/simplebase | 54 + result/cdata-2-byte-UTF-8.xml | 6 + result/cdata-2-byte-UTF-8.xml.rde | 15 + result/cdata-2-byte-UTF-8.xml.rdr | 15 + result/cdata-2-byte-UTF-8.xml.sax | 18 + result/cdata-2-byte-UTF-8.xml.sax2 | 18 + result/cdata-3-byte-UTF-8.xml | 7 + result/cdata-3-byte-UTF-8.xml.rde | 20 + result/cdata-3-byte-UTF-8.xml.rdr | 20 + result/cdata-3-byte-UTF-8.xml.sax | 23 + result/cdata-3-byte-UTF-8.xml.sax2 | 23 + result/cdata-4-byte-UTF-8.xml | 8 + result/cdata-4-byte-UTF-8.xml.rde | 25 + result/cdata-4-byte-UTF-8.xml.rdr | 25 + result/cdata-4-byte-UTF-8.xml.sax | 28 + result/cdata-4-byte-UTF-8.xml.sax2 | 28 + result/ent_738805.xml | 15 + result/ent_738805.xml.rde | 15 + result/ent_738805.xml.rdr | 31 + result/ent_738805.xml.sax | 66 + result/ent_738805.xml.sax2 | 66 + result/errors/754946.xml | 0 result/errors/754946.xml.err | 19 + result/errors/754946.xml.str | 4 + result/errors/754947.xml | 0 result/errors/754947.xml.err | 7 + result/errors/754947.xml.str | 5 + result/errors/758588.xml | 0 result/errors/758588.xml.err | 9 + result/errors/758588.xml.str | 10 + result/errors/759020.xml | 0 result/errors/759020.xml.err | 6 + result/errors/759020.xml.str | 7 + result/errors/759398.xml | 0 result/errors/759398.xml.err | 9 + result/errors/759398.xml.str | 5 + result/errors/759573-2.xml | 0 result/errors/759573-2.xml.err | 58 + result/errors/759573-2.xml.str | 4 + result/errors/759573.xml | 0 result/errors/759573.xml.err | 31 + result/errors/759573.xml.str | 4 + result/errors/cdata.xml.str | 4 +- result/errors/content1.xml.err | 2 +- result/noent/cdata-2-byte-UTF-8.xml | 6 + result/noent/cdata-3-byte-UTF-8.xml | 7 + result/noent/cdata-4-byte-UTF-8.xml | 8 + result/noent/ent_738805.xml | 15 + result/relaxng/565219_0 | 0 result/relaxng/565219_0.err | 2 + result/relaxng/565219_1 | 0 result/relaxng/565219_1.err | 1 + result/relaxng/565219_2 | 0 result/relaxng/565219_2.err | 2 + result/relaxng/565219_err | 1 + result/relaxng/565219_valid | 0 result/relaxng/710744_err | 1 + result/relaxng/710744_valid | 0 result/relaxng/pattern3_1 | 0 result/relaxng/pattern3_1.err | 1 + result/schemas/regexp-char-ref_0_0.err | 0 result/schemas/regexp-char-ref_1_0.err | 0 result/valid/737840.xml | 10 + result/valid/737840.xml.err | 0 result/valid/737840.xml.err.rdr | 0 result/valid/t8.xml.err | 2 +- result/valid/t8a.xml.err | 2 +- runtest.c | 94 +- schematron.c | 6 +- test/HTML/758605.html | 1 + test/HTML/758606.html | 1 + test/HTML/758606_2.html | 1 + test/XPath/docs/ns | 3 + test/XPath/expr/base | 2 + test/XPath/tests/chaptersbase | 1 + test/XPath/tests/nssimple | 3 + test/XPath/tests/simplebase | 6 + test/cdata-2-byte-UTF-8.xml | 6 + test/cdata-3-byte-UTF-8.xml | 7 + test/cdata-4-byte-UTF-8.xml | 8 + test/ent_738805.xml | 16 + test/errors/754946.xml | 1 + test/errors/754947.xml | 1 + test/errors/758588.xml | 1 + test/errors/759020.xml | 46 + test/errors/759398.xml | 326 ++ test/errors/759573-2.xml | 9 + test/errors/759573.xml | 1 + test/relaxng/565219.rng | 14 + test/relaxng/565219_0.xml | 1 + test/relaxng/565219_1.xml | 1 + test/relaxng/565219_2.xml | 1 + test/relaxng/pattern3.rng | 11 + test/relaxng/pattern3_1.xml | 1 + test/valid/737840.xml | 10 + test/valid/dtds/737840.ent | 1 + testModule.c | 2 +- testapi.c | 22 +- testdict.c | 4 +- threads.c | 6 +- timsort.h | 74 +- tree.c | 42 +- uri.c | 11 +- valid.c | 16 +- win32/VC10/config.h | 2 + win32/configure.js | 2 +- xinclude.c | 4 +- xmlIO.c | 26 +- xmlcatalog.c | 1 + xmllint.c | 30 +- xmlmemory.c | 26 +- xmlreader.c | 44 +- xmlregexp.c | 20 +- xmlsave.c | 8 +- xmlschemas.c | 128 +- xmlschemastypes.c | 25 +- xmlstring.c | 70 +- xmlwriter.c | 4 +- xpath.c | 59 +- xpointer.c | 2 +- xstc/Makefile.in | 31 +- xzlib.c | 10 +- 288 files changed, 15032 insertions(+), 7100 deletions(-) create mode 100755 autogen.sh create mode 100755 compile create mode 100644 os400/config.h.in create mode 100644 os400/libxmlmain.c create mode 100644 os400/libxmlrpg/xmlTypesC.rpgle create mode 100644 os400/xmlcatalog.cmd create mode 100644 os400/xmlcatlgcl.c create mode 100644 os400/xmllint.cmd create mode 100644 os400/xmllintcl.c create mode 100644 result/HTML/758605.html create mode 100644 result/HTML/758605.html.err create mode 100644 result/HTML/758605.html.sax create mode 100644 result/HTML/758606.html create mode 100644 result/HTML/758606.html.err create mode 100644 result/HTML/758606.html.sax create mode 100644 result/HTML/758606_2.html create mode 100644 result/HTML/758606_2.html.err create mode 100644 result/HTML/758606_2.html.sax create mode 100644 result/XPath/tests/nssimple create mode 100644 result/cdata-2-byte-UTF-8.xml create mode 100644 result/cdata-2-byte-UTF-8.xml.rde create mode 100644 result/cdata-2-byte-UTF-8.xml.rdr create mode 100644 result/cdata-2-byte-UTF-8.xml.sax create mode 100644 result/cdata-2-byte-UTF-8.xml.sax2 create mode 100644 result/cdata-3-byte-UTF-8.xml create mode 100644 result/cdata-3-byte-UTF-8.xml.rde create mode 100644 result/cdata-3-byte-UTF-8.xml.rdr create mode 100644 result/cdata-3-byte-UTF-8.xml.sax create mode 100644 result/cdata-3-byte-UTF-8.xml.sax2 create mode 100644 result/cdata-4-byte-UTF-8.xml create mode 100644 result/cdata-4-byte-UTF-8.xml.rde create mode 100644 result/cdata-4-byte-UTF-8.xml.rdr create mode 100644 result/cdata-4-byte-UTF-8.xml.sax create mode 100644 result/cdata-4-byte-UTF-8.xml.sax2 create mode 100644 result/ent_738805.xml create mode 100644 result/ent_738805.xml.rde create mode 100644 result/ent_738805.xml.rdr create mode 100644 result/ent_738805.xml.sax create mode 100644 result/ent_738805.xml.sax2 create mode 100644 result/errors/754946.xml create mode 100644 result/errors/754946.xml.err create mode 100644 result/errors/754946.xml.str create mode 100644 result/errors/754947.xml create mode 100644 result/errors/754947.xml.err create mode 100644 result/errors/754947.xml.str create mode 100644 result/errors/758588.xml create mode 100644 result/errors/758588.xml.err create mode 100644 result/errors/758588.xml.str create mode 100644 result/errors/759020.xml create mode 100644 result/errors/759020.xml.err create mode 100644 result/errors/759020.xml.str create mode 100644 result/errors/759398.xml create mode 100644 result/errors/759398.xml.err create mode 100644 result/errors/759398.xml.str create mode 100644 result/errors/759573-2.xml create mode 100644 result/errors/759573-2.xml.err create mode 100644 result/errors/759573-2.xml.str create mode 100644 result/errors/759573.xml create mode 100644 result/errors/759573.xml.err create mode 100644 result/errors/759573.xml.str create mode 100644 result/noent/cdata-2-byte-UTF-8.xml create mode 100644 result/noent/cdata-3-byte-UTF-8.xml create mode 100644 result/noent/cdata-4-byte-UTF-8.xml create mode 100644 result/noent/ent_738805.xml create mode 100644 result/relaxng/565219_0 create mode 100644 result/relaxng/565219_0.err create mode 100644 result/relaxng/565219_1 create mode 100644 result/relaxng/565219_1.err create mode 100644 result/relaxng/565219_2 create mode 100644 result/relaxng/565219_2.err create mode 100644 result/relaxng/565219_err create mode 100644 result/relaxng/565219_valid create mode 100644 result/relaxng/710744_err create mode 100644 result/relaxng/710744_valid create mode 100644 result/relaxng/pattern3_1 create mode 100644 result/relaxng/pattern3_1.err create mode 100644 result/schemas/regexp-char-ref_0_0.err create mode 100644 result/schemas/regexp-char-ref_1_0.err create mode 100644 result/valid/737840.xml create mode 100644 result/valid/737840.xml.err create mode 100644 result/valid/737840.xml.err.rdr create mode 100644 test/HTML/758605.html create mode 100644 test/HTML/758606.html create mode 100644 test/HTML/758606_2.html create mode 100644 test/XPath/docs/ns create mode 100644 test/XPath/tests/nssimple create mode 100644 test/cdata-2-byte-UTF-8.xml create mode 100644 test/cdata-3-byte-UTF-8.xml create mode 100644 test/cdata-4-byte-UTF-8.xml create mode 100644 test/ent_738805.xml create mode 100644 test/errors/754946.xml create mode 100644 test/errors/754947.xml create mode 100644 test/errors/758588.xml create mode 100644 test/errors/759020.xml create mode 100755 test/errors/759398.xml create mode 100644 test/errors/759573-2.xml create mode 100644 test/errors/759573.xml create mode 100644 test/relaxng/565219.rng create mode 100644 test/relaxng/565219_0.xml create mode 100644 test/relaxng/565219_1.xml create mode 100644 test/relaxng/565219_2.xml create mode 100644 test/relaxng/pattern3.rng create mode 100644 test/relaxng/pattern3_1.xml create mode 100644 test/valid/737840.xml create mode 100644 test/valid/dtds/737840.ent diff --git a/ChangeLog b/ChangeLog index 08725dd..ef6cb8e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -593,7 +593,7 @@ Thu Apr 24 13:56:53 CEST 2008 Daniel Veillard Tue Apr 22 10:27:17 CEST 2008 Daniel Veillard - * dict.c: improvement on the hashing of the dictionnary, with visible + * dict.c: improvement on the hashing of the dictionary, with visible speed up as the number of strings in the hash increases, work from Stefan Behnel @@ -5017,7 +5017,7 @@ Mon Jan 24 00:47:41 CET 2005 Daniel Veillard Sun Jan 23 23:54:39 CET 2005 Daniel Veillard * hash.c include/libxml/hash.h: added xmlHashCreateDict where - the hash reuses the dictionnary for internal strings + the hash reuses the dictionary for internal strings * entities.c valid.c parser.c: reuse that new API, leads to a decent speedup when parsing for example DocBook documents. @@ -5371,7 +5371,7 @@ Fri Nov 26 11:44:36 CET 2004 Daniel Veillard Wed Nov 24 13:41:52 CET 2004 Daniel Veillard * dict.c include/libxml/dict.h: added xmlDictExists() to the - dictionnary interface. + dictionary interface. * xmlreader.c: applying xmlTextReaderHasAttributes fix for namespaces from Rob Richards @@ -5697,7 +5697,7 @@ Tue Oct 26 23:57:02 CEST 2004 Daniel Veillard Tue Oct 26 18:09:59 CEST 2004 Daniel Veillard * debugXML.c include/libxml/xmlerror.h: added checking for names - values and dictionnaries generates a tons of errors + values and dictionaries generates a tons of errors * SAX2.ccatalog.c parser.c relaxng.c tree.c xinclude.c xmlwriter.c include/libxml/tree.h: fixing the errors in the regression tests @@ -7746,14 +7746,14 @@ Fri Jan 23 14:03:21 CET 2004 Daniel Veillard make tests * xpath.c include/libxml/xpath.h: added xmlXPathCtxtCompile() to compile an XPath expression within a context, currently the goal - is to be able to reuse the XSLT stylesheet dictionnary, but this + is to be able to reuse the XSLT stylesheet dictionary, but this opens the door to others possible optimizations. * dict.c include/libxml/dict.h: added xmlDictCreateSub() which allows - to build a new dictionnary based on another read-only dictionnary. - This is needed for XSLT to keep the stylesheet dictionnary read-only + to build a new dictionary based on another read-only dictionary. + This is needed for XSLT to keep the stylesheet dictionary read-only while being able to reuse the strings for the transformation - dictionnary. - * xinclude.c: fixed a dictionnar reference counting problem occuring + dictionary. + * xinclude.c: fixed a dictionary reference counting problem occuring when document parsing failed. * testSAX.c: adding option --repeat for timing 100times the parsing * doc/* : rebuilt all the docs @@ -7806,7 +7806,7 @@ Mon Jan 12 17:22:57 CET 2004 Daniel Veillard Thu Jan 8 17:57:50 CET 2004 Daniel Veillard * xmlschemas.c: removed a memory leak remaining from the switch - to a dictionnary for string allocations c.f. #130891 + to a dictionary for string allocations c.f. #130891 Thu Jan 8 17:48:46 CET 2004 Daniel Veillard @@ -7928,7 +7928,7 @@ Fri Jan 2 22:58:29 HKT 2004 William Brack Fri Jan 2 11:40:06 CET 2004 Daniel Veillard * SAX2.c: found and fixed a bug misallocating some non - blank text node strings from the dictionnary. + blank text node strings from the dictionary. * xmlmemory.c: fixed a problem with the memory debug mutex release. @@ -9386,7 +9386,7 @@ Sat Sep 27 01:25:39 CEST 2003 Daniel Veillard * parser.c: William's change allowed to spot a nasty bug in xmlDoRead if the result is not well formed that ctxt->myDoc is not NULL - and uses the context dictionnary. + and uses the context dictionary. Fri Sep 26 21:09:34 CEST 2003 Daniel Veillard diff --git a/HTMLparser.c b/HTMLparser.c index d329d3b..d1395fa 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -105,7 +105,7 @@ htmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) * * Handle a fatal parser error, i.e. violating Well-Formedness constraints */ -static void +static void LIBXML_ATTR_FORMAT(3,0) htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg, const xmlChar *str1, const xmlChar *str2) { @@ -132,7 +132,7 @@ htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, * * Handle a fatal parser error, i.e. violating Well-Formedness constraints */ -static void +static void LIBXML_ATTR_FORMAT(3,0) htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg, int val) { @@ -303,6 +303,7 @@ htmlNodeInfoPop(htmlParserCtxtPtr ctxt) #define UPP(val) (toupper(ctxt->input->cur[(val)])) #define CUR_PTR ctxt->input->cur +#define BASE_PTR ctxt->input->base #define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ @@ -2471,6 +2472,10 @@ htmlParseName(htmlParserCtxtPtr ctxt) { (*in == '_') || (*in == '-') || (*in == ':') || (*in == '.')) in++; + + if (in == ctxt->input->end) + return(NULL); + if ((*in > 0) && (*in < 0x80)) { count = in - ctxt->input->cur; ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); @@ -2488,6 +2493,7 @@ htmlParseNameComplex(xmlParserCtxtPtr ctxt) { int len = 0, l; int c; int count = 0; + const xmlChar *base = ctxt->input->base; /* * Handler for more complex cases @@ -2513,7 +2519,18 @@ htmlParseNameComplex(xmlParserCtxtPtr ctxt) { len += l; NEXTL(l); c = CUR_CHAR(l); + if (ctxt->input->base != base) { + /* + * We changed encoding from an unknown encoding + * Input buffer changed location, so we better start again + */ + return(htmlParseNameComplex(ctxt)); + } } + + if (ctxt->input->base > ctxt->input->cur - len) + return(NULL); + return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); } @@ -2765,31 +2782,43 @@ htmlParseAttValue(htmlParserCtxtPtr ctxt) { static xmlChar * htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { - const xmlChar *q; + size_t len = 0, startPosition = 0; xmlChar *ret = NULL; if (CUR == '"') { NEXT; - q = CUR_PTR; - while ((IS_CHAR_CH(CUR)) && (CUR != '"')) + + if (CUR_PTR < BASE_PTR) + return(ret); + startPosition = CUR_PTR - BASE_PTR; + + while ((IS_CHAR_CH(CUR)) && (CUR != '"')) { NEXT; + len++; + } if (!IS_CHAR_CH(CUR)) { htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, "Unfinished SystemLiteral\n", NULL, NULL); } else { - ret = xmlStrndup(q, CUR_PTR - q); + ret = xmlStrndup((BASE_PTR+startPosition), len); NEXT; } } else if (CUR == '\'') { NEXT; - q = CUR_PTR; - while ((IS_CHAR_CH(CUR)) && (CUR != '\'')) + + if (CUR_PTR < BASE_PTR) + return(ret); + startPosition = CUR_PTR - BASE_PTR; + + while ((IS_CHAR_CH(CUR)) && (CUR != '\'')) { NEXT; + len++; + } if (!IS_CHAR_CH(CUR)) { htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, "Unfinished SystemLiteral\n", NULL, NULL); } else { - ret = xmlStrndup(q, CUR_PTR - q); + ret = xmlStrndup((BASE_PTR+startPosition), len); NEXT; } } else { @@ -2813,32 +2842,47 @@ htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { static xmlChar * htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) { - const xmlChar *q; + size_t len = 0, startPosition = 0; xmlChar *ret = NULL; /* * Name ::= (Letter | '_') (NameChar)* */ if (CUR == '"') { NEXT; - q = CUR_PTR; - while (IS_PUBIDCHAR_CH(CUR)) NEXT; + + if (CUR_PTR < BASE_PTR) + return(ret); + startPosition = CUR_PTR - BASE_PTR; + + while (IS_PUBIDCHAR_CH(CUR)) { + len++; + NEXT; + } + if (CUR != '"') { htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, "Unfinished PubidLiteral\n", NULL, NULL); } else { - ret = xmlStrndup(q, CUR_PTR - q); + ret = xmlStrndup((BASE_PTR + startPosition), len); NEXT; } } else if (CUR == '\'') { NEXT; - q = CUR_PTR; - while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\'')) - NEXT; + + if (CUR_PTR < BASE_PTR) + return(ret); + startPosition = CUR_PTR - BASE_PTR; + + while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\'')){ + len++; + NEXT; + } + if (CUR != '\'') { htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, "Unfinished PubidLiteral\n", NULL, NULL); } else { - ret = xmlStrndup(q, CUR_PTR - q); + ret = xmlStrndup((BASE_PTR + startPosition), len); NEXT; } } else { @@ -2948,8 +2992,9 @@ htmlParseScript(htmlParserCtxtPtr ctxt) { /** - * htmlParseCharData: + * htmlParseCharDataInternal: * @ctxt: an HTML parser context + * @readahead: optional read ahead character in ascii range * * parse a CharData section. * if we are within a CDATA section ']]>' marks an end of section. @@ -2958,12 +3003,15 @@ htmlParseScript(htmlParserCtxtPtr ctxt) { */ static void -htmlParseCharData(htmlParserCtxtPtr ctxt) { - xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5]; +htmlParseCharDataInternal(htmlParserCtxtPtr ctxt, int readahead) { + xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 6]; int nbchar = 0; int cur, l; int chunk = 0; + if (readahead) + buf[nbchar++] = readahead; + SHRINK; cur = CUR_CHAR(l); while (((cur != '<') || (ctxt->token == '<')) && @@ -3043,6 +3091,21 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) { } /** + * htmlParseCharData: + * @ctxt: an HTML parser context + * + * parse a CharData section. + * if we are within a CDATA section ']]>' marks an end of section. + * + * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) + */ + +static void +htmlParseCharData(htmlParserCtxtPtr ctxt) { + htmlParseCharDataInternal(ctxt, 0); +} + +/** * htmlParseExternalID: * @ctxt: an HTML parser context * @publicID: a xmlChar** receiving PubidLiteral @@ -3245,12 +3308,17 @@ htmlParseComment(htmlParserCtxtPtr ctxt) { ctxt->instate = state; return; } + len = 0; + buf[len] = 0; q = CUR_CHAR(ql); + if (!IS_CHAR(q)) + goto unfinished; NEXTL(ql); r = CUR_CHAR(rl); + if (!IS_CHAR(r)) + goto unfinished; NEXTL(rl); cur = CUR_CHAR(l); - len = 0; while (IS_CHAR(cur) && ((cur != '>') || (r != '-') || (q != '-'))) { @@ -3281,18 +3349,20 @@ htmlParseComment(htmlParserCtxtPtr ctxt) { } } buf[len] = 0; - if (!IS_CHAR(cur)) { - htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, - "Comment not terminated \n + +

+

+
diff --git a/result/cdata-2-byte-UTF-8.xml.rde b/result/cdata-2-byte-UTF-8.xml.rde new file mode 100644 index 0000000..2eb2940 --- /dev/null +++ b/result/cdata-2-byte-UTF-8.xml.rde @@ -0,0 +1,15 @@ +0 8 #comment 0 1 This tests that two-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 ČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČ +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 ČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČ +1 15 p 0 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/cdata-2-byte-UTF-8.xml.rdr b/result/cdata-2-byte-UTF-8.xml.rdr new file mode 100644 index 0000000..2eb2940 --- /dev/null +++ b/result/cdata-2-byte-UTF-8.xml.rdr @@ -0,0 +1,15 @@ +0 8 #comment 0 1 This tests that two-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 ČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČ +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 ČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČČ +1 15 p 0 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/cdata-2-byte-UTF-8.xml.sax b/result/cdata-2-byte-UTF-8.xml.sax new file mode 100644 index 0000000..f397f6a --- /dev/null +++ b/result/cdata-2-byte-UTF-8.xml.sax @@ -0,0 +1,18 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( This tests that two-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). ) +SAX.startElement(doc) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata(ČČČČČČČČČČ, 1200) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata( ČČČČČČČČČÄ, 1201) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.endElement(doc) +SAX.endDocument() diff --git a/result/cdata-2-byte-UTF-8.xml.sax2 b/result/cdata-2-byte-UTF-8.xml.sax2 new file mode 100644 index 0000000..2da2d50 --- /dev/null +++ b/result/cdata-2-byte-UTF-8.xml.sax2 @@ -0,0 +1,18 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( This tests that two-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). ) +SAX.startElementNs(doc, NULL, NULL, 0, 0, 0) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata(ČČČČČČČČČČ, 1200) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata( ČČČČČČČČČÄ, 1201) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.endElementNs(doc, NULL, NULL) +SAX.endDocument() diff --git a/result/cdata-3-byte-UTF-8.xml b/result/cdata-3-byte-UTF-8.xml new file mode 100644 index 0000000..b959a12 --- /dev/null +++ b/result/cdata-3-byte-UTF-8.xml @@ -0,0 +1,7 @@ + + + +

+

+

+
diff --git a/result/cdata-3-byte-UTF-8.xml.rde b/result/cdata-3-byte-UTF-8.xml.rde new file mode 100644 index 0000000..3f4d1c5 --- /dev/null +++ b/result/cdata-3-byte-UTF-8.xml.rde @@ -0,0 +1,20 @@ +0 8 #comment 0 1 This tests that three-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛 +1 15 p 0 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/cdata-3-byte-UTF-8.xml.rdr b/result/cdata-3-byte-UTF-8.xml.rdr new file mode 100644 index 0000000..3f4d1c5 --- /dev/null +++ b/result/cdata-3-byte-UTF-8.xml.rdr @@ -0,0 +1,20 @@ +0 8 #comment 0 1 This tests that three-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛牛 +1 15 p 0 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/cdata-3-byte-UTF-8.xml.sax b/result/cdata-3-byte-UTF-8.xml.sax new file mode 100644 index 0000000..2f73e7c --- /dev/null +++ b/result/cdata-3-byte-UTF-8.xml.sax @@ -0,0 +1,23 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( This tests that three-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). ) +SAX.startElement(doc) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata(牛牛牛牛牛牛ç‰, 1200) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata( 牛牛牛牛牛牛ç, 1201) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata( 牛牛牛牛牛牛, 1202) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.endElement(doc) +SAX.endDocument() diff --git a/result/cdata-3-byte-UTF-8.xml.sax2 b/result/cdata-3-byte-UTF-8.xml.sax2 new file mode 100644 index 0000000..3969579 --- /dev/null +++ b/result/cdata-3-byte-UTF-8.xml.sax2 @@ -0,0 +1,23 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( This tests that three-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). ) +SAX.startElementNs(doc, NULL, NULL, 0, 0, 0) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata(牛牛牛牛牛牛ç‰, 1200) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata( 牛牛牛牛牛牛ç, 1201) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata( 牛牛牛牛牛牛, 1202) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.endElementNs(doc, NULL, NULL) +SAX.endDocument() diff --git a/result/cdata-4-byte-UTF-8.xml b/result/cdata-4-byte-UTF-8.xml new file mode 100644 index 0000000..4d1d9a8 --- /dev/null +++ b/result/cdata-4-byte-UTF-8.xml @@ -0,0 +1,8 @@ + + + +

+

+

+

+
diff --git a/result/cdata-4-byte-UTF-8.xml.rde b/result/cdata-4-byte-UTF-8.xml.rde new file mode 100644 index 0000000..437b79e --- /dev/null +++ b/result/cdata-4-byte-UTF-8.xml.rde @@ -0,0 +1,25 @@ +0 8 #comment 0 1 This tests that four-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦 +1 15 p 0 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/cdata-4-byte-UTF-8.xml.rdr b/result/cdata-4-byte-UTF-8.xml.rdr new file mode 100644 index 0000000..437b79e --- /dev/null +++ b/result/cdata-4-byte-UTF-8.xml.rdr @@ -0,0 +1,25 @@ +0 8 #comment 0 1 This tests that four-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦 +1 15 p 0 0 +1 14 #text 0 1 + +1 1 p 0 0 +2 4 #cdata-section 0 1 🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦🍦 +1 15 p 0 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/cdata-4-byte-UTF-8.xml.sax b/result/cdata-4-byte-UTF-8.xml.sax new file mode 100644 index 0000000..d8abcfb --- /dev/null +++ b/result/cdata-4-byte-UTF-8.xml.sax @@ -0,0 +1,28 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( This tests that four-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). ) +SAX.startElement(doc) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata(🍦🍦🍦🍦🍦, 1200) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata( 🍦🍦🍦🍦ðŸ, 1201) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata( 🍦🍦🍦🍦ðŸ, 1202) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.pcdata( 🍦🍦🍦🍦ð, 1203) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.endElement(doc) +SAX.endDocument() diff --git a/result/cdata-4-byte-UTF-8.xml.sax2 b/result/cdata-4-byte-UTF-8.xml.sax2 new file mode 100644 index 0000000..5e07d83 --- /dev/null +++ b/result/cdata-4-byte-UTF-8.xml.sax2 @@ -0,0 +1,28 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( This tests that four-byte UTF-8 characters are parsed properly when split across a buffer boundary of length XML_PARSER_BIG_BUFFER_SIZE (300 bytes). ) +SAX.startElementNs(doc, NULL, NULL, 0, 0, 0) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata(🍦🍦🍦🍦🍦, 1200) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata( 🍦🍦🍦🍦ðŸ, 1201) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata( 🍦🍦🍦🍦ðŸ, 1202) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.startElementNs(p, NULL, NULL, 0, 0, 0) +SAX.pcdata( 🍦🍦🍦🍦ð, 1203) +SAX.endElementNs(p, NULL, NULL) +SAX.characters( +, 1) +SAX.endElementNs(doc, NULL, NULL) +SAX.endDocument() diff --git a/result/ent_738805.xml b/result/ent_738805.xml new file mode 100644 index 0000000..d285eee --- /dev/null +++ b/result/ent_738805.xml @@ -0,0 +1,15 @@ + + + +]> + + + + +&a; should appear after colon: &a; +&b; should appear after colon: &a; +&a; should appear after colon: &b; +&b; should appear after colon: &b; + + diff --git a/result/ent_738805.xml.rde b/result/ent_738805.xml.rde new file mode 100644 index 0000000..fa086fe --- /dev/null +++ b/result/ent_738805.xml.rde @@ -0,0 +1,15 @@ +0 10 somedoc 0 0 +0 1 somedoc 0 0 +1 14 #text 0 1 + + +1 1 somebeacon 1 0 +1 3 #text 0 1 + +something should appear after colon: something +something should appear after colon: something +something should appear after colon: something +something should appear after colon: something + + +0 15 somedoc 0 0 diff --git a/result/ent_738805.xml.rdr b/result/ent_738805.xml.rdr new file mode 100644 index 0000000..c52dbf1 --- /dev/null +++ b/result/ent_738805.xml.rdr @@ -0,0 +1,31 @@ +0 10 somedoc 0 0 +0 1 somedoc 0 0 +1 14 #text 0 1 + + +1 1 somebeacon 1 0 +1 14 #text 0 1 + + +1 5 a 0 0 +1 3 #text 0 1 should appear after colon: +1 5 a 0 0 +1 14 #text 0 1 + +1 5 b 0 0 +1 3 #text 0 1 should appear after colon: +1 5 a 0 0 +1 14 #text 0 1 + +1 5 a 0 0 +1 3 #text 0 1 should appear after colon: +1 5 b 0 0 +1 14 #text 0 1 + +1 5 b 0 0 +1 3 #text 0 1 should appear after colon: +1 5 b 0 0 +1 14 #text 0 1 + + +0 15 somedoc 0 0 diff --git a/result/ent_738805.xml.sax b/result/ent_738805.xml.sax new file mode 100644 index 0000000..2649117 --- /dev/null +++ b/result/ent_738805.xml.sax @@ -0,0 +1,66 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.internalSubset(somedoc, , ) +SAX.entityDecl(a, 1, (null), (null), something) +SAX.getEntity(a) +SAX.entityDecl(b, 1, (null), (null), &a;) +SAX.getEntity(b) +SAX.externalSubset(somedoc, , ) +SAX.startElement(somedoc) +SAX.characters( + +, 2) +SAX.getEntity(b) +SAX.getEntity(a) +SAX.startElement(somebeacon, someattribute='&b;') +SAX.endElement(somebeacon) +SAX.characters( + +, 2) +SAX.getEntity(a) +SAX.characters(something, 9) +SAX.reference(a) +SAX.characters( should appear after colon: , 28) +SAX.getEntity(a) +SAX.characters(something, 9) +SAX.reference(a) +SAX.characters( +, 1) +SAX.getEntity(b) +SAX.getEntity(a) +SAX.characters(something, 9) +SAX.reference(a) +SAX.reference(b) +SAX.characters( should appear after colon: , 28) +SAX.getEntity(a) +SAX.characters(something, 9) +SAX.reference(a) +SAX.characters( +, 1) +SAX.getEntity(a) +SAX.characters(something, 9) +SAX.reference(a) +SAX.characters( should appear after colon: , 28) +SAX.getEntity(b) +SAX.getEntity(a) +SAX.characters(something, 9) +SAX.reference(a) +SAX.reference(b) +SAX.characters( +, 1) +SAX.getEntity(b) +SAX.getEntity(a) +SAX.characters(something, 9) +SAX.reference(a) +SAX.reference(b) +SAX.characters( should appear after colon: , 28) +SAX.getEntity(b) +SAX.getEntity(a) +SAX.characters(something, 9) +SAX.reference(a) +SAX.reference(b) +SAX.characters( + +, 2) +SAX.endElement(somedoc) +SAX.endDocument() diff --git a/result/ent_738805.xml.sax2 b/result/ent_738805.xml.sax2 new file mode 100644 index 0000000..1eae781 --- /dev/null +++ b/result/ent_738805.xml.sax2 @@ -0,0 +1,66 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.internalSubset(somedoc, , ) +SAX.entityDecl(a, 1, (null), (null), something) +SAX.getEntity(a) +SAX.entityDecl(b, 1, (null), (null), &a;) +SAX.getEntity(b) +SAX.externalSubset(somedoc, , ) +SAX.startElementNs(somedoc, NULL, NULL, 0, 0, 0) +SAX.characters( + +, 2) +SAX.getEntity(b) +SAX.getEntity(a) +SAX.startElementNs(somebeacon, NULL, NULL, 0, 1, 0, someattribute='&b;...', 3) +SAX.endElementNs(somebeacon, NULL, NULL) +SAX.characters( + +, 2) +SAX.getEntity(a) +SAX.characters(something, 9) +SAX.reference(a) +SAX.characters( should appear after colon: , 28) +SAX.getEntity(a) +SAX.characters(something, 9) +SAX.reference(a) +SAX.characters( +, 1) +SAX.getEntity(b) +SAX.getEntity(a) +SAX.characters(something, 9) +SAX.reference(a) +SAX.reference(b) +SAX.characters( should appear after colon: , 28) +SAX.getEntity(a) +SAX.characters(something, 9) +SAX.reference(a) +SAX.characters( +, 1) +SAX.getEntity(a) +SAX.characters(something, 9) +SAX.reference(a) +SAX.characters( should appear after colon: , 28) +SAX.getEntity(b) +SAX.getEntity(a) +SAX.characters(something, 9) +SAX.reference(a) +SAX.reference(b) +SAX.characters( +, 1) +SAX.getEntity(b) +SAX.getEntity(a) +SAX.characters(something, 9) +SAX.reference(a) +SAX.reference(b) +SAX.characters( should appear after colon: , 28) +SAX.getEntity(b) +SAX.getEntity(a) +SAX.characters(something, 9) +SAX.reference(a) +SAX.reference(b) +SAX.characters( + +, 2) +SAX.endElementNs(somedoc, NULL, NULL) +SAX.endDocument() diff --git a/result/errors/754946.xml b/result/errors/754946.xml new file mode 100644 index 0000000..e69de29 diff --git a/result/errors/754946.xml.err b/result/errors/754946.xml.err new file mode 100644 index 0000000..c03e35b --- /dev/null +++ b/result/errors/754946.xml.err @@ -0,0 +1,19 @@ +Entity: line 1: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration + + %SYSTEM; + ^ +Entity: line 1: +A + ^ +./test/errors/754947.xml:1: parser error : Premature end of data in tag d line 1 + + ^ diff --git a/result/errors/754947.xml.str b/result/errors/754947.xml.str new file mode 100644 index 0000000..4d2f52e --- /dev/null +++ b/result/errors/754947.xml.str @@ -0,0 +1,5 @@ +./test/errors/754947.xml:1: parser error : Input is not proper UTF-8, indicate encoding ! +Bytes: 0xEE 0x5D 0x5D 0x3E + + ^ +./test/errors/754947.xml : failed to parse diff --git a/result/errors/758588.xml b/result/errors/758588.xml new file mode 100644 index 0000000..e69de29 diff --git a/result/errors/758588.xml.err b/result/errors/758588.xml.err new file mode 100644 index 0000000..dfa59bc --- /dev/null +++ b/result/errors/758588.xml.err @@ -0,0 +1,9 @@ +./test/errors/758588.xml:1: namespace error : Namespace prefix a-340282366920938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867209384634725979468672093846347259794686720938463472597946867261d on a is not defined +63472597946867209384634725979468672093846347259794686720938463472597946867261d:a + ^ +./test/errors/758588.xml:1: parser error : expected '>' +2597946867209384634725979468672093846347259794686720938463472597946867261d:a>' +2597946867209384634725979468672093846347259794686720938463472597946867261d:a> + ^ +Entity: line 1: parser error : xmlParseEntityDecl: no name + %zz; + ^ +Entity: line 1: + + ^ +Entity: line 1: parser error : ParsePI: PI xDOCTYPEm space expected + %zz; + ^ +Entity: line 1: + + ^ +Entity: line 1: parser error : Space required after ' + ^ +Entity: line 1: parser error : xmlParseEntityDecl: no name + %zz; + ^ +Entity: line 1: + + ^ +Entity: line 1: parser error : ParsePI: PI xDOCTYPEm space expected + %zz; + ^ +Entity: line 1: + + ^ +Entity: line 1: parser error : Space required after 'ELEMENT' + %xx; + ^ +Entity: line 3: +%zz; + ^ +./test/errors/759573-2.xml:6: parser error : DOCTYPE improperly terminated +%xx;ÿggKENSMYNT#MENTDŴzz;'> + ^ +./test/errors/759573-2.xml:6: parser error : Start tag expected, '<' not found +%xx;ÿggKENSMYNT#MENTDŴzz;'> + ^ diff --git a/result/errors/759573-2.xml.str b/result/errors/759573-2.xml.str new file mode 100644 index 0000000..baac164 --- /dev/null +++ b/result/errors/759573-2.xml.str @@ -0,0 +1,4 @@ +./test/errors/759573-2.xml:2: parser error : Extra content at the end of the document +%xx; + ^ +./test/errors/759573.xml:1: parser error : DOCTYPE improperly terminated +T t (A)>%xx; + ^ +./test/errors/759573.xml:1: parser error : Start tag expected, '<' not found +T t (A)>%xx; + ^ diff --git a/result/errors/759573.xml.str b/result/errors/759573.xml.str new file mode 100644 index 0000000..1b6addb --- /dev/null +++ b/result/errors/759573.xml.str @@ -0,0 +1,4 @@ +./test/errors/759573.xml:1: parser error : Extra content at the end of the document + - ^ + ^ ./test/errors/cdata.xml : failed to parse diff --git a/result/errors/content1.xml.err b/result/errors/content1.xml.err index 425be39..9fcd603 100644 --- a/result/errors/content1.xml.err +++ b/result/errors/content1.xml.err @@ -13,4 +13,4 @@ ^ ./test/errors/content1.xml:7: parser error : Start tag expected, '<' not found - ^ + ^ diff --git a/result/noent/cdata-2-byte-UTF-8.xml b/result/noent/cdata-2-byte-UTF-8.xml new file mode 100644 index 0000000..8552efc --- /dev/null +++ b/result/noent/cdata-2-byte-UTF-8.xml @@ -0,0 +1,6 @@ + + + +

+

+
diff --git a/result/noent/cdata-3-byte-UTF-8.xml b/result/noent/cdata-3-byte-UTF-8.xml new file mode 100644 index 0000000..b959a12 --- /dev/null +++ b/result/noent/cdata-3-byte-UTF-8.xml @@ -0,0 +1,7 @@ + + + +

+

+

+
diff --git a/result/noent/cdata-4-byte-UTF-8.xml b/result/noent/cdata-4-byte-UTF-8.xml new file mode 100644 index 0000000..4d1d9a8 --- /dev/null +++ b/result/noent/cdata-4-byte-UTF-8.xml @@ -0,0 +1,8 @@ + + + +

+

+

+

+
diff --git a/result/noent/ent_738805.xml b/result/noent/ent_738805.xml new file mode 100644 index 0000000..5e44a55 --- /dev/null +++ b/result/noent/ent_738805.xml @@ -0,0 +1,15 @@ + + + +]> + + + + +something should appear after colon: something +something should appear after colon: something +something should appear after colon: something +something should appear after colon: something + + diff --git a/result/relaxng/565219_0 b/result/relaxng/565219_0 new file mode 100644 index 0000000..e69de29 diff --git a/result/relaxng/565219_0.err b/result/relaxng/565219_0.err new file mode 100644 index 0000000..e3f73fa --- /dev/null +++ b/result/relaxng/565219_0.err @@ -0,0 +1,2 @@ +./test/relaxng/565219_0.xml:1: element foo: Relax-NG validity error : Element foo has wrong namespace: expecting http://bar.com/ +./test/relaxng/565219_0.xml fails to validate diff --git a/result/relaxng/565219_1 b/result/relaxng/565219_1 new file mode 100644 index 0000000..e69de29 diff --git a/result/relaxng/565219_1.err b/result/relaxng/565219_1.err new file mode 100644 index 0000000..2999bba --- /dev/null +++ b/result/relaxng/565219_1.err @@ -0,0 +1 @@ +./test/relaxng/565219_1.xml validates diff --git a/result/relaxng/565219_2 b/result/relaxng/565219_2 new file mode 100644 index 0000000..e69de29 diff --git a/result/relaxng/565219_2.err b/result/relaxng/565219_2.err new file mode 100644 index 0000000..7a997d3 --- /dev/null +++ b/result/relaxng/565219_2.err @@ -0,0 +1,2 @@ +./test/relaxng/565219_2.xml:1: element foo: Relax-NG validity error : Element foo has wrong namespace: expecting http://bar.com/ +./test/relaxng/565219_2.xml fails to validate diff --git a/result/relaxng/565219_err b/result/relaxng/565219_err new file mode 100644 index 0000000..c43e67c --- /dev/null +++ b/result/relaxng/565219_err @@ -0,0 +1 @@ +./test/relaxng/565219.rng validates diff --git a/result/relaxng/565219_valid b/result/relaxng/565219_valid new file mode 100644 index 0000000..e69de29 diff --git a/result/relaxng/710744_err b/result/relaxng/710744_err new file mode 100644 index 0000000..54f0226 --- /dev/null +++ b/result/relaxng/710744_err @@ -0,0 +1 @@ +./test/relaxng/710744.rng validates diff --git a/result/relaxng/710744_valid b/result/relaxng/710744_valid new file mode 100644 index 0000000..e69de29 diff --git a/result/relaxng/pattern3_1 b/result/relaxng/pattern3_1 new file mode 100644 index 0000000..e69de29 diff --git a/result/relaxng/pattern3_1.err b/result/relaxng/pattern3_1.err new file mode 100644 index 0000000..b1a9803 --- /dev/null +++ b/result/relaxng/pattern3_1.err @@ -0,0 +1 @@ +./test/relaxng/pattern3_1.xml validates diff --git a/result/schemas/regexp-char-ref_0_0.err b/result/schemas/regexp-char-ref_0_0.err new file mode 100644 index 0000000..e69de29 diff --git a/result/schemas/regexp-char-ref_1_0.err b/result/schemas/regexp-char-ref_1_0.err new file mode 100644 index 0000000..e69de29 diff --git a/result/valid/737840.xml b/result/valid/737840.xml new file mode 100644 index 0000000..433c6d6 --- /dev/null +++ b/result/valid/737840.xml @@ -0,0 +1,10 @@ + + + + + +]> + + ⌖ + diff --git a/result/valid/737840.xml.err b/result/valid/737840.xml.err new file mode 100644 index 0000000..e69de29 diff --git a/result/valid/737840.xml.err.rdr b/result/valid/737840.xml.err.rdr new file mode 100644 index 0000000..e69de29 diff --git a/result/valid/t8.xml.err b/result/valid/t8.xml.err index d795788..1a3c006 100644 --- a/result/valid/t8.xml.err +++ b/result/valid/t8.xml.err @@ -16,4 +16,4 @@ Entity: line 1: parser error : Start tag expected, '<' not found ^ Entity: line 1: <!ELEMENT root (middle) > - ^ +^ diff --git a/result/valid/t8a.xml.err b/result/valid/t8a.xml.err index d795788..1a3c006 100644 --- a/result/valid/t8a.xml.err +++ b/result/valid/t8a.xml.err @@ -16,4 +16,4 @@ Entity: line 1: parser error : Start tag expected, '<' not found ^ Entity: line 1: <!ELEMENT root (middle) > - ^ +^ diff --git a/runtest.c b/runtest.c index 02fe09a..bb74d2a 100644 --- a/runtest.c +++ b/runtest.c @@ -81,8 +81,10 @@ */ #ifdef O_BINARY #define RD_FLAGS O_RDONLY | O_BINARY +#define WR_FLAGS O_WRONLY | O_CREAT | O_TRUNC | O_BINARY #else -#define RD_FLAGS O_RDONLY +#define RD_FLAGS O_RDONLY +#define WR_FLAGS O_WRONLY | O_CREAT | O_TRUNC #endif typedef int (*functest) (const char *filename, const char *result, @@ -100,6 +102,7 @@ struct testDesc { int options; /* parser options for the test */ }; +static int update_results = 0; static int checkTestFile(const char *filename); #if defined(_WIN32) && !defined(__CYGWIN__) @@ -604,12 +607,34 @@ static int checkTestFile(const char *filename) { return(1); } -static int compareFiles(const char *r1, const char *r2) { +static int compareFiles(const char *r1 /* temp */, const char *r2 /* result */) { int res1, res2; int fd1, fd2; char bytes1[4096]; char bytes2[4096]; + if (update_results) { + fd1 = open(r1, RD_FLAGS); + if (fd1 < 0) + return(-1); + fd2 = open(r2, WR_FLAGS, 0644); + if (fd2 < 0) { + close(fd1); + return(-1); + } + do { + res1 = read(fd1, bytes1, 4096); + if (res1 <= 0) + break; + res2 = write(fd2, bytes1, res1); + if (res2 <= 0 || res2 != res1) + break; + } while (1); + close(fd2); + close(fd1); + return(res1 != 0); + } + fd1 = open(r1, RD_FLAGS); if (fd1 < 0) return(-1); @@ -646,13 +671,31 @@ static int compareFileMem(const char *filename, const char *mem, int size) { int idx = 0; struct stat info; - if (stat(filename, &info) < 0) + if (update_results) { + fd = open(filename, WR_FLAGS, 0644); + if (fd < 0) { + fprintf(stderr, "failed to open %s for writing", filename); + return(-1); + } + res = write(fd, mem, size); + close(fd); + return(res != size); + } + + if (stat(filename, &info) < 0) { + fprintf(stderr, "failed to stat %s\n", filename); return(-1); - if (info.st_size != size) + } + if (info.st_size != size) { + fprintf(stderr, "file %s is %ld bytes, result is %d bytes\n", + filename, info.st_size, size); return(-1); + } fd = open(filename, RD_FLAGS); - if (fd < 0) + if (fd < 0) { + fprintf(stderr, "failed to open %s for reading", filename); return(-1); + } while (idx < size) { res = read(fd, bytes, 4096); if (res <= 0) @@ -671,6 +714,9 @@ static int compareFileMem(const char *filename, const char *mem, int size) { idx += res; } close(fd); + if (idx != size) { + fprintf(stderr,"Compare error index %d, size %d\n", idx, size); + } return(idx != size); } @@ -1827,7 +1873,7 @@ pushParseTest(const char *filename, const char *result, ctxt = xmlCreatePushParserCtxt(NULL, NULL, base + cur, 4, filename); xmlCtxtUseOptions(ctxt, options); cur += 4; - while (cur < size) { + do { if (cur + 1024 >= size) { #ifdef LIBXML_HTML_ENABLED if (options & XML_PARSE_HTML) @@ -1845,7 +1891,7 @@ pushParseTest(const char *filename, const char *result, xmlParseChunk(ctxt, base + cur, 1024, 0); cur += 1024; } - } + } while (cur < size); doc = ctxt->myDoc; #ifdef LIBXML_HTML_ENABLED if (options & XML_PARSE_HTML) @@ -1871,7 +1917,7 @@ pushParseTest(const char *filename, const char *result, if ((base == NULL) || (res != 0)) { if (base != NULL) xmlFree((char *)base); - fprintf(stderr, "Result for %s failed\n", filename); + fprintf(stderr, "Result for %s failed in %s\n", filename, result); return(-1); } xmlFree((char *)base); @@ -1926,7 +1972,7 @@ memParseTest(const char *filename, const char *result, if ((base == NULL) || (res != 0)) { if (base != NULL) xmlFree((char *)base); - fprintf(stderr, "Result for %s failed\n", filename); + fprintf(stderr, "Result for %s failed in %s\n", filename, result); return(-1); } xmlFree((char *)base); @@ -2037,16 +2083,16 @@ errParseTest(const char *filename, const char *result, const char *err, xmlDocDumpMemory(doc, (xmlChar **) &base, &size); } res = compareFileMem(result, base, size); + if (res != 0) { + fprintf(stderr, "Result for %s failed in %s\n", filename, result); + return(-1); + } } if (doc != NULL) { if (base != NULL) xmlFree((char *)base); xmlFreeDoc(doc); } - if (res != 0) { - fprintf(stderr, "Result for %s failed\n", filename); - return(-1); - } if (err != NULL) { res = compareFileMem(err, testErrors, testErrorsSize); if (res != 0) { @@ -2159,7 +2205,7 @@ streamProcessTest(const char *filename, const char *result, const char *err, free(temp); } if (ret) { - fprintf(stderr, "Result for %s failed\n", filename); + fprintf(stderr, "Result for %s failed in %s\n", filename, result); return(-1); } } @@ -2362,7 +2408,7 @@ xpathCommonTest(const char *filename, const char *result, if (result != NULL) { ret = compareFiles(temp, result); if (ret) { - fprintf(stderr, "Result for %s failed\n", filename); + fprintf(stderr, "Result for %s failed in %s\n", filename, result); } } @@ -2533,7 +2579,7 @@ xmlidDocTest(const char *filename, if (result != NULL) { ret = compareFiles(temp, result); if (ret) { - fprintf(stderr, "Result for %s failed\n", filename); + fprintf(stderr, "Result for %s failed in %s\n", filename, result); res = 1; } } @@ -2661,7 +2707,7 @@ uriCommonTest(const char *filename, if (result != NULL) { ret = compareFiles(temp, result); if (ret) { - fprintf(stderr, "Result for %s failed\n", filename); + fprintf(stderr, "Result for %s failed in %s\n", filename, result); res = 1; } } @@ -3430,11 +3476,11 @@ patternTest(const char *filename, result[499] = 0; memcpy(xml + len, ".xml", 5); - if (!checkTestFile(xml)) { + if (!checkTestFile(xml) && !update_results) { fprintf(stderr, "Missing xml file %s\n", xml); return(-1); } - if (!checkTestFile(result)) { + if (!checkTestFile(result) && !update_results) { fprintf(stderr, "Missing result file %s\n", result); return(-1); } @@ -3533,7 +3579,7 @@ patternTest(const char *filename, ret = compareFiles(temp, result); if (ret) { - fprintf(stderr, "Result for %s failed\n", filename); + fprintf(stderr, "Result for %s failed in %s\n", filename, result); ret = 1; } if (temp != NULL) { @@ -3805,7 +3851,7 @@ c14nCommonTest(const char *filename, int with_comments, int mode, prefix[len] = 0; snprintf(buf, 499, "result/c14n/%s/%s", subdir,prefix); - if (!checkTestFile(buf)) { + if (!checkTestFile(buf) && !update_results) { fprintf(stderr, "Missing result file %s", buf); return(-1); } @@ -4354,9 +4400,9 @@ launchTests(testDescPtr tst) { } else { error = NULL; } - if ((result) &&(!checkTestFile(result))) { + if ((result) &&(!checkTestFile(result)) && !update_results) { fprintf(stderr, "Missing result file %s\n", result); - } else if ((error) &&(!checkTestFile(error))) { + } else if ((error) &&(!checkTestFile(error)) && !update_results) { fprintf(stderr, "Missing error file %s\n", error); } else { mem = xmlMemUsed(); @@ -4440,6 +4486,8 @@ main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) { for (a = 1; a < argc;a++) { if (!strcmp(argv[a], "-v")) verbose = 1; + else if (!strcmp(argv[a], "-u")) + update_results = 1; else if (!strcmp(argv[a], "-quiet")) tests_quiet = 1; else { diff --git a/schematron.c b/schematron.c index eb4befe..6200f2d 100644 --- a/schematron.c +++ b/schematron.c @@ -133,7 +133,7 @@ struct _xmlSchematron { int flags; /* specific to this schematron */ void *_private; /* unused by the library */ - xmlDictPtr dict; /* the dictionnary used internally */ + xmlDictPtr dict; /* the dictionary used internally */ const xmlChar *title; /* the title if any */ @@ -186,7 +186,7 @@ struct _xmlSchematronParserCtxt { const char *buffer; int size; - xmlDictPtr dict; /* dictionnary for interned string names */ + xmlDictPtr dict; /* dictionary for interned string names */ int nberrors; int err; @@ -245,7 +245,7 @@ xmlSchematronPErrMemory(xmlSchematronParserCtxtPtr ctxt, * * Handle a parser error */ -static void +static void LIBXML_ATTR_FORMAT(4,0) xmlSchematronPErr(xmlSchematronParserCtxtPtr ctxt, xmlNodePtr node, int error, const char *msg, const xmlChar * str1, const xmlChar * str2) { diff --git a/test/HTML/758605.html b/test/HTML/758605.html new file mode 100644 index 0000000..9b1b3c2 --- /dev/null +++ b/test/HTML/758605.html @@ -0,0 +1 @@ +&:ê diff --git a/test/HTML/758606.html b/test/HTML/758606.html new file mode 100644 index 0000000..01a013c --- /dev/null +++ b/test/HTML/758606.html @@ -0,0 +1 @@ + + +

+

+
diff --git a/test/cdata-3-byte-UTF-8.xml b/test/cdata-3-byte-UTF-8.xml new file mode 100644 index 0000000..b959a12 --- /dev/null +++ b/test/cdata-3-byte-UTF-8.xml @@ -0,0 +1,7 @@ + + + +

+

+

+
diff --git a/test/cdata-4-byte-UTF-8.xml b/test/cdata-4-byte-UTF-8.xml new file mode 100644 index 0000000..4d1d9a8 --- /dev/null +++ b/test/cdata-4-byte-UTF-8.xml @@ -0,0 +1,8 @@ + + + +

+

+

+

+
diff --git a/test/ent_738805.xml b/test/ent_738805.xml new file mode 100644 index 0000000..9ec70b1 --- /dev/null +++ b/test/ent_738805.xml @@ -0,0 +1,16 @@ + + + +]> + + + + + +&a; should appear after colon: &a; +&b; should appear after colon: &a; +&a; should appear after colon: &b; +&b; should appear after colon: &b; + + diff --git a/test/errors/754946.xml b/test/errors/754946.xml new file mode 100644 index 0000000..6b5f9b0 --- /dev/null +++ b/test/errors/754946.xml @@ -0,0 +1 @@ +%SYSTEM; \ No newline at end of file diff --git a/test/errors/758588.xml b/test/errors/758588.xml new file mode 100644 index 0000000..bec7e93 --- /dev/null +++ b/test/errors/758588.xml @@ -0,0 +1 @@ + + + + + + + + + + + + + + + + + +"> + +'"> + + + + + + + + + +amp, +lt, +gt, +apos, +quot"> + + + + + +]> + + + + + +
+Extensible Markup Language (XML) 1.0 + +REC-xml-&iso6.doc.date; +W3C Recommendation +&draft.day;&draft.month;&draft.year; + + + +http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date; + +http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.xml + +http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.html + +http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.pdf + +http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.ps + + + +httÿÿÿ€www.w3.org/TR/REC-xml + + + +http://www.w3.org/TR/PR-xml-971208 + + + +Tim Bray +Textuality and Netscape +tbray@textuality.com +Jean Paoli +Microsoft +jeanpa@microsoft.com +C. M. Sperberg-McQueen +University of Illinois at Chicago +cmsmcq@uic.edu + + +

The Extensible Markup Language (XML) is a subset of +SGML that is completely described in this document. Its goal is to +enable generic SGML to be served, received, and processed on the Web +in the way that is now possible with HTML. XML has been designed for +ease of implementation and for interoperability with both SGML and +HTML.

+
+ +

This document has been reviewed by W3C Members and +other interested parties and has been endorsed by the +Director as a W3C Recommendation. It is a stable +document and may be used as reference material or cited +as a normative reference from another document. W3C's +role in making the Recommendation is to draw attention +to the spPcification and to promote its widespread +deployment. This enhances the functionality and +interoperability of the Web.

+

+This document specifies a syntax created by subsetting an existing, +widely used international text processing standard (Standard +Generalized Markup Language, ISO 8879:1986(E) as amended and +corrected) for use on the World Wide Web. It is a product of the W3C +XML Activity, details of which can be found at http://www.w3.org/XML. A list of +current W3C Recommendations and other technical documents can be found +at http://www.w3.org/TR. +

+

This specification uses the term URI, which is defined by , a work in progress expected to update and . +

+

The list of known errors in this specification is +available at +http://www.w3.org/XML/xml-19980210-errata.

+

Please report errors in this document to +xml-editor@w3.org. +

+
+ + + +

Chicago, Vancouver, Mountain View, et al.: +World-Wide Web Consortium, XML Working Group, 1996, 1997.

+
+ +

Created in electronic form.

+
+ +English +Extended Backus-Naur Form (formal grammar) + + + +1997-12-03 : CMSMcQ : yet further changes +1997-12-02 : TB : further changes (see TB to XML WG, +2 December 1997) +1997-12-02 : CMSMcQ : deal with as many corrections and +comments from the proofreaders as possible: +entify hard-coded document date in pubdate element, +change expansion of entity WebSGML, +update status description as per Dan Connolly (am not sure +about refernece to Berners-Lee et al.), +add 'The' to abstract as per WG decision, +move Relationship to Existing Standards to back matter and +combine with References, +re-order back matter so normative appendices come first, +re-tag back matter so informative appendices are tagged informdiv1, +remove XXX XXX from list of 'normative' specs in prose, +move some references from Other References to Normative References, +add RFC 1738, 1808, and 2141 to Other References (they are not +normative since we do not require the processor to enforce any +rules based on them), +add reference to 'Fielding draft' (Berners-Lee et al.), +move notation section to end of body, +drop URIchar non-terminal and use SkipLit instead, +lose stray reference to defunct nonterminal 'markupdecls', +move reference to Aho et al. into appendix (Tim's right), +add prose note saying that hash marks and fragment identifiers are +NOT part of the URI formally speaking, and are NOT legal in +system identifiers (processor 'may' signal an error). +Work through: +Tim Bray reacting to James Clark, +Tim Bray on his own, +Eve Maler, + +NOT DONE YET: +change binary / text to unparsed / parsed. +handle James's suggestion about < in attriubte values +uppercase hex characters, +namechar list, + +1997-12-01 : JB : add some column-width parameters +1997-12-01 : CMSMcQ : begin round of changes to incorporate +recent WG decisions and other corrections: +binding sources of character encoding info (27 Aug / 3 Sept), +correct wording of Faust quotation (restore dropped line), +drop SDD from EncodingDecl, +change text at version number 1.0, +drop misleading (wrong!) sentence about ignorables and extenders, +modify definÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙxamples with Byte Order Mark. +Add content model as a term and clarify that it applies to both +mixed and element content. + +1997-06-30 : CMSMcQ : change date, some cosmetic changes, +changes to productions for choice, seq, Mixed, NotationType, +Enumeration. Follow James Clark's suggestion and prohibit +conditional sections in internal subset. TO DO: simplify +production for ignored sections as a result, since we don't +need to worry about parsers whi +1997-06-29 : TB : various edits +1997-06-29 : CMSMcQ : further changes: +Suppress old FINAL EDIT comments and some dead material. +Revise occurrences of % in grammar to exploit Henry Thompson's pun, +especially markupdecl and attdef. +Remove RMD requirement relating to element content (?). + +1997-06-28 : CMSMcQ : Various changes for 1 July draft: +Add text for draconian error handling (introduce +the term Fatal Error). +RE deleta est (changing wording from +original announcement to restrict the requirement to validating +parsers). +Tag definition of validawwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww it meant 'may or may not'. +1997-03-21 : TB : massive changes on plane flight from Chicago +to Vancouver +1997-03-21 : CMSMcQ : correct as many reported errors as possible. + +1997-03-20 : CMSMcQ : correct typos listed in CMSMcQ hand copy of spec. +1997 James Clark: +Define the set of characters from which [^abc] subtracts. +Charref should use just [0-9] not Digit. +Location info needs cleaner treatment: remove? (ERB +question). +One example of a PI has wrong pic. +Clarify discussion of encoding names. +Encoding failure should lead to unspecified results; don't +prescribe error recovery. +Don't require exposure of entity boundaries. +Ignore white space in element content. +Reserve entity names of the form u-NNNN. +Clarify relative URLs. +And some of my own: +Correct productions for content model: model cannot +consist of a name, so "elements ::= cp" is no good. + +1996-11-11 : CMSMcQ : revise for style. +Add new rhs to entity declaration, for parameter entities. +1996-11-10 : CMSMcQ : revise for style. +Fix / complete section on names, characters. +Add sections on parameter entities, conditional sections. +Still to do: Add compatibility note on deterministic content models. +Finish stylistic revision. +1996-10-31 : TB : Add Entity Handling section +1996-10-30 : TB : Clean up term & termdef. Slip in +ERB decision re EMPTY. +1996-10-28 : TB : Change DTD. Implement some of Michael's +suggestions. Change comments back to //. Introduce language for +XML namespace reservation. Add section on white-space handling. +Lots more cleanup. +1996-10-24 : CMSMcQ : quick tweaks, implement some ERB +decisions. Characters are not integers. Comments are /* */ not //. +Add bibliographic refs to 10646, HyTime, Unicode. +Rename old Cdata as MsData since it's only seen +in marked sections. Call them attribute-value pairs not +name-value pairs, except once. Internal subset is optional, needs +'?'. Implied attributes should be signaled to the app, not +have values supplied by processor. +1996-10-16 : TB : track down & excise all DSD references; +introduce some EBNF for entity declarations. +1996-10-?? nsistency check, fix up scraps so +they all parse, get formatter working, correct a few productions. +1996-10-10/11 : CMSMcQ : various maintenance, stylistic, and +organizational changes: +Replace a few literals with xmlpio and +pi""entities, to make them consistent and ensure we can change pic +reliably when the ERB votes. +Drop paragraph on recognizers from notation section. +Add match, exact match to terminology. +Move old 2.2 XML Processors and Apps into intro. +Mention comments, PIs, and marked sections in discussion of +delimiter escaping. +Streamline discussion of doctype decl syntax. +Drop old section of 'PI syntax' for doctype decl, and add +section on partial-DTD summary PIs to end of Logical Structures +section. +Revise DSD syntax section to use Tim's subset-in-a-PI +mechanism. +1996-10-10 : TB : eliminate name recognizers (and more?) +1996-10-09 : CMSMcQ : revise for style, consistency through 2.3 +(Characters) +1996-10-09 : CMSMcQ : re-unite everything for convenience, +at least temporarily, and revise quickly +1996-10-08 : TB : first major homogenization pass +1996-10-08 : TB : turn "current" attribute on div type into +CDATA +1996-10-02 : TB : remould into skeleton + entities +1996-09-30 : CMSMcQ : add a few more sections prior to exchange + with Tim. +1996-09-20 : CMSMcQ : finish transcribing notes. +1996-09-19 : CMSMcQ : begin transcribing notes for draft. +1996-09-13 : CMSMcQ : made outline from notes of 09-06, +do some housekeeping + + +
+<ðððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððððm> is used to read XML documents +and provide access to their content and structure.
It is @ssumed that an XML processor is +doing its work on behalf of another module, called the +application. This specification describes the +required beh\vior of an XML processor in terms of how it must read XML +data and the information it must provide to the application.

+ + +Origin and Goals +

XML was developed by an XML Working Group (orisable over the +Internet.

+

XML shall support a wide variey of applications.

+

XML shall be compatible with SGML.

+

It shall be easy to write programs which process XML +documents.

+

The number of optional features in XML is to be kept to the +absolute minimum, ideally zero.

+

XML documents shou \ No newline at end of file diff --git a/test/errors/759573-2.xml b/test/errors/759573-2.xml new file mode 100644 index 0000000..5ad655f --- /dev/null +++ b/test/errors/759573-2.xml @@ -0,0 +1,9 @@ + + + +' > +%xx;ÿggKENSMYNT#MENTDŴzz;'> +r.B"/> +e %xx; \ No newline at end of file diff --git a/test/relaxng/565219.rng b/test/relaxng/565219.rng new file mode 100644 index 0000000..087ed95 --- /dev/null +++ b/test/relaxng/565219.rng @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git a/test/relaxng/565219_0.xml b/test/relaxng/565219_0.xml new file mode 100644 index 0000000..a964a07 --- /dev/null +++ b/test/relaxng/565219_0.xml @@ -0,0 +1 @@ + diff --git a/test/relaxng/565219_1.xml b/test/relaxng/565219_1.xml new file mode 100644 index 0000000..f1999f8 --- /dev/null +++ b/test/relaxng/565219_1.xml @@ -0,0 +1 @@ + diff --git a/test/relaxng/565219_2.xml b/test/relaxng/565219_2.xml new file mode 100644 index 0000000..a964a07 --- /dev/null +++ b/test/relaxng/565219_2.xml @@ -0,0 +1 @@ + diff --git a/test/relaxng/pattern3.rng b/test/relaxng/pattern3.rng new file mode 100644 index 0000000..fa4434f --- /dev/null +++ b/test/relaxng/pattern3.rng @@ -0,0 +1,11 @@ + + + + + + [a-z]+ + + + + diff --git a/test/relaxng/pattern3_1.xml b/test/relaxng/pattern3_1.xml new file mode 100644 index 0000000..f559cd3 --- /dev/null +++ b/test/relaxng/pattern3_1.xml @@ -0,0 +1 @@ + ooo diff --git a/test/valid/737840.xml b/test/valid/737840.xml new file mode 100644 index 0000000..2d27b73 --- /dev/null +++ b/test/valid/737840.xml @@ -0,0 +1,10 @@ + + + + +]> + + + ⌖ + diff --git a/test/valid/dtds/737840.ent b/test/valid/dtds/737840.ent new file mode 100644 index 0000000..e972132 --- /dev/null +++ b/test/valid/dtds/737840.ent @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/testModule.c b/testModule.c index e399f5c..77b7ba1 100644 --- a/testModule.c +++ b/testModule.c @@ -47,7 +47,7 @@ int main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) { /* build the module filename, and confirm the module exists */ xmlStrPrintf(filename, sizeof(filename), - (const xmlChar*) "%s/testdso%s", + "%s/testdso%s", (const xmlChar*)MODULE_PATH, (const xmlChar*)LIBXML_MODULE_EXTENSION); diff --git a/testapi.c b/testapi.c index 0367ffd..60f4bdd 100644 --- a/testapi.c +++ b/testapi.c @@ -1034,10 +1034,10 @@ static void des_xmlAttributeType(int no ATTRIBUTE_UNUSED, xmlAttributeType val A #define gen_nb_xmlBufferAllocationScheme 4 static xmlBufferAllocationScheme gen_xmlBufferAllocationScheme(int no, int nr ATTRIBUTE_UNUSED) { - if (no == 1) return(XML_BUFFER_ALLOC_DOUBLEIT); - if (no == 2) return(XML_BUFFER_ALLOC_EXACT); - if (no == 3) return(XML_BUFFER_ALLOC_HYBRID); - if (no == 4) return(XML_BUFFER_ALLOC_IMMUTABLE); + if (no == 1) return(XML_BUFFER_ALLOC_BOUNDED); + if (no == 2) return(XML_BUFFER_ALLOC_DOUBLEIT); + if (no == 3) return(XML_BUFFER_ALLOC_EXACT); + if (no == 4) return(XML_BUFFER_ALLOC_HYBRID); return(0); } @@ -8175,7 +8175,7 @@ test_xmlDictCreateSub(void) { int mem_base; xmlDictPtr ret_val; - xmlDictPtr sub; /* an existing dictionnary */ + xmlDictPtr sub; /* an existing dictionary */ int n_sub; for (n_sub = 0;n_sub < gen_nb_xmlDictPtr;n_sub++) { @@ -8207,7 +8207,7 @@ test_xmlDictExists(void) { int mem_base; const xmlChar * ret_val; - xmlDictPtr dict; /* the dictionnary */ + xmlDictPtr dict; /* the dictionary */ int n_dict; xmlChar * name; /* the name of the userdata */ int n_name; @@ -8263,7 +8263,7 @@ test_xmlDictLookup(void) { int mem_base; const xmlChar * ret_val; - xmlDictPtr dict; /* the dictionnary */ + xmlDictPtr dict; /* the dictionary */ int n_dict; xmlChar * name; /* the name of the userdata */ int n_name; @@ -8309,7 +8309,7 @@ test_xmlDictOwns(void) { int mem_base; int ret_val; - xmlDictPtr dict; /* the dictionnary */ + xmlDictPtr dict; /* the dictionary */ int n_dict; xmlChar * str; /* the string */ int n_str; @@ -8348,7 +8348,7 @@ test_xmlDictQLookup(void) { int mem_base; const xmlChar * ret_val; - xmlDictPtr dict; /* the dictionnary */ + xmlDictPtr dict; /* the dictionary */ int n_dict; xmlChar * prefix; /* the prefix */ int n_prefix; @@ -8394,7 +8394,7 @@ test_xmlDictReference(void) { int mem_base; int ret_val; - xmlDictPtr dict; /* the dictionnary */ + xmlDictPtr dict; /* the dictionary */ int n_dict; for (n_dict = 0;n_dict < gen_nb_xmlDictPtr;n_dict++) { @@ -8437,7 +8437,7 @@ test_xmlDictSize(void) { int mem_base; int ret_val; - xmlDictPtr dict; /* the dictionnary */ + xmlDictPtr dict; /* the dictionary */ int n_dict; for (n_dict = 0;n_dict < gen_nb_xmlDictPtr;n_dict++) { diff --git a/testdict.c b/testdict.c index 4e8581f..40bebd0 100644 --- a/testdict.c +++ b/testdict.c @@ -277,7 +277,7 @@ static int run_test2(xmlDictPtr parent) { cur++; *pref = 0; tmp = xmlDictQLookup(dict, &prefix[0], cur); - if (xmlDictQLookup(dict, &prefix[0], cur) != test2[i]) { + if (tmp != test2[i]) { fprintf(stderr, "Failed lookup check for '%s':'%s'\n", &prefix[0], cur); ret = 1; @@ -408,7 +408,7 @@ static int run_test1(void) { cur++; *pref = 0; tmp = xmlDictQLookup(dict, &prefix[0], cur); - if (xmlDictQLookup(dict, &prefix[0], cur) != test1[i]) { + if (tmp != test1[i]) { fprintf(stderr, "Failed lookup check for '%s':'%s'\n", &prefix[0], cur); ret = 1; diff --git a/threads.c b/threads.c index 8921204..b9d6cae 100644 --- a/threads.c +++ b/threads.c @@ -47,7 +47,7 @@ #ifdef HAVE_PTHREAD_H static int libxml_is_threaded = -1; -#ifdef __GNUC__ +#if defined(__GNUC__) && defined(__GLIBC__) #ifdef linux #if (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) || (__GNUC__ > 3) extern int pthread_once (pthread_once_t *__once_control, @@ -89,7 +89,7 @@ extern int pthread_cond_signal () __attribute((weak)); #endif #endif /* linux */ -#endif /* __GNUC__ */ +#endif /* defined(__GNUC__) && defined(__GLIBC__) */ #endif /* HAVE_PTHREAD_H */ /* @@ -415,8 +415,8 @@ xmlRMutexUnlock(xmlRMutexPtr tok ATTRIBUTE_UNUSED) pthread_mutex_unlock(&tok->lock); #elif defined HAVE_WIN32_THREADS if (tok->count > 0) { - LeaveCriticalSection(&tok->cs); tok->count--; + LeaveCriticalSection(&tok->cs); } #elif defined HAVE_BEOS_THREADS if (tok->lock->tid == find_thread(NULL)) { diff --git a/timsort.h b/timsort.h index efa3aab..795f272 100644 --- a/timsort.h +++ b/timsort.h @@ -392,62 +392,66 @@ static void TIM_SORT_MERGE(SORT_TYPE *dst, const TIM_SORT_RUN_T *stack, const in static int TIM_SORT_COLLAPSE(SORT_TYPE *dst, TIM_SORT_RUN_T *stack, int stack_curr, TEMP_STORAGE_T *store, const size_t size) { - while (1) - { - int64_t A, B, C; + while (1) { + int64_t A, B, C, D; + int ABC, BCD, BD, CD; + /* if the stack only has one thing on it, we are done with the collapse */ - if (stack_curr <= 1) break; + if (stack_curr <= 1) { + break; + } + /* if this is the last merge, just do it */ - if ((stack_curr == 2) && - (stack[0].length + stack[1].length == (int64_t) size)) - { + if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) { TIM_SORT_MERGE(dst, stack, stack_curr, store); stack[0].length += stack[1].length; stack_curr--; break; } /* check if the invariant is off for a stack of 2 elements */ - else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) - { + else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) { TIM_SORT_MERGE(dst, stack, stack_curr, store); stack[0].length += stack[1].length; stack_curr--; break; - } - else if (stack_curr == 2) + } else if (stack_curr == 2) { break; + } - A = stack[stack_curr - 3].length; - B = stack[stack_curr - 2].length; - C = stack[stack_curr - 1].length; + B = stack[stack_curr - 3].length; + C = stack[stack_curr - 2].length; + D = stack[stack_curr - 1].length; - /* check first invariant */ - if (A <= B + C) - { - if (A < C) - { - TIM_SORT_MERGE(dst, stack, stack_curr - 1, store); - stack[stack_curr - 3].length += stack[stack_curr - 2].length; - stack[stack_curr - 2] = stack[stack_curr - 1]; - stack_curr--; - } - else - { - TIM_SORT_MERGE(dst, stack, stack_curr, store); - stack[stack_curr - 2].length += stack[stack_curr - 1].length; - stack_curr--; - } + if (stack_curr >= 4) { + A = stack[stack_curr - 4].length; + ABC = (A <= B + C); + } else { + ABC = 0; } - /* check second invariant */ - else if (B <= C) - { + + BCD = (B <= C + D) || ABC; + CD = (C <= D); + BD = (B < D); + + /* Both invariants are good */ + if (!BCD && !CD) { + break; + } + + /* left merge */ + if (BCD && !CD) { + TIM_SORT_MERGE(dst, stack, stack_curr - 1, store); + stack[stack_curr - 3].length += stack[stack_curr - 2].length; + stack[stack_curr - 2] = stack[stack_curr - 1]; + stack_curr--; + } else { + /* right merge */ TIM_SORT_MERGE(dst, stack, stack_curr, store); stack[stack_curr - 2].length += stack[stack_curr - 1].length; stack_curr--; } - else - break; } + return stack_curr; } diff --git a/tree.c b/tree.c index 307782c..9d330b8 100644 --- a/tree.c +++ b/tree.c @@ -1044,7 +1044,7 @@ xmlCreateIntSubset(xmlDocPtr doc, const xmlChar *name, * DICT_FREE: * @str: a string * - * Free a string if it is not owned by the "dict" dictionnary in the + * Free a string if it is not owned by the "dict" dictionary in the * current scope */ #define DICT_FREE(str) \ @@ -1057,7 +1057,7 @@ xmlCreateIntSubset(xmlDocPtr doc, const xmlChar *name, * DICT_COPY: * @str: a string * - * Copy a string using a "dict" dictionnary in the current scope, + * Copy a string using a "dict" dictionary in the current scope, * if availabe. */ #define DICT_COPY(str, cpy) \ @@ -1074,7 +1074,7 @@ xmlCreateIntSubset(xmlDocPtr doc, const xmlChar *name, * DICT_CONST_COPY: * @str: a string * - * Copy a string using a "dict" dictionnary in the current scope, + * Copy a string using a "dict" dictionary in the current scope, * if availabe. */ #define DICT_CONST_COPY(str, cpy) \ @@ -1451,9 +1451,9 @@ xmlStringLenGetNodeList(const xmlDoc *doc, const xmlChar *value, int len) { node->content = xmlBufDetach(buf); if (last == NULL) { - last = ret = node; + ret = node; } else { - last = xmlAddNextSibling(last, node); + xmlAddNextSibling(last, node); } } else if (ret == NULL) { ret = xmlNewDocText(doc, BAD_CAST ""); @@ -1593,6 +1593,7 @@ xmlStringGetNodeList(const xmlDoc *doc, const xmlChar *value) { else if ((ent != NULL) && (ent->children == NULL)) { xmlNodePtr temp; + ent->children = (xmlNodePtr) -1; ent->children = xmlStringGetNodeList(doc, (const xmlChar*)node->content); ent->owner = 1; @@ -1639,9 +1640,9 @@ xmlStringGetNodeList(const xmlDoc *doc, const xmlChar *value) { node->content = xmlBufDetach(buf); if (last == NULL) { - last = ret = node; + ret = node; } else { - last = xmlAddNextSibling(last, node); + xmlAddNextSibling(last, node); } } @@ -2270,7 +2271,7 @@ xmlNewNodeEatName(xmlNsPtr ns, xmlChar *name) { cur = (xmlNodePtr) xmlMalloc(sizeof(xmlNode)); if (cur == NULL) { xmlTreeErrMemory("building node"); - /* we can't check here that name comes from the doc dictionnary */ + /* we can't check here that name comes from the doc dictionary */ return(NULL); } memset(cur, 0, sizeof(xmlNode)); @@ -2350,7 +2351,7 @@ xmlNewDocNodeEatName(xmlDocPtr doc, xmlNsPtr ns, UPDATE_LAST_CHILD_AND_PARENT(cur) } } else { - /* if name don't come from the doc dictionnary free it here */ + /* if name don't come from the doc dictionary free it here */ if ((name != NULL) && (doc != NULL) && (!(xmlDictOwns(doc->dict, name)))) xmlFree(name); @@ -2799,8 +2800,27 @@ xmlSetTreeDoc(xmlNodePtr tree, xmlDocPtr doc) { if(tree->type == XML_ELEMENT_NODE) { prop = tree->properties; while (prop != NULL) { + if (prop->atype == XML_ATTRIBUTE_ID) { + xmlRemoveID(tree->doc, prop); + } + prop->doc = doc; xmlSetListDoc(prop->children, doc); + + /* + * TODO: ID attributes should be also added to the new + * document, but this breaks things like xmlReplaceNode. + * The underlying problem is that xmlRemoveID is only called + * if a node is destroyed, not if it's unlinked. + */ +#if 0 + if (xmlIsID(doc, tree, prop)) { + xmlChar *idVal = xmlNodeListGetString(doc, prop->children, + 1); + xmlAddID(NULL, doc, idVal, prop); + } +#endif + prop = prop->next; } } @@ -3682,7 +3702,7 @@ xmlFreeNodeList(xmlNodePtr cur) { * When a node is a text node or a comment, it uses a global static * variable for the name of the node. * Otherwise the node name might come from the document's - * dictionnary + * dictionary */ if ((cur->name != NULL) && (cur->type != XML_TEXT_NODE) && @@ -3751,7 +3771,7 @@ xmlFreeNode(xmlNodePtr cur) { /* * When a node is a text node or a comment, it uses a global static * variable for the name of the node. - * Otherwise the node name might come from the document's dictionnary + * Otherwise the node name might come from the document's dictionary */ if ((cur->name != NULL) && (cur->type != XML_TEXT_NODE) && diff --git a/uri.c b/uri.c index ff47abb..2bd5720 100644 --- a/uri.c +++ b/uri.c @@ -314,7 +314,7 @@ xmlParse3986Query(xmlURIPtr uri, const char **str) * @uri: pointer to an URI structure * @str: the string to analyze * - * Parse a port part and fills in the appropriate fields + * Parse a port part and fills in the appropriate fields * of the @uri structure * * port = *DIGIT @@ -325,15 +325,16 @@ static int xmlParse3986Port(xmlURIPtr uri, const char **str) { const char *cur = *str; + unsigned port = 0; /* unsigned for defined overflow behavior */ if (ISA_DIGIT(cur)) { - if (uri != NULL) - uri->port = 0; while (ISA_DIGIT(cur)) { - if (uri != NULL) - uri->port = uri->port * 10 + (*cur - '0'); + port = port * 10 + (*cur - '0'); + cur++; } + if (uri != NULL) + uri->port = port & INT_MAX; /* port value modulo INT_MAX+1 */ *str = cur; return(0); } diff --git a/valid.c b/valid.c index 409aa81..19f84b8 100644 --- a/valid.c +++ b/valid.c @@ -93,7 +93,7 @@ xmlVErrMemory(xmlValidCtxtPtr ctxt, const char *extra) * * Handle a validation error */ -static void +static void LIBXML_ATTR_FORMAT(3,0) xmlErrValid(xmlValidCtxtPtr ctxt, xmlParserErrors error, const char *msg, const char *extra) { @@ -137,7 +137,7 @@ xmlErrValid(xmlValidCtxtPtr ctxt, xmlParserErrors error, * * Handle a validation error, provide contextual informations */ -static void +static void LIBXML_ATTR_FORMAT(4,0) xmlErrValidNode(xmlValidCtxtPtr ctxt, xmlNodePtr node, xmlParserErrors error, const char *msg, const xmlChar * str1, @@ -180,7 +180,7 @@ xmlErrValidNode(xmlValidCtxtPtr ctxt, * * Handle a validation error, provide contextual informations */ -static void +static void LIBXML_ATTR_FORMAT(4,0) xmlErrValidNodeNr(xmlValidCtxtPtr ctxt, xmlNodePtr node, xmlParserErrors error, const char *msg, const xmlChar * str1, @@ -221,7 +221,7 @@ xmlErrValidNodeNr(xmlValidCtxtPtr ctxt, * * Handle a validation error, provide contextual information */ -static void +static void LIBXML_ATTR_FORMAT(4,0) xmlErrValidWarning(xmlValidCtxtPtr ctxt, xmlNodePtr node, xmlParserErrors error, const char *msg, const xmlChar * str1, @@ -2532,7 +2532,7 @@ xmlDumpNotationTable(xmlBufferPtr buf, xmlNotationTablePtr table) { * DICT_FREE: * @str: a string * - * Free a string if it is not owned by the "dict" dictionnary in the + * Free a string if it is not owned by the "dict" dictionary in the * current scope */ #define DICT_FREE(str) \ @@ -2634,8 +2634,10 @@ xmlAddID(xmlValidCtxtPtr ctxt, xmlDocPtr doc, const xmlChar *value, /* * The id is already defined in this DTD. */ - xmlErrValidNode(ctxt, attr->parent, XML_DTD_ID_REDEFINED, - "ID %s already defined\n", value, NULL, NULL); + if (ctxt != NULL) { + xmlErrValidNode(ctxt, attr->parent, XML_DTD_ID_REDEFINED, + "ID %s already defined\n", value, NULL, NULL); + } #endif /* LIBXML_VALID_ENABLED */ xmlFreeID(ret); return(NULL); diff --git a/win32/VC10/config.h b/win32/VC10/config.h index 8629944..891b57e 100644 --- a/win32/VC10/config.h +++ b/win32/VC10/config.h @@ -96,7 +96,9 @@ static int isnan (double d) { #if defined(_MSC_VER) #define mkdir(p,m) _mkdir(p) +#if _MSC_VER < 1900 // Cannot define this in VS 2015 and above! #define snprintf _snprintf +#endif #if _MSC_VER < 1500 #define vsnprintf(b,c,f,a) _vsnprintf(b,c,f,a) #endif diff --git a/win32/configure.js b/win32/configure.js index edd943a..92b9ba0 100644 --- a/win32/configure.js +++ b/win32/configure.js @@ -14,7 +14,7 @@ var srcDirUtils = ".."; var baseName = "libxml2"; /* Configure file which contains the version and the output file where we can store our build configuration. */ -var configFile = srcDirXml + "\\configure.in"; +var configFile = srcDirXml + "\\configure.ac"; var versionFile = ".\\config.msvc"; /* Input and output files regarding the libxml features. */ var optsFileIn = srcDirXml + "\\include\\libxml\\xmlversion.h.in"; diff --git a/xinclude.c b/xinclude.c index ff3dafb..e3bb43e 100644 --- a/xinclude.c +++ b/xinclude.c @@ -125,7 +125,7 @@ xmlXIncludeErrMemory(xmlXIncludeCtxtPtr ctxt, xmlNodePtr node, * * Handle an XInclude error */ -static void +static void LIBXML_ATTR_FORMAT(4,0) xmlXIncludeErr(xmlXIncludeCtxtPtr ctxt, xmlNodePtr node, int error, const char *msg, const xmlChar *extra) { @@ -147,7 +147,7 @@ xmlXIncludeErr(xmlXIncludeCtxtPtr ctxt, xmlNodePtr node, int error, * * Emit an XInclude warning. */ -static void +static void LIBXML_ATTR_FORMAT(4,0) xmlXIncludeWarn(xmlXIncludeCtxtPtr ctxt, xmlNodePtr node, int error, const char *msg, const xmlChar *extra) { diff --git a/xmlIO.c b/xmlIO.c index e628ab0..1a79c09 100644 --- a/xmlIO.c +++ b/xmlIO.c @@ -1334,7 +1334,7 @@ xmlGzfileClose (void * context) { } #endif /* HAVE_ZLIB_H */ -#ifdef HAVE_LZMA_H +#ifdef LIBXML_LZMA_ENABLED /************************************************************************ * * * I/O for compressed file accesses * @@ -1451,7 +1451,7 @@ xmlXzfileClose (void * context) { if (ret < 0) xmlIOErr(0, "xzclose()"); return(ret); } -#endif /* HAVE_LZMA_H */ +#endif /* LIBXML_LZMA_ENABLED */ #ifdef LIBXML_HTTP_ENABLED /************************************************************************ @@ -1604,7 +1604,7 @@ xmlCreateZMemBuff( int compression ) { xmlFreeZMemBuff( buff ); buff = NULL; xmlStrPrintf(msg, 500, - (const xmlChar *) "xmlCreateZMemBuff: %s %d\n", + "xmlCreateZMemBuff: %s %d\n", "Error initializing compression context. ZLIB error:", z_err ); xmlIOErr(XML_IO_WRITE, (const char *) msg); @@ -1672,7 +1672,7 @@ xmlZMemBuffExtend( xmlZMemBuffPtr buff, size_t ext_amt ) { else { xmlChar msg[500]; xmlStrPrintf(msg, 500, - (const xmlChar *) "xmlZMemBuffExtend: %s %lu bytes.\n", + "xmlZMemBuffExtend: %s %lu bytes.\n", "Allocation failure extending output buffer to", new_size ); xmlIOErr(XML_IO_WRITE, (const char *) msg); @@ -1718,7 +1718,7 @@ xmlZMemBuffAppend( xmlZMemBuffPtr buff, const char * src, int len ) { if ( z_err != Z_OK ) { xmlChar msg[500]; xmlStrPrintf(msg, 500, - (const xmlChar *) "xmlZMemBuffAppend: %s %d %s - %d", + "xmlZMemBuffAppend: %s %d %s - %d", "Compression error while appending", len, "bytes to buffer. ZLIB error", z_err ); xmlIOErr(XML_IO_WRITE, (const char *) msg); @@ -1791,7 +1791,7 @@ xmlZMemBuffGetContent( xmlZMemBuffPtr buff, char ** data_ref ) { else { xmlChar msg[500]; xmlStrPrintf(msg, 500, - (const xmlChar *) "xmlZMemBuffGetContent: %s - %d\n", + "xmlZMemBuffGetContent: %s - %d\n", "Error flushing zlib buffers. Error code", z_err ); xmlIOErr(XML_IO_WRITE, (const char *) msg); } @@ -1996,7 +1996,7 @@ xmlIOHTTPWrite( void * context, const char * buffer, int len ) { if ( len < 0 ) { xmlChar msg[500]; xmlStrPrintf(msg, 500, - (const xmlChar *) "xmlIOHTTPWrite: %s\n%s '%s'.\n", + "xmlIOHTTPWrite: %s\n%s '%s'.\n", "Error appending to internal buffer.", "Error sending document to URI", ctxt->uri ); @@ -2068,7 +2068,7 @@ xmlIOHTTPCloseWrite( void * context, const char * http_mthd ) { if ( http_content == NULL ) { xmlChar msg[500]; xmlStrPrintf(msg, 500, - (const xmlChar *) "xmlIOHTTPCloseWrite: %s '%s' %s '%s'.\n", + "xmlIOHTTPCloseWrite: %s '%s' %s '%s'.\n", "Error retrieving content.\nUnable to", http_mthd, "data to URI", ctxt->uri ); xmlIOErr(XML_IO_WRITE, (const char *) msg); @@ -2140,7 +2140,7 @@ xmlIOHTTPCloseWrite( void * context, const char * http_mthd ) { else { xmlChar msg[500]; xmlStrPrintf(msg, 500, - (const xmlChar *) "xmlIOHTTPCloseWrite: HTTP '%s' of %d %s\n'%s' %s %d\n", + "xmlIOHTTPCloseWrite: HTTP '%s' of %d %s\n'%s' %s %d\n", http_mthd, content_lgth, "bytes to URI", ctxt->uri, "failed. HTTP return code:", http_rtn ); @@ -2328,10 +2328,10 @@ xmlRegisterDefaultInputCallbacks(void) { xmlRegisterInputCallbacks(xmlGzfileMatch, xmlGzfileOpen, xmlGzfileRead, xmlGzfileClose); #endif /* HAVE_ZLIB_H */ -#ifdef HAVE_LZMA_H +#ifdef LIBXML_LZMA_ENABLED xmlRegisterInputCallbacks(xmlXzfileMatch, xmlXzfileOpen, xmlXzfileRead, xmlXzfileClose); -#endif /* HAVE_ZLIB_H */ +#endif /* LIBXML_LZMA_ENABLED */ #ifdef LIBXML_HTTP_ENABLED xmlRegisterInputCallbacks(xmlIOHTTPMatch, xmlIOHTTPOpen, @@ -2683,7 +2683,7 @@ __xmlParserInputBufferCreateFilename(const char *URI, xmlCharEncoding enc) { #endif } #endif -#ifdef HAVE_LZMA_H +#ifdef LIBXML_LZMA_ENABLED if ((xmlInputCallbackTable[i].opencallback == xmlXzfileOpen) && (strcmp(URI, "-") != 0)) { ret->compressed = __libxml2_xzcompressed(context); @@ -3350,7 +3350,7 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) { * try to establish compressed status of input if not done already */ if (in->compressed == -1) { -#ifdef HAVE_LZMA_H +#ifdef LIBXML_LZMA_ENABLED if (in->readcallback == xmlXzfileRead) in->compressed = __libxml2_xzcompressed(in->context); #endif diff --git a/xmlcatalog.c b/xmlcatalog.c index b9ed6a4..006f0cc 100644 --- a/xmlcatalog.c +++ b/xmlcatalog.c @@ -80,6 +80,7 @@ xmlShellReadline(const char *prompt) { if (prompt != NULL) fprintf(stdout, "%s", prompt); + fflush(stdout); if (!fgets(line_read, 500, stdin)) return(NULL); line_read[500] = 0; diff --git a/xmllint.c b/xmllint.c index b297ded..67f7adb 100644 --- a/xmllint.c +++ b/xmllint.c @@ -449,7 +449,7 @@ startTimer(void) * message about the timing performed; format is a printf * type argument */ -static void XMLCDECL +static void XMLCDECL LIBXML_ATTR_FORMAT(1,2) endTimer(const char *fmt, ...) { long msec; @@ -485,7 +485,7 @@ startTimer(void) { begin = clock(); } -static void XMLCDECL +static void XMLCDECL LIBXML_ATTR_FORMAT(1,2) endTimer(const char *fmt, ...) { long msec; @@ -514,7 +514,7 @@ startTimer(void) * Do nothing */ } -static void XMLCDECL +static void XMLCDECL LIBXML_ATTR_FORMAT(1,2) endTimer(char *format, ...) { /* @@ -634,7 +634,7 @@ xmlHTMLPrintFileContext(xmlParserInputPtr input) { * Display and format an error messages, gives file, line, position and * extra parameters. */ -static void XMLCDECL +static void XMLCDECL LIBXML_ATTR_FORMAT(2,3) xmlHTMLError(void *ctx, const char *msg, ...) { xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; @@ -671,7 +671,7 @@ xmlHTMLError(void *ctx, const char *msg, ...) * Display and format a warning messages, gives file, line, position and * extra parameters. */ -static void XMLCDECL +static void XMLCDECL LIBXML_ATTR_FORMAT(2,3) xmlHTMLWarning(void *ctx, const char *msg, ...) { xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; @@ -709,7 +709,7 @@ xmlHTMLWarning(void *ctx, const char *msg, ...) * Display and format an validity error messages, gives file, * line, position and extra parameters. */ -static void XMLCDECL +static void XMLCDECL LIBXML_ATTR_FORMAT(2,3) xmlHTMLValidityError(void *ctx, const char *msg, ...) { xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; @@ -746,7 +746,7 @@ xmlHTMLValidityError(void *ctx, const char *msg, ...) * Display and format a validity warning messages, gives file, line, * position and extra parameters. */ -static void XMLCDECL +static void XMLCDECL LIBXML_ATTR_FORMAT(2,3) xmlHTMLValidityWarning(void *ctx, const char *msg, ...) { xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; @@ -809,6 +809,7 @@ xmlShellReadline(char *prompt) { if (prompt != NULL) fprintf(stdout, "%s", prompt); + fflush(stdout); if (!fgets(line_read, 500, stdin)) return(NULL); line_read[500] = 0; @@ -1410,7 +1411,7 @@ commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value) * Display and format a warning messages, gives file, line, position and * extra parameters. */ -static void XMLCDECL +static void XMLCDECL LIBXML_ATTR_FORMAT(2,3) warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) { va_list args; @@ -1433,7 +1434,7 @@ warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) * Display and format a error messages, gives file, line, position and * extra parameters. */ -static void XMLCDECL +static void XMLCDECL LIBXML_ATTR_FORMAT(2,3) errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) { va_list args; @@ -1456,7 +1457,7 @@ errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) * Display and format a fatalError messages, gives file, line, position and * extra parameters. */ -static void XMLCDECL +static void XMLCDECL LIBXML_ATTR_FORMAT(2,3) fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) { va_list args; @@ -2001,6 +2002,12 @@ static void walkDoc(xmlDocPtr doc) { xmlNsPtr ns; root = xmlDocGetRootElement(doc); + if (root == NULL ) { + xmlGenericError(xmlGenericErrorContext, + "Document does not have a root element"); + progresult = XMLLINT_ERR_UNCLASS; + return; + } for (ns = root->nsDef, i = 0;ns != NULL && i < 20;ns=ns->next) { namespaces[i++] = ns->href; namespaces[i++] = ns->prefix; @@ -2967,6 +2974,7 @@ static void showVersion(const char *name) { if (xmlHasFeature(XML_WITH_XPTR)) fprintf(stderr, "XPointer "); if (xmlHasFeature(XML_WITH_XINCLUDE)) fprintf(stderr, "XInclude "); if (xmlHasFeature(XML_WITH_ICONV)) fprintf(stderr, "Iconv "); + if (xmlHasFeature(XML_WITH_ICU)) fprintf(stderr, "ICU "); if (xmlHasFeature(XML_WITH_ISO8859X)) fprintf(stderr, "ISO8859X "); if (xmlHasFeature(XML_WITH_UNICODE)) fprintf(stderr, "Unicode "); if (xmlHasFeature(XML_WITH_REGEXP)) fprintf(stderr, "Regexps "); @@ -3046,7 +3054,7 @@ static void usage(const char *name) { printf("\t--noblanks : drop (ignorable?) blanks spaces\n"); printf("\t--nocdata : replace cdata section with text nodes\n"); #ifdef LIBXML_OUTPUT_ENABLED - printf("\t--format : reformat/reindent the input\n"); + printf("\t--format : reformat/reindent the output\n"); printf("\t--encode encoding : output in the given encoding\n"); printf("\t--dropdtd : remove the DOCTYPE of the input docs\n"); printf("\t--pretty STYLE : pretty-print in a particular style\n"); diff --git a/xmlmemory.c b/xmlmemory.c index a3dc737..f08c8c3 100644 --- a/xmlmemory.c +++ b/xmlmemory.c @@ -109,6 +109,7 @@ typedef struct memnod { #define RESERVE_SIZE (((HDR_SIZE + (ALIGN_SIZE-1)) \ / ALIGN_SIZE ) * ALIGN_SIZE) +#define MAX_SIZE_T ((size_t)-1) #define CLIENT_2_HDR(a) ((MEMHDR *) (((char *) (a)) - RESERVE_SIZE)) #define HDR_2_CLIENT(a) ((void *) (((char *) (a)) + RESERVE_SIZE)) @@ -217,7 +218,7 @@ xmlMallocLoc(size_t size, const char * file, int line) /** * xmlMallocAtomicLoc: - * @size: an int specifying the size in byte to allocate. + * @size: an unsigned int specifying the size in byte to allocate. * @file: the file name or NULL * @line: the line number * @@ -240,11 +241,18 @@ xmlMallocAtomicLoc(size_t size, const char * file, int line) TEST_POINT + if (size > (MAX_SIZE_T - RESERVE_SIZE)) { + xmlGenericError(xmlGenericErrorContext, + "xmlMallocAtomicLoc : Unsigned overflow prevented\n"); + xmlMemoryDump(); + return(NULL); + } + p = (MEMHDR *) malloc(RESERVE_SIZE+size); if (!p) { xmlGenericError(xmlGenericErrorContext, - "xmlMallocLoc : Out of free space\n"); + "xmlMallocAtomicLoc : Out of free space\n"); xmlMemoryDump(); return(NULL); } @@ -554,7 +562,12 @@ xmlMemoryStrdup(const char *str) { int xmlMemUsed(void) { - return(debugMemSize); + int res; + + xmlMutexLock(xmlMemMutex); + res = debugMemSize; + xmlMutexUnlock(xmlMemMutex); + return(res); } /** @@ -567,7 +580,12 @@ xmlMemUsed(void) { int xmlMemBlocks(void) { - return(debugMemBlocks); + int res; + + xmlMutexLock(xmlMemMutex); + res = debugMemBlocks; + xmlMutexUnlock(xmlMemMutex); + return(res); } #ifdef MEM_LIST diff --git a/xmlreader.c b/xmlreader.c index f19e123..f285790 100644 --- a/xmlreader.c +++ b/xmlreader.c @@ -142,7 +142,7 @@ struct _xmlTextReader { xmlNodePtr faketext;/* fake xmlNs chld */ int preserve;/* preserve the resulting document */ xmlBufPtr buffer; /* used to return const xmlChar * */ - xmlDictPtr dict; /* the context dictionnary */ + xmlDictPtr dict; /* the context dictionary */ /* entity stack when traversing entities content */ xmlNodePtr ent; /* Current Entity Ref Node */ @@ -210,7 +210,7 @@ static int xmlTextReaderNextTree(xmlTextReaderPtr reader); * DICT_FREE: * @str: a string * - * Free a string if it is not owned by the "dict" dictionnary in the + * Free a string if it is not owned by the "dict" dictionary in the * current scope */ #define DICT_FREE(str) \ @@ -2091,6 +2091,9 @@ xmlNewTextReader(xmlParserInputBufferPtr input, const char *URI) { "xmlNewTextReader : malloc failed\n"); return(NULL); } + /* no operation on a reader should require a huge buffer */ + xmlBufSetAllocationScheme(ret->buffer, + XML_BUFFER_ALLOC_BOUNDED); ret->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); if (ret->sax == NULL) { xmlBufFree(ret->buffer); @@ -2155,7 +2158,7 @@ xmlNewTextReader(xmlParserInputBufferPtr input, const char *URI) { ret->ctxt->dictNames = 1; ret->allocs = XML_TEXTREADER_CTXT; /* - * use the parser dictionnary to allocate all elements and attributes names + * use the parser dictionary to allocate all elements and attributes names */ ret->ctxt->docdict = 1; ret->dict = ret->ctxt->dict; @@ -3616,6 +3619,7 @@ xmlTextReaderConstValue(xmlTextReaderPtr reader) { return(((xmlNsPtr) node)->href); case XML_ATTRIBUTE_NODE:{ xmlAttrPtr attr = (xmlAttrPtr) node; + const xmlChar *ret; if ((attr->children != NULL) && (attr->children->type == XML_TEXT_NODE) && @@ -3629,10 +3633,21 @@ xmlTextReaderConstValue(xmlTextReaderPtr reader) { "xmlTextReaderSetup : malloc failed\n"); return (NULL); } + xmlBufSetAllocationScheme(reader->buffer, + XML_BUFFER_ALLOC_BOUNDED); } else xmlBufEmpty(reader->buffer); xmlBufGetNodeContent(reader->buffer, node); - return(xmlBufContent(reader->buffer)); + ret = xmlBufContent(reader->buffer); + if (ret == NULL) { + /* error on the buffer best to reallocate */ + xmlBufFree(reader->buffer); + reader->buffer = xmlBufCreateSize(100); + xmlBufSetAllocationScheme(reader->buffer, + XML_BUFFER_ALLOC_BOUNDED); + ret = BAD_CAST ""; + } + return(ret); } break; } @@ -4035,13 +4050,19 @@ xmlTextReaderCurrentDoc(xmlTextReaderPtr reader) { } #ifdef LIBXML_SCHEMAS_ENABLED -static char *xmlTextReaderBuildMessage(const char *msg, va_list ap); +static char *xmlTextReaderBuildMessage(const char *msg, va_list ap) LIBXML_ATTR_FORMAT(1,0); + +static void XMLCDECL +xmlTextReaderValidityError(void *ctxt, const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); static void XMLCDECL -xmlTextReaderValidityError(void *ctxt, const char *msg, ...); +xmlTextReaderValidityWarning(void *ctxt, const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); static void XMLCDECL -xmlTextReaderValidityWarning(void *ctxt, const char *msg, ...); +xmlTextReaderValidityErrorRelay(void *ctx, const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); + +static void XMLCDECL +xmlTextReaderValidityWarningRelay(void *ctx, const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); static void XMLCDECL xmlTextReaderValidityErrorRelay(void *ctx, const char *msg, ...) @@ -4835,7 +4856,7 @@ xmlTextReaderStructuredError(void *ctxt, xmlErrorPtr error) } } -static void XMLCDECL +static void XMLCDECL LIBXML_ATTR_FORMAT(2,3) xmlTextReaderError(void *ctxt, const char *msg, ...) { va_list ap; @@ -4848,7 +4869,7 @@ xmlTextReaderError(void *ctxt, const char *msg, ...) } -static void XMLCDECL +static void XMLCDECL LIBXML_ATTR_FORMAT(2,3) xmlTextReaderWarning(void *ctxt, const char *msg, ...) { va_list ap; @@ -5131,6 +5152,9 @@ xmlTextReaderSetup(xmlTextReaderPtr reader, "xmlTextReaderSetup : malloc failed\n"); return (-1); } + /* no operation on a reader should require a huge buffer */ + xmlBufSetAllocationScheme(reader->buffer, + XML_BUFFER_ALLOC_BOUNDED); if (reader->sax == NULL) reader->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); if (reader->sax == NULL) { @@ -5231,7 +5255,7 @@ xmlTextReaderSetup(xmlTextReaderPtr reader, reader->ctxt->linenumbers = 1; reader->ctxt->dictNames = 1; /* - * use the parser dictionnary to allocate all elements and attributes names + * use the parser dictionary to allocate all elements and attributes names */ reader->ctxt->docdict = 1; reader->ctxt->parseMode = XML_PARSE_READER; diff --git a/xmlregexp.c b/xmlregexp.c index 3e912ab..ca3b4f4 100644 --- a/xmlregexp.c +++ b/xmlregexp.c @@ -1544,6 +1544,7 @@ static int xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, xmlRegStatePtr to, xmlRegAtomPtr atom) { xmlRegStatePtr end; + int nullable = 0; if (atom == NULL) { ERROR("genrate transition: atom == NULL"); @@ -1730,6 +1731,13 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, if (xmlRegAtomPush(ctxt, atom) < 0) { return(-1); } + if ((atom->quant == XML_REGEXP_QUANT_RANGE) && + (atom->min == 0) && (atom->max > 0)) { + nullable = 1; + atom->min = 1; + if (atom->max == 1) + atom->quant = XML_REGEXP_QUANT_OPT; + } xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1); ctxt->state = end; switch (atom->quant) { @@ -1747,11 +1755,8 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); break; case XML_REGEXP_QUANT_RANGE: -#if DV_test - if (atom->min == 0) { + if (nullable) xmlFAGenerateEpsilonTransition(ctxt, from, to); - } -#endif break; default: break; @@ -5052,11 +5057,12 @@ xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) { ERROR("Expecting the end of a char range"); return; } - NEXTL(len); + /* TODO check that the values are acceptable character ranges for XML */ if (end < start) { ERROR("End of range is before start of range"); } else { + NEXTL(len); xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, XML_REGEXP_CHARVAL, start, end, NULL); } @@ -6345,7 +6351,7 @@ struct _xmlExpCtxt { /** * xmlExpNewCtxt: * @maxNodes: the maximum number of nodes - * @dict: optional dictionnary to use internally + * @dict: optional dictionary to use internally * * Creates a new context for manipulating expressions * @@ -7204,7 +7210,7 @@ xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, return(NULL); } /* - * check the string is in the dictionnary, if yes use an interned + * check the string is in the dictionary, if yes use an interned * copy, otherwise we know it's not an acceptable input */ input = xmlDictExists(ctxt->dict, str, len); diff --git a/xmlsave.c b/xmlsave.c index 774404b..4a8e3f3 100644 --- a/xmlsave.c +++ b/xmlsave.c @@ -2097,8 +2097,8 @@ xmlBufAttrSerializeTxtContent(xmlBufPtr buf, xmlDocPtr doc, xmlBufAdd(buf, BAD_CAST "&", 5); cur++; base = cur; - } else if ((*cur >= 0x80) && ((doc == NULL) || - (doc->encoding == NULL))) { + } else if ((*cur >= 0x80) && (cur[1] != 0) && + ((doc == NULL) || (doc->encoding == NULL))) { /* * We assume we have UTF-8 content. */ @@ -2121,14 +2121,14 @@ xmlBufAttrSerializeTxtContent(xmlBufPtr buf, xmlDocPtr doc, val <<= 6; val |= (cur[1]) & 0x3F; l = 2; - } else if (*cur < 0xF0) { + } else if ((*cur < 0xF0) && (cur [2] != 0)) { val = (cur[0]) & 0x0F; val <<= 6; val |= (cur[1]) & 0x3F; val <<= 6; val |= (cur[2]) & 0x3F; l = 3; - } else if (*cur < 0xF8) { + } else if ((*cur < 0xF8) && (cur [2] != 0) && (cur[3] != 0)) { val = (cur[0]) & 0x07; val <<= 6; val |= (cur[1]) & 0x3F; diff --git a/xmlschemas.c b/xmlschemas.c index 0657b66..e1b3a4f 100644 --- a/xmlschemas.c +++ b/xmlschemas.c @@ -617,7 +617,7 @@ struct _xmlSchemaParserCtxt { xmlAutomataStatePtr end; xmlAutomataStatePtr state; - xmlDictPtr dict; /* dictionnary for interned string names */ + xmlDictPtr dict; /* dictionary for interned string names */ xmlSchemaTypePtr ctxtType; /* The current context simple/complex type */ int options; xmlSchemaValidCtxtPtr vctxt; @@ -1085,7 +1085,7 @@ xmlSchemaGetUnionSimpleTypeMemberTypes(xmlSchemaTypePtr type); static void xmlSchemaInternalErr(xmlSchemaAbstractCtxtPtr actxt, const char *funcName, - const char *message); + const char *message) LIBXML_ATTR_FORMAT(3,0); static int xmlSchemaCheckCOSSTDerivedOK(xmlSchemaAbstractCtxtPtr ctxt, xmlSchemaTypePtr type, @@ -1769,7 +1769,7 @@ xmlSchemaFormatItemForReport(xmlChar **buf, } FREE_AND_NULL(str) - return (*buf); + return (xmlEscapeFormatString(buf)); } /** @@ -1889,7 +1889,7 @@ xmlSchemaPErrMemory(xmlSchemaParserCtxtPtr ctxt, * * Handle a parser error */ -static void +static void LIBXML_ATTR_FORMAT(4,0) xmlSchemaPErr(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node, int error, const char *msg, const xmlChar * str1, const xmlChar * str2) { @@ -1922,7 +1922,7 @@ xmlSchemaPErr(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node, int error, * * Handle a parser error */ -static void +static void LIBXML_ATTR_FORMAT(5,0) xmlSchemaPErr2(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node, xmlNodePtr child, int error, const char *msg, const xmlChar * str1, const xmlChar * str2) @@ -1951,7 +1951,7 @@ xmlSchemaPErr2(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node, * * Handle a parser error */ -static void +static void LIBXML_ATTR_FORMAT(7,0) xmlSchemaPErrExt(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node, int error, const xmlChar * strData1, const xmlChar * strData2, const xmlChar * strData3, const char *msg, const xmlChar * str1, @@ -2002,7 +2002,7 @@ xmlSchemaVErrMemory(xmlSchemaValidCtxtPtr ctxt, extra); } -static void +static void LIBXML_ATTR_FORMAT(2,0) xmlSchemaPSimpleInternalErr(xmlNodePtr node, const char *msg, const xmlChar *str) { @@ -2013,18 +2013,21 @@ xmlSchemaPSimpleInternalErr(xmlNodePtr node, #define WXS_ERROR_TYPE_ERROR 1 #define WXS_ERROR_TYPE_WARNING 2 /** - * xmlSchemaErr3: + * xmlSchemaErr4Line: * @ctxt: the validation context - * @node: the context node + * @errorLevel: the error level * @error: the error code + * @node: the context node + * @line: the line number * @msg: the error message * @str1: extra data * @str2: extra data * @str3: extra data + * @str4: extra data * * Handle a validation error */ -static void +static void LIBXML_ATTR_FORMAT(6,0) xmlSchemaErr4Line(xmlSchemaAbstractCtxtPtr ctxt, xmlErrorLevel errorLevel, int error, xmlNodePtr node, int line, const char *msg, @@ -2139,7 +2142,7 @@ xmlSchemaErr4Line(xmlSchemaAbstractCtxtPtr ctxt, * * Handle a validation error */ -static void +static void LIBXML_ATTR_FORMAT(4,0) xmlSchemaErr3(xmlSchemaAbstractCtxtPtr actxt, int error, xmlNodePtr node, const char *msg, const xmlChar *str1, const xmlChar *str2, const xmlChar *str3) @@ -2148,7 +2151,7 @@ xmlSchemaErr3(xmlSchemaAbstractCtxtPtr actxt, msg, str1, str2, str3, NULL); } -static void +static void LIBXML_ATTR_FORMAT(4,0) xmlSchemaErr4(xmlSchemaAbstractCtxtPtr actxt, int error, xmlNodePtr node, const char *msg, const xmlChar *str1, const xmlChar *str2, @@ -2158,7 +2161,7 @@ xmlSchemaErr4(xmlSchemaAbstractCtxtPtr actxt, msg, str1, str2, str3, str4); } -static void +static void LIBXML_ATTR_FORMAT(4,0) xmlSchemaErr(xmlSchemaAbstractCtxtPtr actxt, int error, xmlNodePtr node, const char *msg, const xmlChar *str1, const xmlChar *str2) @@ -2181,7 +2184,7 @@ xmlSchemaFormatNodeForError(xmlChar ** msg, /* * Don't try to format other nodes than element and * attribute nodes. - * Play save and return an empty string. + * Play safe and return an empty string. */ *msg = xmlStrdup(BAD_CAST ""); return(*msg); @@ -2246,6 +2249,13 @@ xmlSchemaFormatNodeForError(xmlChar ** msg, TODO return (NULL); } + + /* + * xmlSchemaFormatItemForReport() also returns an escaped format + * string, so do this before calling it below (in the future). + */ + xmlEscapeFormatString(msg); + /* * VAL TODO: The output of the given schema component is currently * disabled. @@ -2262,7 +2272,7 @@ xmlSchemaFormatNodeForError(xmlChar ** msg, return (*msg); } -static void +static void LIBXML_ATTR_FORMAT(3,0) xmlSchemaInternalErr2(xmlSchemaAbstractCtxtPtr actxt, const char *funcName, const char *message, @@ -2273,24 +2283,21 @@ xmlSchemaInternalErr2(xmlSchemaAbstractCtxtPtr actxt, if (actxt == NULL) return; - msg = xmlStrdup(BAD_CAST "Internal error: "); - msg = xmlStrcat(msg, BAD_CAST funcName); - msg = xmlStrcat(msg, BAD_CAST ", "); + msg = xmlStrdup(BAD_CAST "Internal error: %s, "); msg = xmlStrcat(msg, BAD_CAST message); msg = xmlStrcat(msg, BAD_CAST ".\n"); if (actxt->type == XML_SCHEMA_CTXT_VALIDATOR) - xmlSchemaErr(actxt, XML_SCHEMAV_INTERNAL, NULL, - (const char *) msg, str1, str2); - + xmlSchemaErr3(actxt, XML_SCHEMAV_INTERNAL, NULL, + (const char *) msg, (const xmlChar *) funcName, str1, str2); else if (actxt->type == XML_SCHEMA_CTXT_PARSER) - xmlSchemaErr(actxt, XML_SCHEMAP_INTERNAL, NULL, - (const char *) msg, str1, str2); + xmlSchemaErr3(actxt, XML_SCHEMAP_INTERNAL, NULL, + (const char *) msg, (const xmlChar *) funcName, str1, str2); FREE_AND_NULL(msg) } -static void +static void LIBXML_ATTR_FORMAT(3,0) xmlSchemaInternalErr(xmlSchemaAbstractCtxtPtr actxt, const char *funcName, const char *message) @@ -2299,7 +2306,7 @@ xmlSchemaInternalErr(xmlSchemaAbstractCtxtPtr actxt, } #if 0 -static void +static void LIBXML_ATTR_FORMAT(3,0) xmlSchemaPInternalErr(xmlSchemaParserCtxtPtr pctxt, const char *funcName, const char *message, @@ -2311,7 +2318,7 @@ xmlSchemaPInternalErr(xmlSchemaParserCtxtPtr pctxt, } #endif -static void +static void LIBXML_ATTR_FORMAT(5,0) xmlSchemaCustomErr4(xmlSchemaAbstractCtxtPtr actxt, xmlParserErrors error, xmlNodePtr node, @@ -2336,7 +2343,7 @@ xmlSchemaCustomErr4(xmlSchemaAbstractCtxtPtr actxt, FREE_AND_NULL(msg) } -static void +static void LIBXML_ATTR_FORMAT(5,0) xmlSchemaCustomErr(xmlSchemaAbstractCtxtPtr actxt, xmlParserErrors error, xmlNodePtr node, @@ -2351,7 +2358,7 @@ xmlSchemaCustomErr(xmlSchemaAbstractCtxtPtr actxt, -static void +static void LIBXML_ATTR_FORMAT(5,0) xmlSchemaCustomWarning(xmlSchemaAbstractCtxtPtr actxt, xmlParserErrors error, xmlNodePtr node, @@ -2376,7 +2383,7 @@ xmlSchemaCustomWarning(xmlSchemaAbstractCtxtPtr actxt, -static void +static void LIBXML_ATTR_FORMAT(5,0) xmlSchemaKeyrefErr(xmlSchemaValidCtxtPtr vctxt, xmlParserErrors error, xmlSchemaPSVIIDCNodePtr idcNode, @@ -2476,11 +2483,13 @@ xmlSchemaSimpleTypeErr(xmlSchemaAbstractCtxtPtr actxt, msg = xmlStrcat(msg, BAD_CAST " '"); if (type->builtInType != 0) { msg = xmlStrcat(msg, BAD_CAST "xs:"); - msg = xmlStrcat(msg, type->name); - } else - msg = xmlStrcat(msg, - xmlSchemaFormatQName(&str, - type->targetNamespace, type->name)); + str = xmlStrdup(type->name); + } else { + const xmlChar *qName = xmlSchemaFormatQName(&str, type->targetNamespace, type->name); + if (!str) + str = xmlStrdup(qName); + } + msg = xmlStrcat(msg, xmlEscapeFormatString(&str)); msg = xmlStrcat(msg, BAD_CAST "'"); FREE_AND_NULL(str); } @@ -2525,7 +2534,7 @@ xmlSchemaIllegalAttrErr(xmlSchemaAbstractCtxtPtr actxt, FREE_AND_NULL(msg) } -static void +static void LIBXML_ATTR_FORMAT(5,0) xmlSchemaComplexTypeErr(xmlSchemaAbstractCtxtPtr actxt, xmlParserErrors error, xmlNodePtr node, @@ -2617,7 +2626,7 @@ xmlSchemaComplexTypeErr(xmlSchemaAbstractCtxtPtr actxt, str = xmlStrcat(str, BAD_CAST ", "); } str = xmlStrcat(str, BAD_CAST " ).\n"); - msg = xmlStrcat(msg, BAD_CAST str); + msg = xmlStrcat(msg, xmlEscapeFormatString(&str)); FREE_AND_NULL(str) } else msg = xmlStrcat(msg, BAD_CAST "\n"); @@ -2625,7 +2634,7 @@ xmlSchemaComplexTypeErr(xmlSchemaAbstractCtxtPtr actxt, xmlFree(msg); } -static void +static void LIBXML_ATTR_FORMAT(8,0) xmlSchemaFacetErr(xmlSchemaAbstractCtxtPtr actxt, xmlParserErrors error, xmlNodePtr node, @@ -2916,7 +2925,7 @@ xmlSchemaPIllegalAttrErr(xmlSchemaParserCtxtPtr ctxt, * * Reports an error during parsing. */ -static void +static void LIBXML_ATTR_FORMAT(5,0) xmlSchemaPCustomErrExt(xmlSchemaParserCtxtPtr ctxt, xmlParserErrors error, xmlSchemaBasicItemPtr item, @@ -2952,7 +2961,7 @@ xmlSchemaPCustomErrExt(xmlSchemaParserCtxtPtr ctxt, * * Reports an error during parsing. */ -static void +static void LIBXML_ATTR_FORMAT(5,0) xmlSchemaPCustomErr(xmlSchemaParserCtxtPtr ctxt, xmlParserErrors error, xmlSchemaBasicItemPtr item, @@ -2977,7 +2986,7 @@ xmlSchemaPCustomErr(xmlSchemaParserCtxtPtr ctxt, * * Reports an attribute use error during parsing. */ -static void +static void LIBXML_ATTR_FORMAT(6,0) xmlSchemaPAttrUseErr4(xmlSchemaParserCtxtPtr ctxt, xmlParserErrors error, xmlNodePtr node, @@ -3099,7 +3108,7 @@ xmlSchemaPMutualExclAttrErr(xmlSchemaParserCtxtPtr ctxt, * Reports a simple type validation error. * TODO: Should this report the value of an element as well? */ -static void +static void LIBXML_ATTR_FORMAT(8,0) xmlSchemaPSimpleTypeErr(xmlSchemaParserCtxtPtr ctxt, xmlParserErrors error, xmlSchemaBasicItemPtr ownerItem ATTRIBUTE_UNUSED, @@ -3141,11 +3150,13 @@ xmlSchemaPSimpleTypeErr(xmlSchemaParserCtxtPtr ctxt, msg = xmlStrcat(msg, BAD_CAST " '"); if (type->builtInType != 0) { msg = xmlStrcat(msg, BAD_CAST "xs:"); - msg = xmlStrcat(msg, type->name); - } else - msg = xmlStrcat(msg, - xmlSchemaFormatQName(&str, - type->targetNamespace, type->name)); + str = xmlStrdup(type->name); + } else { + const xmlChar *qName = xmlSchemaFormatQName(&str, type->targetNamespace, type->name); + if (!str) + str = xmlStrdup(qName); + } + msg = xmlStrcat(msg, xmlEscapeFormatString(&str)); msg = xmlStrcat(msg, BAD_CAST "'."); FREE_AND_NULL(str); } @@ -3158,7 +3169,9 @@ xmlSchemaPSimpleTypeErr(xmlSchemaParserCtxtPtr ctxt, } if (expected) { msg = xmlStrcat(msg, BAD_CAST " Expected is '"); - msg = xmlStrcat(msg, BAD_CAST expected); + xmlChar *expectedEscaped = xmlCharStrdup(expected); + msg = xmlStrcat(msg, xmlEscapeFormatString(&expectedEscaped)); + FREE_AND_NULL(expectedEscaped); msg = xmlStrcat(msg, BAD_CAST "'.\n"); } else msg = xmlStrcat(msg, BAD_CAST "\n"); @@ -24186,6 +24199,7 @@ xmlSchemaValidateFacets(xmlSchemaAbstractCtxtPtr actxt, else goto pattern_and_enum; } + /* * Whitespace handling is only of importance for string-based * types. @@ -24196,14 +24210,13 @@ xmlSchemaValidateFacets(xmlSchemaAbstractCtxtPtr actxt, ws = xmlSchemaGetWhiteSpaceFacetValue(type); } else ws = XML_SCHEMA_WHITESPACE_COLLAPSE; + /* * If the value was not computed (for string or * anySimpleType based types), then use the provided * type. */ - if (val == NULL) - valType = valType; - else + if (val != NULL) valType = xmlSchemaGetValType(val); ret = 0; @@ -25546,7 +25559,7 @@ xmlSchemaVAttributesComplex(xmlSchemaValidCtxtPtr vctxt) if (xmlNewProp(defAttrOwnerElem, iattr->localName, value) == NULL) { VERROR_INT("xmlSchemaVAttributesComplex", - "callling xmlNewProp()"); + "calling xmlNewProp()"); if (normValue != NULL) xmlFree(normValue); goto internal_error; @@ -27382,10 +27395,17 @@ xmlSchemaSAXHandleStartElementNs(void *ctx, for (j = 0, i = 0; i < nb_attributes; i++, j += 5) { /* - * Duplicate the value. + * Duplicate the value, changing any & to a literal ampersand. + * + * libxml2 differs from normal SAX here in that it escapes all ampersands + * as & instead of delivering the raw converted string. Changing the + * behavior at this point would break applications that use this API, so + * we are forced to work around it. There is no danger of accidentally + * decoding some entity other than & in this step because without + * unescaped ampersands there can be no other entities in the string. */ - value = xmlStrndup(attributes[j+3], - attributes[j+4] - attributes[j+3]); + value = xmlStringLenDecodeEntities(vctxt->parserCtxt, attributes[j+3], + attributes[j+4] - attributes[j+3], XML_SUBSTITUTE_REF, 0, 0, 0); /* * TODO: Set the node line. */ diff --git a/xmlschemastypes.c b/xmlschemastypes.c index ff64f50..5f38599 100644 --- a/xmlschemastypes.c +++ b/xmlschemastypes.c @@ -62,7 +62,7 @@ struct _xmlSchemaValDate { long year; unsigned int mon :4; /* 1 <= mon <= 12 */ unsigned int day :5; /* 1 <= day <= 31 */ - unsigned int hour :5; /* 0 <= hour <= 23 */ + unsigned int hour :5; /* 0 <= hour <= 24 */ unsigned int min :6; /* 0 <= min <= 59 */ double sec; unsigned int tz_flag :1; /* is tzo explicitely set? */ @@ -1139,9 +1139,13 @@ static const unsigned int daysInMonthLeap[12] = #define VALID_DATE(dt) \ (VALID_YEAR(dt->year) && VALID_MONTH(dt->mon) && VALID_MDAY(dt)) +#define VALID_END_OF_DAY(dt) \ + ((dt)->hour == 24 && (dt)->min == 0 && (dt)->sec == 0) + #define VALID_TIME(dt) \ - (VALID_HOUR(dt->hour) && VALID_MIN(dt->min) && \ - VALID_SEC(dt->sec) && VALID_TZO(dt->tzo)) + (((VALID_HOUR(dt->hour) && VALID_MIN(dt->min) && \ + VALID_SEC(dt->sec)) || VALID_END_OF_DAY(dt)) && \ + VALID_TZO(dt->tzo)) #define VALID_DATETIME(dt) \ (VALID_DATE(dt) && VALID_TIME(dt)) @@ -1355,7 +1359,7 @@ _xmlSchemaParseTime (xmlSchemaValDatePtr dt, const xmlChar **str) { return ret; if (*cur != ':') return 1; - if (!VALID_HOUR(value)) + if (!VALID_HOUR(value) && value != 24 /* Allow end-of-day hour */) return 2; cur++; @@ -1377,7 +1381,7 @@ _xmlSchemaParseTime (xmlSchemaValDatePtr dt, const xmlChar **str) { if (ret != 0) return ret; - if ((!VALID_SEC(dt->sec)) || (!VALID_TZO(dt->tzo))) + if (!VALID_TIME(dt)) return 2; *str = cur; @@ -5303,6 +5307,7 @@ xmlSchemaValidateFacetInternal(xmlSchemaFacetPtr facet, xmlSchemaWhitespaceValueType ws) { int ret; + int stringType; if (facet == NULL) return(-1); @@ -5315,7 +5320,15 @@ xmlSchemaValidateFacetInternal(xmlSchemaFacetPtr facet, */ if (value == NULL) return(-1); - ret = xmlRegexpExec(facet->regexp, value); + /* + * If string-derived type, regexp must be tested on the value space of + * the datatype. + * See https://www.w3.org/TR/xmlschema-2/#rf-pattern + */ + stringType = val && ((val->type >= XML_SCHEMAS_STRING && val->type <= XML_SCHEMAS_NORMSTRING) + || (val->type >= XML_SCHEMAS_TOKEN && val->type <= XML_SCHEMAS_NCNAME)); + ret = xmlRegexpExec(facet->regexp, + (stringType && val->value.str) ? val->value.str : value); if (ret == 1) return(0); if (ret == 0) diff --git a/xmlstring.c b/xmlstring.c index a37220d..cc85777 100644 --- a/xmlstring.c +++ b/xmlstring.c @@ -457,6 +457,8 @@ xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { return(xmlStrndup(add, len)); size = xmlStrlen(cur); + if (size < 0) + return(NULL); ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); if (ret == NULL) { xmlErrMemory(NULL, NULL); @@ -484,14 +486,19 @@ xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) { int size; xmlChar *ret; - if (len < 0) + if (len < 0) { len = xmlStrlen(str2); + if (len < 0) + return(NULL); + } if ((str2 == NULL) || (len == 0)) return(xmlStrdup(str1)); if (str1 == NULL) return(xmlStrndup(str2, len)); size = xmlStrlen(str1); + if (size < 0) + return(NULL); ret = (xmlChar *) xmlMalloc((size + len + 1) * sizeof(xmlChar)); if (ret == NULL) { xmlErrMemory(NULL, NULL); @@ -538,7 +545,7 @@ xmlStrcat(xmlChar *cur, const xmlChar *add) { * Returns the number of characters written to @buf or -1 if an error occurs. */ int XMLCDECL -xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) { +xmlStrPrintf(xmlChar *buf, int len, const char *msg, ...) { va_list args; int ret; @@ -566,7 +573,7 @@ xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) { * Returns the number of characters written to @buf or -1 if an error occurs. */ int -xmlStrVPrintf(xmlChar *buf, int len, const xmlChar *msg, va_list ap) { +xmlStrVPrintf(xmlChar *buf, int len, const char *msg, va_list ap) { int ret; if((buf == NULL) || (msg == NULL)) { @@ -837,8 +844,8 @@ xmlUTF8Strsize(const xmlChar *utf, int len) { break; if ( (ch = *ptr++) & 0x80) while ((ch<<=1) & 0x80 ) { - ptr++; if (*ptr == 0) break; + ptr++; } } return (ptr - utf); @@ -980,5 +987,60 @@ xmlUTF8Strsub(const xmlChar *utf, int start, int len) { return(xmlUTF8Strndup(utf, len)); } +/** + * xmlEscapeFormatString: + * @msg: a pointer to the string in which to escape '%' characters. + * Must be a heap-allocated buffer created by libxml2 that may be + * returned, or that may be freed and replaced. + * + * Replaces the string pointed to by 'msg' with an escaped string. + * Returns the same string with all '%' characters escaped. + */ +xmlChar * +xmlEscapeFormatString(xmlChar **msg) +{ + xmlChar *msgPtr = NULL; + xmlChar *result = NULL; + xmlChar *resultPtr = NULL; + size_t count = 0; + size_t msgLen = 0; + size_t resultLen = 0; + + if (!msg || !*msg) + return(NULL); + + for (msgPtr = *msg; *msgPtr != '\0'; ++msgPtr) { + ++msgLen; + if (*msgPtr == '%') + ++count; + } + + if (count == 0) + return(*msg); + + resultLen = msgLen + count + 1; + result = (xmlChar *) xmlMallocAtomic(resultLen * sizeof(xmlChar)); + if (result == NULL) { + /* Clear *msg to prevent format string vulnerabilities in + out-of-memory situations. */ + xmlFree(*msg); + *msg = NULL; + xmlErrMemory(NULL, NULL); + return(NULL); + } + + for (msgPtr = *msg, resultPtr = result; *msgPtr != '\0'; ++msgPtr, ++resultPtr) { + *resultPtr = *msgPtr; + if (*msgPtr == '%') + *(++resultPtr) = '%'; + } + result[resultLen - 1] = '\0'; + + xmlFree(*msg); + *msg = result; + + return *msg; +} + #define bottom_xmlstring #include "elfgcchack.h" diff --git a/xmlwriter.c b/xmlwriter.c index fac20ac..69541b8 100644 --- a/xmlwriter.c +++ b/xmlwriter.c @@ -113,7 +113,7 @@ static int xmlTextWriterWriteDocCallback(void *context, const xmlChar * str, int len); static int xmlTextWriterCloseDocCallback(void *context); -static xmlChar *xmlTextWriterVSprintf(const char *format, va_list argptr); +static xmlChar *xmlTextWriterVSprintf(const char *format, va_list argptr) LIBXML_ATTR_FORMAT(1,0); static int xmlOutputBufferWriteBase64(xmlOutputBufferPtr out, int len, const unsigned char *data); static void xmlTextWriterStartDocumentCallback(void *ctx); @@ -153,7 +153,7 @@ xmlWriterErrMsg(xmlTextWriterPtr ctxt, xmlParserErrors error, * * Handle a writer error */ -static void +static void LIBXML_ATTR_FORMAT(3,0) xmlWriterErrMsgInt(xmlTextWriterPtr ctxt, xmlParserErrors error, const char *msg, int val) { diff --git a/xpath.c b/xpath.c index dc41ce6..113bce6 100644 --- a/xpath.c +++ b/xpath.c @@ -361,14 +361,14 @@ turtle_comparison: /* * compute depth to root */ - for (depth2 = 0, cur = node2;cur->parent != NULL;cur = cur->parent) { - if (cur == node1) + for (depth2 = 0, cur = node2; cur->parent != NULL; cur = cur->parent) { + if (cur->parent == node1) return(1); depth2++; } root = cur; - for (depth1 = 0, cur = node1;cur->parent != NULL;cur = cur->parent) { - if (cur == node2) + for (depth1 = 0, cur = node1; cur->parent != NULL; cur = cur->parent) { + if (cur->parent == node2) return(-1); depth1++; } @@ -639,7 +639,7 @@ xmlXPathErrMemory(xmlXPathContextPtr ctxt, const char *extra) xmlChar buf[200]; xmlStrPrintf(buf, 200, - BAD_CAST "Memory allocation failed : %s\n", + "Memory allocation failed : %s\n", extra); ctxt->lastError.message = (char *) xmlStrdup(buf); } else { @@ -945,7 +945,7 @@ struct _xmlXPathCompExpr { xmlXPathStepOp *steps; /* ops for computation of this expression */ int last; /* index of last step in expression */ xmlChar *expr; /* the expression being computed */ - xmlDictPtr dict; /* the dictionnary to use if any */ + xmlDictPtr dict; /* the dictionary to use if any */ #ifdef DEBUG_EVAL_COUNTS int nb; xmlChar *string; @@ -3706,7 +3706,7 @@ xmlXPathNodeSetAdd(xmlNodeSetPtr cur, xmlNodePtr val) { /* @@ with_ns to check whether namespace nodes should be looked at @@ */ /* - * prevent duplcates + * prevent duplicates */ for (i = 0;i < cur->nodeNr;i++) if (cur->nodeTab[i] == val) return(0); @@ -7933,14 +7933,14 @@ xmlXPathNextDescendant(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { xmlNodePtr xmlXPathNextDescendantOrSelf(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { if ((ctxt == NULL) || (ctxt->context == NULL)) return(NULL); - if (cur == NULL) { - if (ctxt->context->node == NULL) - return(NULL); - if ((ctxt->context->node->type == XML_ATTRIBUTE_NODE) || - (ctxt->context->node->type == XML_NAMESPACE_DECL)) - return(NULL); + if (cur == NULL) return(ctxt->context->node); - } + + if (ctxt->context->node == NULL) + return(NULL); + if ((ctxt->context->node->type == XML_ATTRIBUTE_NODE) || + (ctxt->context->node->type == XML_NAMESPACE_DECL)) + return(NULL); return(xmlXPathNextDescendant(ctxt, cur)); } @@ -8390,7 +8390,7 @@ xmlNodePtr xmlXPathNextNamespace(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { if ((ctxt == NULL) || (ctxt->context == NULL)) return(NULL); if (ctxt->context->node->type != XML_ELEMENT_NODE) return(NULL); - if (ctxt->context->tmpNsList == NULL && cur != (xmlNodePtr) xmlXPathXMLNamespace) { + if (cur == NULL) { if (ctxt->context->tmpNsList != NULL) xmlFree(ctxt->context->tmpNsList); ctxt->context->tmpNsList = @@ -9996,7 +9996,7 @@ xmlXPathParseNameComplex(xmlXPathParserContextPtr ctxt, int qualified) { (c == '[') || (c == ']') || (c == '@') || /* accelerators */ (c == '*') || /* accelerators */ (!IS_LETTER(c) && (c != '_') && - ((qualified) && (c != ':')))) { + ((!qualified) || (c != ':')))) { return(NULL); } @@ -12379,11 +12379,6 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt, STRANGE goto error; case NODE_TEST_TYPE: - /* - * TODO: Don't we need to use - * xmlXPathNodeSetAddNs() for namespace nodes here? - * Surprisingly, some c14n tests fail, if we do this. - */ if (type == NODE_TYPE_NODE) { switch (cur->type) { case XML_DOCUMENT_NODE: @@ -12397,9 +12392,17 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt, case XML_COMMENT_NODE: case XML_CDATA_SECTION_NODE: case XML_TEXT_NODE: - case XML_NAMESPACE_DECL: XP_TEST_HIT break; + case XML_NAMESPACE_DECL: { + if (axis == AXIS_NAMESPACE) { + XP_TEST_HIT_NS + } else { + hasNsNodes = 1; + XP_TEST_HIT + } + break; + } default: break; } @@ -12691,6 +12694,14 @@ error: * Reset the context node. */ xpctxt->node = oldContextNode; + /* + * When traversing the namespace axis in "toBool" mode, it's + * possible that tmpNsList wasn't freed. + */ + if (xpctxt->tmpNsList != NULL) { + xmlFree(xpctxt->tmpNsList); + xpctxt->tmpNsList = NULL; + } #ifdef DEBUG_STEP xmlGenericError(xmlGenericErrorContext, @@ -14784,6 +14795,10 @@ xmlXPathOptimizeExpression(xmlXPathCompExprPtr comp, xmlXPathStepOpPtr op) } } + /* OP_VALUE has invalid ch1. */ + if (op->op == XPATH_OP_VALUE) + return; + /* Recurse */ if (op->ch1 != -1) xmlXPathOptimizeExpression(comp, &comp->steps[op->ch1]); diff --git a/xpointer.c b/xpointer.c index 4b4ac2e..676c510 100644 --- a/xpointer.c +++ b/xpointer.c @@ -85,7 +85,7 @@ xmlXPtrErrMemory(const char *extra) * * Handle a redefinition of attribute error */ -static void +static void LIBXML_ATTR_FORMAT(3,0) xmlXPtrErr(xmlXPathParserContextPtr ctxt, int error, const char * msg, const xmlChar *extra) { diff --git a/xstc/Makefile.in b/xstc/Makefile.in index 2a9f034..f33abdf 100644 --- a/xstc/Makefile.in +++ b/xstc/Makefile.in @@ -1,7 +1,7 @@ -# Makefile.in generated by automake 1.13.4 from Makefile.am. +# Makefile.in generated by automake 1.15 from Makefile.am. # @configure_input@ -# Copyright (C) 1994-2013 Free Software Foundation, Inc. +# Copyright (C) 1994-2014 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -14,7 +14,17 @@ @SET_MAKE@ VPATH = @srcdir@ -am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ @@ -78,7 +88,6 @@ POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = xstc -DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ @@ -86,6 +95,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = @@ -110,6 +120,7 @@ am__can_run_installinfo = \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ @@ -151,6 +162,7 @@ HTML_DIR = @HTML_DIR@ HTML_OBJ = @HTML_OBJ@ HTTP_OBJ = @HTTP_OBJ@ ICONV_LIBS = @ICONV_LIBS@ +ICU_CFLAGS = @ICU_CFLAGS@ ICU_LIBS = @ICU_LIBS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ @@ -172,8 +184,10 @@ LIBXML_VERSION_NUMBER = @LIBXML_VERSION_NUMBER@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ LZMA_CFLAGS = @LZMA_CFLAGS@ LZMA_LIBS = @LZMA_LIBS@ +MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ @@ -366,7 +380,7 @@ CLEANFILES = $(PYSCRIPTS) test.log all: all-am .SUFFIXES: -$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -378,7 +392,6 @@ $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu xstc/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu xstc/Makefile -.PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ @@ -391,9 +404,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: $(am__configure_deps) +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): $(am__aclocal_m4_deps) +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): @@ -554,6 +567,8 @@ uninstall-am: mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags-am uninstall uninstall-am +.PRECIOUS: Makefile + # # Nothing is done by make, only make tests and # only if Python and Schemas are enabled. diff --git a/xzlib.c b/xzlib.c index 0dcb9f4..782957f 100644 --- a/xzlib.c +++ b/xzlib.c @@ -8,7 +8,7 @@ */ #define IN_LIBXML #include "libxml.h" -#ifdef HAVE_LZMA_H +#ifdef LIBXML_LZMA_ENABLED #include #ifdef HAVE_ERRNO_H @@ -34,7 +34,9 @@ #ifdef HAVE_ZLIB_H #include #endif +#ifdef HAVE_LZMA_H #include +#endif #include "xzlib.h" #include @@ -581,6 +583,10 @@ xz_decomp(xz_statep state) xz_error(state, LZMA_DATA_ERROR, "compressed data error"); return -1; } + if (ret == LZMA_PROG_ERROR) { + xz_error(state, LZMA_PROG_ERROR, "compression error"); + return -1; + } } while (strm->avail_out && ret != LZMA_STREAM_END); /* update available output and crc check value */ @@ -795,4 +801,4 @@ __libxml2_xzclose(xzFile file) xmlFree(state); return ret ? ret : LZMA_OK; } -#endif /* HAVE_LZMA_H */ +#endif /* LIBXML_LZMA_ENABLED */ -- 2.7.4