From 0f4a2b503dd4e7b548d325475e5e367f477e6233 Mon Sep 17 00:00:00 2001 From: DongHun Kwak Date: Wed, 7 Sep 2022 09:31:19 +0900 Subject: [PATCH] Imported Upstream version 2.9.14 --- CMakeLists.txt | 9 +++-- HTMLparser.c | 47 ++++++++++++------------- NEWS | 36 +++++++++++++++---- buf.c | 86 ++++++++++++++++++--------------------------- configure.ac | 11 ++++-- encoding.c | 4 +++ globals.c | 30 ++++++++-------- libxml.h | 7 ++-- parser.c | 7 ++-- python/libxml.c | 4 +++ python/setup.py.in | 2 +- result/XInclude/red.xml | 9 +++++ result/XInclude/red.xml.rdr | 26 ++++++++++++++ result/XPath/expr/strings | 24 +++++++++++++ result/regexp/issue301 | 4 +++ result/regexp/issue370 | 3 ++ result/regexp/issue65 | 2 ++ runtest.c | 12 +++++-- test/XInclude/docs/red.xml | 10 ++++++ test/XPath/expr/strings | 8 +++++ test/regexp/issue301 | 4 +++ test/regexp/issue370 | 3 ++ test/regexp/issue65 | 2 ++ tree.c | 72 ++++++++++++++----------------------- valid.c | 59 ++++++++++++++++--------------- xinclude.c | 5 +-- xmlregexp.c | 19 +++++----- xpath.c | 71 +++++++++++++++++-------------------- xpointer.c | 1 + 29 files changed, 344 insertions(+), 233 deletions(-) create mode 100644 result/XInclude/red.xml create mode 100644 result/XInclude/red.xml.rdr create mode 100644 result/regexp/issue301 create mode 100644 result/regexp/issue370 create mode 100644 result/regexp/issue65 create mode 100644 test/XInclude/docs/red.xml create mode 100644 test/regexp/issue301 create mode 100644 test/regexp/issue370 create mode 100644 test/regexp/issue65 diff --git a/CMakeLists.txt b/CMakeLists.txt index f922d5a..28c3a16 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,10 +105,13 @@ foreach(VARIABLE IN ITEMS WITH_AUTOMATA WITH_C14N WITH_CATALOG WITH_DEBUG WITH_D endif() endforeach() -set(LIBXML_VERSION ${LIBXML_MAJOR_VERSION}0${LIBXML_MINOR_VERSION}0${LIBXML_MICRO_VERSION}) -set(LIBXML_VERSION_STRING "${LIBXML_VERSION}") +set(LIBXML_VERSION ${VERSION}) set(LIBXML_VERSION_EXTRA "") -set(LIBXML_VERSION_NUMBER ${LIBXML_VERSION}) +math(EXPR LIBXML_VERSION_NUMBER " + ${LIBXML_MAJOR_VERSION} * 10000 + + ${LIBXML_MINOR_VERSION} * 100 + + ${LIBXML_MICRO_VERSION} +") set(MODULE_EXTENSION "${CMAKE_SHARED_LIBRARY_SUFFIX}") diff --git a/HTMLparser.c b/HTMLparser.c index 3e8a165..e720bb2 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -614,7 +614,8 @@ htmlSkipBlankChars(xmlParserCtxtPtr ctxt) { if (*ctxt->input->cur == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); } - res++; + if (res < INT_MAX) + res++; } return(res); } @@ -3960,26 +3961,6 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, "htmlParseStartTag: invalid element name\n", NULL, NULL); - /* - * The recovery code is disabled for now as it can result in - * quadratic behavior with the push parser. htmlParseStartTag - * must consume all content up to the final '>' in order to avoid - * rescanning for this terminator. - * - * For a proper fix in line with HTML5, htmlParseStartTag and - * htmlParseElement should only be called when there's an ASCII - * alpha character following the initial '<'. Otherwise, the '<' - * should be emitted as text (unless followed by '!', '/' or '?'). - */ -#if 0 - /* if recover preserve text on classic misconstructs */ - if ((ctxt->recovery) && ((IS_BLANK_CH(CUR)) || (CUR == '<') || - (CUR == '=') || (CUR == '>') || (((CUR >= '0') && (CUR <= '9'))))) { - htmlParseCharDataInternal(ctxt, '<'); - return(-1); - } -#endif - /* Dump the bogus tag like browsers do */ while ((CUR != 0) && (CUR != '>') && (ctxt->instate != XML_PARSER_EOF)) @@ -4432,9 +4413,15 @@ htmlParseContent(htmlParserCtxtPtr ctxt) { /* * Third case : a sub-element. */ - else if (CUR == '<') { + else if ((CUR == '<') && IS_ASCII_LETTER(NXT(1))) { htmlParseElement(ctxt); } + else if (CUR == '<') { + if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && + (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, BAD_CAST "<", 1); + NEXT; + } /* * Fourth case : a reference. If if has not been resolved, @@ -4831,13 +4818,19 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) { /* * Third case : a sub-element. */ - else if (CUR == '<') { + else if ((CUR == '<') && IS_ASCII_LETTER(NXT(1))) { htmlParseElementInternal(ctxt); if (currentNode != NULL) xmlFree(currentNode); currentNode = xmlStrdup(ctxt->name); depth = ctxt->nameNr; } + else if (CUR == '<') { + if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && + (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, BAD_CAST "<", 1); + NEXT; + } /* * Fourth case : a reference. If if has not been resolved, @@ -6004,7 +5997,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { "HPP: entering END_TAG\n"); #endif break; - } else if (cur == '<') { + } else if ((cur == '<') && IS_ASCII_LETTER(next)) { if ((!terminate) && (next == 0)) goto done; ctxt->instate = XML_PARSER_START_TAG; @@ -6014,6 +6007,12 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { "HPP: entering START_TAG\n"); #endif break; + } else if (cur == '<') { + if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && + (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, + BAD_CAST "<", 1); + NEXT; } else { /* * check that the text sequence is complete diff --git a/NEWS b/NEWS index 2ccdc10..c33d32a 100644 --- a/NEWS +++ b/NEWS @@ -1,12 +1,36 @@ NEWS file for libxml2 -The change log at -ChangeLog.html - describes the recents commits -to the GIT at -https://gitlab.gnome.org/GNOME/libxml2 - code base.Here is the list of public releases: +v2.9.14: May 02 2022: + - Security: + [CVE-2022-29824] Integer overflow in xmlBuf and xmlBuffer + Fix potential double-free in xmlXPtrStringRangeFunction + Fix memory leak in xmlFindCharEncodingHandler + Normalize XPath strings in-place + Prevent integer-overflow in htmlSkipBlankChars() and xmlSkipBlankChars() + (David Kilzer) + Fix leak of xmlElementContent (David Kilzer) + + - Bug fixes: + Fix parsing of subtracted regex character classes + Fix recursion check in xinclude.c + Reset last error in xmlCleanupGlobals + Fix certain combinations of regex range quantifiers + Fix range quantifier on subregex + + - Improvements: + Fix recovery from invalid HTML start tags + + - Build system, portability: + Define LFS macros before including system headers + Initialize XPath floating-point globals + configure: check for icu DEFS (James Hilliard) + configure.ac: produce tar.xz only (GNOME policy) (David Seifert) + CMakeLists.txt: Fix LIBXML_VERSION_NUMBER + Fix build with older Python versions + Fix --without-valid build + + v2.9.13: Feb 19 2022: - Security: [CVE-2022-23308] Use-after-free of ID and IDREF attributes diff --git a/buf.c b/buf.c index 24368d3..40a5ee0 100644 --- a/buf.c +++ b/buf.c @@ -30,6 +30,10 @@ #include /* for XML_MAX_TEXT_LENGTH */ #include "buf.h" +#ifndef SIZE_MAX +#define SIZE_MAX ((size_t) -1) +#endif + #define WITH_BUFFER_COMPAT /** @@ -156,6 +160,8 @@ xmlBufPtr xmlBufCreateSize(size_t size) { xmlBufPtr ret; + if (size == SIZE_MAX) + return(NULL); ret = (xmlBufPtr) xmlMalloc(sizeof(xmlBuf)); if (ret == NULL) { xmlBufMemoryError(NULL, "creating buffer"); @@ -166,8 +172,8 @@ xmlBufCreateSize(size_t size) { ret->error = 0; ret->buffer = NULL; ret->alloc = xmlBufferAllocScheme; - ret->size = (size ? size+2 : 0); /* +1 for ending null */ - ret->compat_size = (int) ret->size; + ret->size = (size ? size + 1 : 0); /* +1 for ending null */ + ret->compat_size = (ret->size > INT_MAX ? INT_MAX : ret->size); if (ret->size){ ret->content = (xmlChar *) xmlMallocAtomic(ret->size * sizeof(xmlChar)); if (ret->content == NULL) { @@ -442,23 +448,17 @@ xmlBufGrowInternal(xmlBufPtr buf, size_t len) { CHECK_COMPAT(buf) if (buf->alloc == XML_BUFFER_ALLOC_IMMUTABLE) return(0); - if (buf->use + len < buf->size) + if (len < buf->size - buf->use) return(buf->size - buf->use); + if (len > SIZE_MAX - buf->use) + return(0); - /* - * Windows has a BIG problem on realloc timing, so we try to double - * the buffer size (if that's enough) (bug 146697) - * Apparently BSD too, and it's probably best for linux too - * On an embedded system this may be something to change - */ -#if 1 - if (buf->size > (size_t) len) - size = buf->size * 2; - else - size = buf->use + len + 100; -#else - size = buf->use + len + 100; -#endif + if (buf->size > (size_t) len) { + size = buf->size > SIZE_MAX / 2 ? SIZE_MAX : buf->size * 2; + } else { + size = buf->use + len; + size = size > SIZE_MAX - 100 ? SIZE_MAX : size + 100; + } if (buf->alloc == XML_BUFFER_ALLOC_BOUNDED) { /* @@ -744,7 +744,7 @@ xmlBufIsEmpty(const xmlBufPtr buf) int xmlBufResize(xmlBufPtr buf, size_t size) { - unsigned int newSize; + size_t newSize; xmlChar* rebuf = NULL; size_t start_buf; @@ -772,9 +772,13 @@ xmlBufResize(xmlBufPtr buf, size_t size) case XML_BUFFER_ALLOC_IO: case XML_BUFFER_ALLOC_DOUBLEIT: /*take care of empty case*/ - newSize = (buf->size ? buf->size*2 : size + 10); + if (buf->size == 0) { + newSize = (size > SIZE_MAX - 10 ? SIZE_MAX : size + 10); + } else { + newSize = buf->size; + } while (size > newSize) { - if (newSize > UINT_MAX / 2) { + if (newSize > SIZE_MAX / 2) { xmlBufMemoryError(buf, "growing buffer"); return 0; } @@ -782,15 +786,15 @@ xmlBufResize(xmlBufPtr buf, size_t size) } break; case XML_BUFFER_ALLOC_EXACT: - newSize = size+10; + newSize = (size > SIZE_MAX - 10 ? SIZE_MAX : size + 10); break; case XML_BUFFER_ALLOC_HYBRID: if (buf->use < BASE_BUFFER_SIZE) newSize = size; else { - newSize = buf->size * 2; + newSize = buf->size; while (size > newSize) { - if (newSize > UINT_MAX / 2) { + if (newSize > SIZE_MAX / 2) { xmlBufMemoryError(buf, "growing buffer"); return 0; } @@ -800,7 +804,7 @@ xmlBufResize(xmlBufPtr buf, size_t size) break; default: - newSize = size+10; + newSize = (size > SIZE_MAX - 10 ? SIZE_MAX : size + 10); break; } @@ -866,7 +870,7 @@ xmlBufResize(xmlBufPtr buf, size_t size) */ int xmlBufAdd(xmlBufPtr buf, const xmlChar *str, int len) { - unsigned int needSize; + size_t needSize; if ((str == NULL) || (buf == NULL) || (buf->error)) return -1; @@ -888,8 +892,10 @@ xmlBufAdd(xmlBufPtr buf, const xmlChar *str, int len) { if (len < 0) return -1; if (len == 0) return 0; - needSize = buf->use + len + 2; - if (needSize > buf->size){ + if ((size_t) len >= buf->size - buf->use) { + if ((size_t) len >= SIZE_MAX - buf->use) + return(-1); + needSize = buf->use + len + 1; if (buf->alloc == XML_BUFFER_ALLOC_BOUNDED) { /* * Used to provide parsing limits @@ -1025,31 +1031,7 @@ xmlBufCat(xmlBufPtr buf, const xmlChar *str) { */ int xmlBufCCat(xmlBufPtr buf, const char *str) { - const char *cur; - - if ((buf == NULL) || (buf->error)) - return(-1); - CHECK_COMPAT(buf) - if (buf->alloc == XML_BUFFER_ALLOC_IMMUTABLE) return -1; - if (str == NULL) { -#ifdef DEBUG_BUFFER - xmlGenericError(xmlGenericErrorContext, - "xmlBufCCat: str == NULL\n"); -#endif - return -1; - } - for (cur = str;*cur != 0;cur++) { - if (buf->use + 10 >= buf->size) { - if (!xmlBufResize(buf, buf->use+10)){ - xmlBufMemoryError(buf, "growing buffer"); - return XML_ERR_NO_MEMORY; - } - } - buf->content[buf->use++] = *cur; - } - buf->content[buf->use] = 0; - UPDATE_COMPAT(buf) - return 0; + return xmlBufCat(buf, (const xmlChar *) str); } /** diff --git a/configure.ac b/configure.ac index 46faa23..73ad9ff 100644 --- a/configure.ac +++ b/configure.ac @@ -3,7 +3,7 @@ AC_PREREQ([2.63]) m4_define([MAJOR_VERSION], 2) m4_define([MINOR_VERSION], 9) -m4_define([MICRO_VERSION], 13) +m4_define([MICRO_VERSION], 14) AC_INIT([libxml2],[MAJOR_VERSION.MINOR_VERSION.MICRO_VERSION]) AC_CONFIG_SRCDIR([entities.c]) @@ -40,7 +40,7 @@ AC_SUBST(LIBXML_VERSION_EXTRA) VERSION=${LIBXML_VERSION} -AM_INIT_AUTOMAKE([foreign]) +AM_INIT_AUTOMAKE([foreign no-dist-gzip dist-xz]) # Support silent build rules, requires at least automake-1.11. Disable # by either passing --disable-silent-rules to configure or passing V=1 @@ -1475,6 +1475,13 @@ else [have_libicu=yes], [have_libicu=no]) + if test "x$have_libicu" = "xyes"; then + PKG_CHECK_VAR([ICU_DEFS], [icu-i18n], [DEFS]) + if test "x$ICU_DEFS" != "x"; then + CPPFLAGS="$CPPFLAGS $ICU_DEFS" + fi + fi + # If pkg-config failed, fall back to AC_CHECK_LIB. This # will not pick up the necessary LIBS flags for liblzma's # private dependencies, though, so static linking may fail. diff --git a/encoding.c b/encoding.c index 3741c94..c14c9ff 100644 --- a/encoding.c +++ b/encoding.c @@ -1738,6 +1738,10 @@ xmlFindCharEncodingHandler(const char *name) { } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) { xmlEncodingErr(XML_ERR_INTERNAL_ERROR, "iconv : problems with filters for '%s'\n", name); + if (icv_in != (iconv_t) -1) + iconv_close(icv_in); + else + iconv_close(icv_out); } #endif /* LIBXML_ICONV_ENABLED */ #ifdef LIBXML_ICU_ENABLED diff --git a/globals.c b/globals.c index 0c0bdb4..893fb73 100644 --- a/globals.c +++ b/globals.c @@ -50,20 +50,6 @@ void xmlInitGlobals(void) xmlThrDefMutex = xmlNewMutex(); } -/** - * xmlCleanupGlobals: - * - * Additional cleanup for multi-threading - */ -void xmlCleanupGlobals(void) -{ - if (xmlThrDefMutex != NULL) { - xmlFreeMutex(xmlThrDefMutex); - xmlThrDefMutex = NULL; - } - __xmlGlobalInitMutexDestroy(); -} - /************************************************************************ * * * All the user accessible global variables of the library * @@ -578,6 +564,22 @@ xmlInitializeGlobalState(xmlGlobalStatePtr gs) } /** + * xmlCleanupGlobals: + * + * Additional cleanup for multi-threading + */ +void xmlCleanupGlobals(void) +{ + xmlResetError(&xmlLastError); + + if (xmlThrDefMutex != NULL) { + xmlFreeMutex(xmlThrDefMutex); + xmlThrDefMutex = NULL; + } + __xmlGlobalInitMutexDestroy(); +} + +/** * DOC_DISABLE : we ignore missing doc for the xmlThrDef functions, * those are really internal work */ diff --git a/libxml.h b/libxml.h index 1090729..d40fcf8 100644 --- a/libxml.h +++ b/libxml.h @@ -9,8 +9,10 @@ #ifndef __XML_LIBXML_H__ #define __XML_LIBXML_H__ -#include - +/* + * These macros must be defined before including system headers. + * Do not add any #include directives above this block. + */ #ifndef NO_LARGEFILE_SOURCE #ifndef _LARGEFILE_SOURCE #define _LARGEFILE_SOURCE @@ -39,6 +41,7 @@ #include "config.h" #include #endif +#include #if defined(__Lynx__) #include /* pull definition of size_t */ diff --git a/parser.c b/parser.c index 8ca9b2d..1bc3713 100644 --- a/parser.c +++ b/parser.c @@ -2208,7 +2208,8 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { ctxt->input->col++; } cur++; - res++; + if (res < INT_MAX) + res++; if (*cur == 0) { ctxt->input->cur = cur; xmlParserInputGrow(ctxt->input, INPUT_CHUNK); @@ -2244,7 +2245,8 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { * by the attachment of one leading and one following space (#x20) * character." */ - res++; + if (res < INT_MAX) + res++; } } return(res); @@ -14749,7 +14751,6 @@ xmlCleanupParser(void) { xmlSchemaCleanupTypes(); xmlRelaxNGCleanupTypes(); #endif - xmlResetLastError(); xmlCleanupGlobals(); xmlCleanupThreads(); /* must be last if called not from the main thread */ xmlCleanupMemory(); diff --git a/python/libxml.c b/python/libxml.c index e6a9e9f..ef63025 100644 --- a/python/libxml.c +++ b/python/libxml.c @@ -1886,6 +1886,7 @@ libxml_xmlFreeParserCtxt(ATTRIBUTE_UNUSED PyObject *self, PyObject *args) { return(Py_None); } +#ifdef LIBXML_VALID_ENABLED /*** * xmlValidCtxt stuff */ @@ -2045,6 +2046,7 @@ libxml_xmlFreeValidCtxt(PyObject *self ATTRIBUTE_UNUSED, PyObject *args) { Py_INCREF(Py_None); return(Py_None); } +#endif /* LIBXML_VALID_ENABLED */ #ifdef LIBXML_READER_ENABLED /************************************************************************ @@ -3838,8 +3840,10 @@ static PyMethodDef libxmlMethods[] = { {(char *) "doc", libxml_doc, METH_VARARGS, NULL}, {(char *) "xmlNewNode", libxml_xmlNewNode, METH_VARARGS, NULL}, {(char *) "xmlNodeRemoveNsDef", libxml_xmlNodeRemoveNsDef, METH_VARARGS, NULL}, +#ifdef LIBXML_VALID_ENABLED {(char *)"xmlSetValidErrors", libxml_xmlSetValidErrors, METH_VARARGS, NULL}, {(char *)"xmlFreeValidCtxt", libxml_xmlFreeValidCtxt, METH_VARARGS, NULL}, +#endif /* LIBXML_VALID_ENABLED */ #ifdef LIBXML_OUTPUT_ENABLED {(char *) "serializeNode", libxml_serializeNode, METH_VARARGS, NULL}, {(char *) "saveNodeTo", libxml_saveNodeTo, METH_VARARGS, NULL}, diff --git a/python/setup.py.in b/python/setup.py.in index 4a997b4..557e4c7 100755 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -6,7 +6,7 @@ import sys, os try: import setuptools -except ModuleNotFoundError: +except ImportError: pass from distutils.core import setup, Extension diff --git a/result/XInclude/red.xml b/result/XInclude/red.xml new file mode 100644 index 0000000..87adf84 --- /dev/null +++ b/result/XInclude/red.xml @@ -0,0 +1,9 @@ + + + + Introduction + + + Introduction + + diff --git a/result/XInclude/red.xml.rdr b/result/XInclude/red.xml.rdr new file mode 100644 index 0000000..de332db --- /dev/null +++ b/result/XInclude/red.xml.rdr @@ -0,0 +1,26 @@ +0 1 book 0 0 +1 14 #text 0 1 + +1 1 chapter 0 0 +2 14 #text 0 1 + +2 1 para 0 0 +3 3 #text 0 1 Introduction +2 15 para 0 0 +2 14 #text 0 1 + +1 15 chapter 0 0 +1 14 #text 0 1 + +1 1 chapter 0 0 +2 14 #text 0 1 + +2 1 para 0 0 +3 3 #text 0 1 Introduction +2 15 para 0 0 +2 14 #text 0 1 + +1 15 chapter 0 0 +1 14 #text 0 1 + +0 15 book 0 0 diff --git a/result/XPath/expr/strings b/result/XPath/expr/strings index 4b0125c..5527b50 100644 --- a/result/XPath/expr/strings +++ b/result/XPath/expr/strings @@ -150,3 +150,27 @@ Object is a number : 0 ======================== Expression: string-length("titi") Object is a number : 4 + +======================== +Expression: normalize-space(" abc def ") +Object is a string : abc def + +======================== +Expression: normalize-space(" abc def") +Object is a string : abc def + +======================== +Expression: normalize-space("abc def ") +Object is a string : abc def + +======================== +Expression: normalize-space(" abcdef ") +Object is a string : abcdef + +======================== +Expression: normalize-space(" abcdef") +Object is a string : abcdef + +======================== +Expression: normalize-space("abcdef ") +Object is a string : abcdef diff --git a/result/regexp/issue301 b/result/regexp/issue301 new file mode 100644 index 0000000..90e7c4c --- /dev/null +++ b/result/regexp/issue301 @@ -0,0 +1,4 @@ +Regexp: (a{1,2}|ab){2} +abab: Ok +Regexp: ((1?[0-9]{1,2}|2[0-4][0-9]|25[0-5])\.){3}(1?[0-9]{1,2}|2[0-4][0-9]|25[0-5]) +192.168.254.0: Ok diff --git a/result/regexp/issue370 b/result/regexp/issue370 new file mode 100644 index 0000000..42a74ec --- /dev/null +++ b/result/regexp/issue370 @@ -0,0 +1,3 @@ +Regexp: [A-Za-[G]] +G: Fail +a: Ok diff --git a/result/regexp/issue65 b/result/regexp/issue65 new file mode 100644 index 0000000..46bacd3 --- /dev/null +++ b/result/regexp/issue65 @@ -0,0 +1,2 @@ +Regexp: ( (a|bc{0,2})){0,2} d + b d: Ok diff --git a/runtest.c b/runtest.c index 62b362d..e6b2cbb 100644 --- a/runtest.c +++ b/runtest.c @@ -2147,10 +2147,10 @@ fdParseTest(const char *filename, const char *result, const char *err, int options) { xmlDocPtr doc; const char *base = NULL; - int size, res = 0; + int size, res = 0, fd; nb_tests++; - int fd = open(filename, RD_FLAGS); + fd = open(filename, RD_FLAGS); #ifdef LIBXML_HTML_ENABLED if (options & XML_PARSE_HTML) { doc = htmlReadFd(fd, filename, NULL, options); @@ -2647,6 +2647,7 @@ xptrDocTest(const char *filename, } #endif /* LIBXML_XPTR_ENABLED */ +#ifdef LIBXML_VALID_ENABLED /** * xmlidDocTest: * @filename: the file to parse @@ -2714,6 +2715,7 @@ xmlidDocTest(const char *filename, } return(res); } +#endif /* LIBXML_VALID_ENABLED */ #endif /* LIBXML_DEBUG_ENABLED */ #endif /* XPATH */ @@ -4315,6 +4317,7 @@ testDesc testDescriptions[] = { { "XML Namespaces regression tests", errParseTest, "./test/namespaces/*", "result/namespaces/", "", ".err", 0 }, +#ifdef LIBXML_VALID_ENABLED { "Error cases regression tests", errParseTest, "./test/errors/*.xml", "result/errors/", "", ".err", 0 }, @@ -4327,10 +4330,13 @@ testDesc testDescriptions[] = { { "Error cases regression tests (old 1.0)", errParseTest, "./test/errors10/*.xml", "result/errors10/", "", ".err", XML_PARSE_OLD10 }, +#endif #ifdef LIBXML_READER_ENABLED +#ifdef LIBXML_VALID_ENABLED { "Error cases stream regression tests", streamParseTest, "./test/errors/*.xml", "result/errors/", NULL, ".str", 0 }, +#endif { "Reader regression tests", streamParseTest, "./test/*", "result/", ".rdr", NULL, 0 }, @@ -4430,11 +4436,13 @@ testDesc testDescriptions[] = { xptrDocTest, "./test/XPath/docs/*", NULL, NULL, NULL, 0 }, #endif +#ifdef LIBXML_VALID_ENABLED { "xml:id regression tests" , xmlidDocTest, "./test/xmlid/*", "result/xmlid/", "", ".err", 0 }, #endif #endif +#endif { "URI parsing tests" , uriParseTest, "./test/URI/*.uri", "result/URI/", "", NULL, 0 }, diff --git a/test/XInclude/docs/red.xml b/test/XInclude/docs/red.xml new file mode 100644 index 0000000..75ee396 --- /dev/null +++ b/test/XInclude/docs/red.xml @@ -0,0 +1,10 @@ + + + + Introduction + + + + + diff --git a/test/XPath/expr/strings b/test/XPath/expr/strings index c741ee2..ef29251 100644 --- a/test/XPath/expr/strings +++ b/test/XPath/expr/strings @@ -36,3 +36,11 @@ substring("12345",-0.7,4) substring("12345",-5000000000,5000000004) string-length("") string-length("titi") +normalize-space(" abc def ") +normalize-space(" abc def") +normalize-space("abc def ") +normalize-space(" abcdef ") +normalize-space(" abcdef") +normalize-space("abcdef ") + + diff --git a/test/regexp/issue301 b/test/regexp/issue301 new file mode 100644 index 0000000..b5a316b --- /dev/null +++ b/test/regexp/issue301 @@ -0,0 +1,4 @@ +=>(a{1,2}|ab){2} +abab +=>((1?[0-9]{1,2}|2[0-4][0-9]|25[0-5])\.){3}(1?[0-9]{1,2}|2[0-4][0-9]|25[0-5]) +192.168.254.0 diff --git a/test/regexp/issue370 b/test/regexp/issue370 new file mode 100644 index 0000000..626b38e --- /dev/null +++ b/test/regexp/issue370 @@ -0,0 +1,3 @@ +=>[A-Za-[G]] +G +a diff --git a/test/regexp/issue65 b/test/regexp/issue65 new file mode 100644 index 0000000..b71bc07 --- /dev/null +++ b/test/regexp/issue65 @@ -0,0 +1,2 @@ +=>( (a|bc{0,2})){0,2} d + b d diff --git a/tree.c b/tree.c index 9d94aa4..86afb7d 100644 --- a/tree.c +++ b/tree.c @@ -7104,6 +7104,8 @@ xmlBufferPtr xmlBufferCreateSize(size_t size) { xmlBufferPtr ret; + if (size >= UINT_MAX) + return(NULL); ret = (xmlBufferPtr) xmlMalloc(sizeof(xmlBuffer)); if (ret == NULL) { xmlTreeErrMemory("creating buffer"); @@ -7111,7 +7113,7 @@ xmlBufferCreateSize(size_t size) { } ret->use = 0; ret->alloc = xmlBufferAllocScheme; - ret->size = (size ? size+2 : 0); /* +1 for ending null */ + ret->size = (size ? size + 1 : 0); /* +1 for ending null */ if (ret->size){ ret->content = (xmlChar *) xmlMallocAtomic(ret->size * sizeof(xmlChar)); if (ret->content == NULL) { @@ -7171,6 +7173,8 @@ xmlBufferCreateStatic(void *mem, size_t size) { if ((mem == NULL) || (size == 0)) return(NULL); + if (size > UINT_MAX) + return(NULL); ret = (xmlBufferPtr) xmlMalloc(sizeof(xmlBuffer)); if (ret == NULL) { @@ -7318,28 +7322,23 @@ xmlBufferShrink(xmlBufferPtr buf, unsigned int len) { */ int xmlBufferGrow(xmlBufferPtr buf, unsigned int len) { - int size; + unsigned int size; xmlChar *newbuf; if (buf == NULL) return(-1); if (buf->alloc == XML_BUFFER_ALLOC_IMMUTABLE) return(0); - if (len + buf->use < buf->size) return(0); + if (len < buf->size - buf->use) + return(0); + if (len > UINT_MAX - buf->use) + return(-1); - /* - * Windows has a BIG problem on realloc timing, so we try to double - * the buffer size (if that's enough) (bug 146697) - * Apparently BSD too, and it's probably best for linux too - * On an embedded system this may be something to change - */ -#if 1 - if (buf->size > len) - size = buf->size * 2; - else - size = buf->use + len + 100; -#else - size = buf->use + len + 100; -#endif + if (buf->size > (size_t) len) { + size = buf->size > UINT_MAX / 2 ? UINT_MAX : buf->size * 2; + } else { + size = buf->use + len; + size = size > UINT_MAX - 100 ? UINT_MAX : size + 100; + } if ((buf->alloc == XML_BUFFER_ALLOC_IO) && (buf->contentIO != NULL)) { size_t start_buf = buf->content - buf->contentIO; @@ -7466,7 +7465,10 @@ xmlBufferResize(xmlBufferPtr buf, unsigned int size) case XML_BUFFER_ALLOC_IO: case XML_BUFFER_ALLOC_DOUBLEIT: /*take care of empty case*/ - newSize = (buf->size ? buf->size : size + 10); + if (buf->size == 0) + newSize = (size > UINT_MAX - 10 ? UINT_MAX : size + 10); + else + newSize = buf->size; while (size > newSize) { if (newSize > UINT_MAX / 2) { xmlTreeErrMemory("growing buffer"); @@ -7476,7 +7478,7 @@ xmlBufferResize(xmlBufferPtr buf, unsigned int size) } break; case XML_BUFFER_ALLOC_EXACT: - newSize = size+10; + newSize = (size > UINT_MAX - 10 ? UINT_MAX : size + 10);; break; case XML_BUFFER_ALLOC_HYBRID: if (buf->use < BASE_BUFFER_SIZE) @@ -7494,7 +7496,7 @@ xmlBufferResize(xmlBufferPtr buf, unsigned int size) break; default: - newSize = size+10; + newSize = (size > UINT_MAX - 10 ? UINT_MAX : size + 10);; break; } @@ -7580,8 +7582,10 @@ xmlBufferAdd(xmlBufferPtr buf, const xmlChar *str, int len) { if (len < 0) return -1; if (len == 0) return 0; - needSize = buf->use + len + 2; - if (needSize > buf->size){ + if ((unsigned) len >= buf->size - buf->use) { + if ((unsigned) len >= UINT_MAX - buf->use) + return XML_ERR_NO_MEMORY; + needSize = buf->use + len + 1; if (!xmlBufferResize(buf, needSize)){ xmlTreeErrMemory("growing buffer"); return XML_ERR_NO_MEMORY; @@ -7694,29 +7698,7 @@ xmlBufferCat(xmlBufferPtr buf, const xmlChar *str) { */ int xmlBufferCCat(xmlBufferPtr buf, const char *str) { - const char *cur; - - if (buf == NULL) - return(-1); - if (buf->alloc == XML_BUFFER_ALLOC_IMMUTABLE) return -1; - if (str == NULL) { -#ifdef DEBUG_BUFFER - xmlGenericError(xmlGenericErrorContext, - "xmlBufferCCat: str == NULL\n"); -#endif - return -1; - } - for (cur = str;*cur != 0;cur++) { - if (buf->use + 10 >= buf->size) { - if (!xmlBufferResize(buf, buf->use+10)){ - xmlTreeErrMemory("growing buffer"); - return XML_ERR_NO_MEMORY; - } - } - buf->content[buf->use++] = *cur; - } - buf->content[buf->use] = 0; - return 0; + return xmlBufferCat(buf, (const xmlChar *) str); } /** diff --git a/valid.c b/valid.c index 8e596f1..ed3c850 100644 --- a/valid.c +++ b/valid.c @@ -479,35 +479,6 @@ nodeVPop(xmlValidCtxtPtr ctxt) return (ret); } -/** - * xmlValidNormalizeString: - * @str: a string - * - * Normalize a string in-place. - */ -static void -xmlValidNormalizeString(xmlChar *str) { - xmlChar *dst; - const xmlChar *src; - - if (str == NULL) - return; - src = str; - dst = str; - - while (*src == 0x20) src++; - while (*src != 0) { - if (*src == 0x20) { - while (*src == 0x20) src++; - if (*src != 0) - *dst++ = 0x20; - } else { - *dst++ = *src++; - } - } - *dst = 0; -} - #ifdef DEBUG_VALID_ALGO static void xmlValidPrintNode(xmlNodePtr cur) { @@ -1081,6 +1052,7 @@ xmlCopyDocElementContent(xmlDocPtr doc, xmlElementContentPtr cur) { tmp->type = cur->type; tmp->ocur = cur->ocur; prev->c2 = tmp; + tmp->parent = prev; if (cur->name != NULL) { if (dict) tmp->name = xmlDictLookup(dict, cur->name, -1); @@ -2636,6 +2608,35 @@ xmlDumpNotationTable(xmlBufferPtr buf, xmlNotationTablePtr table) { (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ xmlFree((char *)(str)); +/** + * xmlValidNormalizeString: + * @str: a string + * + * Normalize a string in-place. + */ +static void +xmlValidNormalizeString(xmlChar *str) { + xmlChar *dst; + const xmlChar *src; + + if (str == NULL) + return; + src = str; + dst = str; + + while (*src == 0x20) src++; + while (*src != 0) { + if (*src == 0x20) { + while (*src == 0x20) src++; + if (*src != 0) + *dst++ = 0x20; + } else { + *dst++ = *src++; + } + } + *dst = 0; +} + static int xmlIsStreaming(xmlValidCtxtPtr ctxt) { xmlParserCtxtPtr pctxt; diff --git a/xinclude.c b/xinclude.c index 2a0614d..e5fdf0f 100644 --- a/xinclude.c +++ b/xinclude.c @@ -525,8 +525,6 @@ xmlXIncludeAddNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr cur) { if (href == NULL) return(-1); } - if ((href[0] == '#') || (href[0] == 0)) - local = 1; parse = xmlXIncludeGetProp(ctxt, cur, XINCLUDE_PARSE); if (parse != NULL) { if (xmlStrEqual(parse, XINCLUDE_PARSE_XML)) @@ -623,6 +621,9 @@ xmlXIncludeAddNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr cur) { return(-1); } + if (xmlStrEqual(URL, ctxt->doc->URL)) + local = 1; + /* * If local and xml then we need a fragment */ diff --git a/xmlregexp.c b/xmlregexp.c index 8d01c2b..657912e 100644 --- a/xmlregexp.c +++ b/xmlregexp.c @@ -1693,12 +1693,12 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, counter = xmlRegGetCounter(ctxt); ctxt->counters[counter].min = atom->min - 1; ctxt->counters[counter].max = atom->max - 1; - /* count the number of times we see it again */ - xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop, - atom->start, counter); /* allow a way out based on the count */ xmlFAGenerateCountedTransition(ctxt, atom->stop, newstate, counter); + /* count the number of times we see it again */ + xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop, + atom->start, counter); /* and if needed allow a direct exit for 0 */ if (atom->min == 0) xmlFAGenerateEpsilonTransition(ctxt, atom->start0, @@ -3364,7 +3364,6 @@ xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) { /* * this is a multiple input sequence * If there is a counter associated increment it now. - * before potentially saving and rollback * do not increment if the counter is already over the * maximum limit in which case get to next transition */ @@ -3380,15 +3379,17 @@ xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) { counter = &exec->comp->counters[trans->counter]; if (exec->counts[trans->counter] >= counter->max) continue; /* for loop on transitions */ - + } + /* Save before incrementing */ + if (exec->state->nbTrans > exec->transno + 1) { + xmlFARegExecSave(exec); + } + if (trans->counter >= 0) { #ifdef DEBUG_REGEXP_EXEC printf("Increasing count %d\n", trans->counter); #endif exec->counts[trans->counter]++; } - if (exec->state->nbTrans > exec->transno + 1) { - xmlFARegExecSave(exec); - } exec->transcount = 1; do { /* @@ -5107,7 +5108,7 @@ xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) { } NEXTL(len); cur = CUR; - if ((cur != '-') || (NXT(1) == ']')) { + if ((cur != '-') || (NXT(1) == '[') || (NXT(1) == ']')) { xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, XML_REGEXP_CHARVAL, start, end, NULL); return; diff --git a/xpath.c b/xpath.c index 2da591c..c2d8458 100644 --- a/xpath.c +++ b/xpath.c @@ -488,9 +488,9 @@ int wrap_cmp( xmlNodePtr x, xmlNodePtr y ); * * ************************************************************************/ -double xmlXPathNAN; -double xmlXPathPINF; -double xmlXPathNINF; +double xmlXPathNAN = 0.0; +double xmlXPathPINF = 0.0; +double xmlXPathNINF = 0.0; /** * xmlXPathInit: @@ -9260,52 +9260,45 @@ xmlXPathSubstringAfterFunction(xmlXPathParserContextPtr ctxt, int nargs) { */ void xmlXPathNormalizeFunction(xmlXPathParserContextPtr ctxt, int nargs) { - xmlXPathObjectPtr obj = NULL; - xmlChar *source = NULL; - xmlBufPtr target; - xmlChar blank; - - if (ctxt == NULL) return; - if (nargs == 0) { - /* Use current context node */ - valuePush(ctxt, - xmlXPathCacheWrapString(ctxt->context, - xmlXPathCastNodeToString(ctxt->context->node))); - nargs = 1; - } + xmlChar *source, *target; + int blank; - CHECK_ARITY(1); - CAST_TO_STRING; - CHECK_TYPE(XPATH_STRING); - obj = valuePop(ctxt); - source = obj->stringval; + if (ctxt == NULL) return; + if (nargs == 0) { + /* Use current context node */ + valuePush(ctxt, + xmlXPathCacheWrapString(ctxt->context, + xmlXPathCastNodeToString(ctxt->context->node))); + nargs = 1; + } - target = xmlBufCreate(); - if (target && source) { + CHECK_ARITY(1); + CAST_TO_STRING; + CHECK_TYPE(XPATH_STRING); + source = ctxt->value->stringval; + if (source == NULL) + return; + target = source; /* Skip leading whitespaces */ while (IS_BLANK_CH(*source)) - source++; + source++; /* Collapse intermediate whitespaces, and skip trailing whitespaces */ blank = 0; while (*source) { - if (IS_BLANK_CH(*source)) { - blank = 0x20; - } else { - if (blank) { - xmlBufAdd(target, &blank, 1); - blank = 0; - } - xmlBufAdd(target, source, 1); - } - source++; + if (IS_BLANK_CH(*source)) { + blank = 1; + } else { + if (blank) { + *target++ = 0x20; + blank = 0; + } + *target++ = *source; + } + source++; } - valuePush(ctxt, xmlXPathCacheNewString(ctxt->context, - xmlBufContent(target))); - xmlBufFree(target); - } - xmlXPathReleaseObject(ctxt->context, obj); + *target = 0; } /** diff --git a/xpointer.c b/xpointer.c index afeaa2e..e9c783c 100644 --- a/xpointer.c +++ b/xpointer.c @@ -2756,6 +2756,7 @@ xmlXPtrStringRangeFunction(xmlXPathParserContextPtr ctxt, int nargs) { */ tmp = xmlXPtrNewLocationSetNodeSet(set->nodesetval); xmlXPathFreeObject(set); + set = NULL; if (tmp == NULL) { xmlXPathErr(ctxt, XPATH_MEMORY_ERROR); goto error; -- 2.7.4