1 /* 4b74aa710b4ed5ce464b0ce544852cb47bf905c85a49c7bae2749f5885cb966d (2.2.5+)
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000-2017 Expat development team
11 Licensed under the MIT license:
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to permit
18 persons to whom the Software is furnished to do so, subject to the
21 The above copyright notice and this permission notice shall be included
22 in all copies or substantial portions of the Software.
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30 USE OR OTHER DEALINGS IN THE SOFTWARE.
33 #if !defined(_GNU_SOURCE)
34 # define _GNU_SOURCE 1 /* syscall prototype */
38 #include <string.h> /* memset(), memcpy() */
40 #include <limits.h> /* UINT_MAX */
41 #include <stdio.h> /* fprintf */
42 #include <stdlib.h> /* getenv */
45 #define getpid GetCurrentProcessId
47 #include <sys/time.h> /* gettimeofday() */
48 #include <sys/types.h> /* getpid() */
49 #include <unistd.h> /* getpid() */
50 #include <fcntl.h> /* O_RDONLY */
54 #define XML_BUILDING_EXPAT 1
57 #include "winconfig.h"
58 #elif defined(HAVE_EXPAT_CONFIG_H)
59 #include <expat_config.h>
60 #endif /* ndef _WIN32 */
66 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
67 # if defined(HAVE_GETRANDOM)
68 # include <sys/random.h> /* getrandom */
70 # include <unistd.h> /* syscall */
71 # include <sys/syscall.h> /* SYS_getrandom */
/* Supply GRND_NONBLOCK ourselves when no header included so far defined it. */
73 # if ! defined(GRND_NONBLOCK)
74 # define GRND_NONBLOCK 0x0001
75 # endif /* ! defined(GRND_NONBLOCK) */
76 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
78 #if defined(HAVE_LIBBSD) \
79 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
80 # include <bsd/stdlib.h>
83 #if defined(_WIN32) && !defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
84 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
87 #if !defined(HAVE_GETRANDOM) && !defined(HAVE_SYSCALL_GETRANDOM) \
88 && !defined(HAVE_ARC4RANDOM_BUF) && !defined(HAVE_ARC4RANDOM) \
89 && !defined(XML_DEV_URANDOM) \
91 && !defined(XML_POOR_ENTROPY)
93 You do not have support for any sources of high quality entropy \
94 enabled. For end user security, that is probably not what you want. \
96 Your options include: \
97 * Linux + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
98 * Linux + glibc <2.25 (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
99 * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
100 * BSD / macOS <10.7 (arc4random): HAVE_ARC4RANDOM, \
101 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
102 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
103 * Linux / BSD / macOS (/dev/urandom): XML_DEV_URANDOM \
104 * Windows (RtlGenRandom): _WIN32. \
106 If insist on not using any of these, bypass this error by defining \
107 XML_POOR_ENTROPY; you have been warned. \
109 If you have reasons to patch this detection code away or need changes \
110 to the build system, please open a bug. Thank you!
115 #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
116 #define XmlConvert XmlUtf16Convert
117 #define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
118 #define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
119 #define XmlEncode XmlUtf16Encode
/* True when input at `s` must be converted before use: either the encoding
   `enc` is not UTF-16, or `s` sits on an odd address.  The address is turned
   into an integer with a cast; subtracting a null pointer from `s` (as was
   done previously) is undefined behaviour because the two pointers do not
   refer to the same array object. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((size_t)(s)) & 1))
122 typedef unsigned short ICHAR;
124 #define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
125 #define XmlConvert XmlUtf8Convert
126 #define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
127 #define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
128 #define XmlEncode XmlUtf8Encode
129 #define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
136 #define XmlInitEncodingNS XmlInitEncoding
137 #define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
138 #undef XmlGetInternalEncodingNS
139 #define XmlGetInternalEncodingNS XmlGetInternalEncoding
140 #define XmlParseXmlDeclNS XmlParseXmlDecl
146 #ifdef XML_UNICODE_WCHAR_T
147 #define XML_T(x) (const wchar_t)x
148 #define XML_L(x) L ## x
150 #define XML_T(x) (const unsigned short)x
/* Smallest multiple of `align` that is >= `value`; `align` must be a
   power of 2.  Both arguments are evaluated more than once. */
#define ROUND_UP(value, align) ((((align) - 1) + (value)) & ~((align) - 1))
164 /* Handle the case where memmove() doesn't exist. */
167 #define memmove(d,s,l) bcopy((s),(d),(l))
169 #error memmove does not exist on this platform, nor is a substitute available
170 #endif /* HAVE_BCOPY */
171 #endif /* HAVE_MEMMOVE */
173 #include "internal.h"
177 typedef const XML_Char *KEY;
188 const XML_Memory_Handling_Suite *mem;
195 copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key);
197 /* For probing (after a collision) we need a step size relatively prime
198 to the hash table size, which is a power of 2. We use double-hashing,
199 since we can calculate a second hash value cheaply by taking those bits
200 of the first hash value that were discarded (masked out) when the table
201 index was calculated: index = hash & mask, where mask = table->size - 1.
202 We limit the maximum step size to table->size / 4 (mask >> 2) and make
203 it odd, since odd numbers are always relatively prime to a power of 2.
/* Secondary hash for double hashing: take the bits of `h` that lie outside
   `m`, shift them right by `pw - 1`, and limit the result to `m >> 2`. */
#define SECOND_HASH(h, m, pw) \
  (((m) >> 2) & (((h) & ~(m)) >> ((pw) - 1)))
/* Probe increment: the secondary hash with its lowest bit set (hence odd),
   narrowed to unsigned char. */
#define PROBE_STEP(h, m, pw) \
  ((unsigned char)(1 | (SECOND_HASH(h, m, pw))))
215 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
216 #define INIT_DATA_BUF_SIZE 1024
217 #define INIT_ATTS_SIZE 16
218 #define INIT_ATTS_VERSION 0xFFFFFFFF
219 #define INIT_BLOCK_SIZE 1024
220 #define INIT_BUFFER_SIZE 1024
222 #define EXPAND_SPARE 24
224 typedef struct binding {
225 struct prefix *prefix;
226 struct binding *nextTagBinding;
227 struct binding *prevPrefixBinding;
228 const struct attribute_id *attId;
234 typedef struct prefix {
235 const XML_Char *name;
241 const XML_Char *localPart;
242 const XML_Char *prefix;
248 /* TAG represents an open element.
249 The name of the element is stored in both the document and API
250 encodings. The memory buffer 'buf' is a separately-allocated
251 memory area which stores the name. During the XML_Parse()/
252 XML_ParseBuffer() when the element is open, the memory for the 'raw'
253 version of the name (in the document encoding) is shared with the
254 document buffer. If the element is open across calls to
255 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
256 contain the 'raw' name as well.
258 A parser re-uses these structures, maintaining a list of allocated
259 TAG objects in a free list.
262 struct tag *parent; /* parent of this element */
263 const char *rawName; /* tagName in the original encoding */
265 TAG_NAME name; /* tagName in the API encoding */
266 char *buf; /* buffer for name components */
267 char *bufEnd; /* end of the buffer */
272 const XML_Char *name;
273 const XML_Char *textPtr;
274 int textLen; /* length in XML_Chars */
275 int processed; /* # of processed bytes - when suspended */
276 const XML_Char *systemId;
277 const XML_Char *base;
278 const XML_Char *publicId;
279 const XML_Char *notation;
282 XML_Bool is_internal; /* true if declared in internal subset outside PE */
286 enum XML_Content_Type type;
287 enum XML_Content_Quant quant;
288 const XML_Char * name;
295 #define INIT_SCAFFOLD_ELEMENTS 32
297 typedef struct block {
309 const XML_Memory_Handling_Suite *mem;
312 /* The XML_Char before the name is used to determine whether
313 an attribute has been specified. */
314 typedef struct attribute_id {
317 XML_Bool maybeTokenized;
322 const ATTRIBUTE_ID *id;
324 const XML_Char *value;
328 unsigned long version;
330 const XML_Char *uriName;
334 const XML_Char *name;
336 const ATTRIBUTE_ID *idAtt;
338 int allocDefaultAtts;
339 DEFAULT_ATTRIBUTE *defaultAtts;
343 HASH_TABLE generalEntities;
344 HASH_TABLE elementTypes;
345 HASH_TABLE attributeIds;
348 STRING_POOL entityValuePool;
349 /* false once a parameter entity reference has been skipped */
350 XML_Bool keepProcessing;
351 /* true once an internal or external PE reference has been encountered;
352 this includes the reference to an external subset */
353 XML_Bool hasParamEntityRefs;
356 /* indicates if external PE has been read */
357 XML_Bool paramEntityRead;
358 HASH_TABLE paramEntities;
360 PREFIX defaultPrefix;
361 /* === scaffolding for building content model === */
363 CONTENT_SCAFFOLD *scaffold;
364 unsigned contentStringLen;
371 typedef struct open_internal_entity {
372 const char *internalEventPtr;
373 const char *internalEventEndPtr;
374 struct open_internal_entity *next;
377 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
378 } OPEN_INTERNAL_ENTITY;
380 typedef enum XML_Error PTRCALL Processor(XML_Parser parser,
383 const char **endPtr);
385 static Processor prologProcessor;
386 static Processor prologInitProcessor;
387 static Processor contentProcessor;
388 static Processor cdataSectionProcessor;
390 static Processor ignoreSectionProcessor;
391 static Processor externalParEntProcessor;
392 static Processor externalParEntInitProcessor;
393 static Processor entityValueProcessor;
394 static Processor entityValueInitProcessor;
396 static Processor epilogProcessor;
397 static Processor errorProcessor;
398 static Processor externalEntityInitProcessor;
399 static Processor externalEntityInitProcessor2;
400 static Processor externalEntityInitProcessor3;
401 static Processor externalEntityContentProcessor;
402 static Processor internalEntityProcessor;
404 static enum XML_Error
405 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
406 static enum XML_Error
407 processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
408 const char *s, const char *next);
409 static enum XML_Error
410 initializeEncoding(XML_Parser parser);
411 static enum XML_Error
412 doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
413 const char *end, int tok, const char *next, const char **nextPtr,
415 static enum XML_Error
416 processInternalEntity(XML_Parser parser, ENTITY *entity,
417 XML_Bool betweenDecl);
418 static enum XML_Error
419 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
420 const char *start, const char *end, const char **endPtr,
422 static enum XML_Error
423 doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr,
424 const char *end, const char **nextPtr, XML_Bool haveMore);
426 static enum XML_Error
427 doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr,
428 const char *end, const char **nextPtr, XML_Bool haveMore);
432 freeBindings(XML_Parser parser, BINDING *bindings);
433 static enum XML_Error
434 storeAtts(XML_Parser parser, const ENCODING *, const char *s,
435 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
436 static enum XML_Error
437 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
438 const XML_Char *uri, BINDING **bindingsPtr);
440 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
441 XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser);
442 static enum XML_Error
443 storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
444 const char *, const char *, STRING_POOL *);
445 static enum XML_Error
446 appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
447 const char *, const char *, STRING_POOL *);
448 static ATTRIBUTE_ID *
449 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
452 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
453 static enum XML_Error
454 storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start,
457 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
458 const char *start, const char *end);
460 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
463 reportDefault(XML_Parser parser, const ENCODING *enc, const char *start,
466 static const XML_Char * getContext(XML_Parser parser);
468 setContext(XML_Parser parser, const XML_Char *context);
470 static void FASTCALL normalizePublicId(XML_Char *s);
472 static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms);
473 /* do not call if m_parentParser != NULL */
474 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
476 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms);
478 dtdCopy(XML_Parser oldParser,
479 DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms);
481 copyEntityTable(XML_Parser oldParser,
482 HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
484 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize);
486 hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms);
487 static void FASTCALL hashTableClear(HASH_TABLE *);
488 static void FASTCALL hashTableDestroy(HASH_TABLE *);
490 hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
491 static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
494 poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms);
495 static void FASTCALL poolClear(STRING_POOL *);
496 static void FASTCALL poolDestroy(STRING_POOL *);
498 poolAppend(STRING_POOL *pool, const ENCODING *enc,
499 const char *ptr, const char *end);
501 poolStoreString(STRING_POOL *pool, const ENCODING *enc,
502 const char *ptr, const char *end);
503 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
504 static const XML_Char * FASTCALL
505 poolCopyString(STRING_POOL *pool, const XML_Char *s);
506 static const XML_Char *
507 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
508 static const XML_Char * FASTCALL
509 poolAppendString(STRING_POOL *pool, const XML_Char *s);
511 static int FASTCALL nextScaffoldPart(XML_Parser parser);
512 static XML_Content * build_model(XML_Parser parser);
513 static ELEMENT_TYPE *
514 getElementType(XML_Parser parser, const ENCODING *enc,
515 const char *ptr, const char *end);
517 static XML_Char *copyString(const XML_Char *s,
518 const XML_Memory_Handling_Suite *memsuite);
520 static unsigned long generate_hash_secret_salt(XML_Parser parser);
521 static XML_Bool startParsing(XML_Parser parser);
524 parserCreate(const XML_Char *encodingName,
525 const XML_Memory_Handling_Suite *memsuite,
526 const XML_Char *nameSep,
530 parserInit(XML_Parser parser, const XML_Char *encodingName);
/* Helpers working on a string pool's `start` and `ptr` members.  Each takes
   a pointer to the pool; several evaluate it more than once, so the argument
   must be free of side effects. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
/* Number of elements between `start` and `ptr`. */
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Step `ptr` back by one element; yields no value.  The parameter is
   parenthesized so that arguments such as `&pools[i]` expand correctly. */
#define poolChop(pool) ((void)--((pool)->ptr))
/* The element immediately before `ptr`. */
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Reset `ptr` back to `start`. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Advance `start` up to `ptr`. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Store `c` at the pool's `ptr` and advance `ptr` by one.  When `ptr` has
   reached `end`, poolGrow() is called first; if that reports failure the
   macro evaluates to 0 and stores nothing.  Evaluates to 1 on success.
   `pool` is evaluated more than once. */
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
544 struct XML_ParserStruct {
545 /* The first member must be m_userData so that the XML_GetUserData
550 const XML_Memory_Handling_Suite m_mem;
551 /* first character to be parsed */
552 const char *m_bufferPtr;
553 /* past last character to be parsed */
555 /* allocated end of m_buffer */
556 const char *m_bufferLim;
557 XML_Index m_parseEndByteIndex;
558 const char *m_parseEndPtr;
560 XML_Char *m_dataBufEnd;
561 XML_StartElementHandler m_startElementHandler;
562 XML_EndElementHandler m_endElementHandler;
563 XML_CharacterDataHandler m_characterDataHandler;
564 XML_ProcessingInstructionHandler m_processingInstructionHandler;
565 XML_CommentHandler m_commentHandler;
566 XML_StartCdataSectionHandler m_startCdataSectionHandler;
567 XML_EndCdataSectionHandler m_endCdataSectionHandler;
568 XML_DefaultHandler m_defaultHandler;
569 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
570 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
571 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
572 XML_NotationDeclHandler m_notationDeclHandler;
573 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
574 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
575 XML_NotStandaloneHandler m_notStandaloneHandler;
576 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
577 XML_Parser m_externalEntityRefHandlerArg;
578 XML_SkippedEntityHandler m_skippedEntityHandler;
579 XML_UnknownEncodingHandler m_unknownEncodingHandler;
580 XML_ElementDeclHandler m_elementDeclHandler;
581 XML_AttlistDeclHandler m_attlistDeclHandler;
582 XML_EntityDeclHandler m_entityDeclHandler;
583 XML_XmlDeclHandler m_xmlDeclHandler;
584 const ENCODING *m_encoding;
585 INIT_ENCODING m_initEncoding;
586 const ENCODING *m_internalEncoding;
587 const XML_Char *m_protocolEncodingName;
589 XML_Bool m_ns_triplets;
590 void *m_unknownEncodingMem;
591 void *m_unknownEncodingData;
592 void *m_unknownEncodingHandlerData;
593 void (XMLCALL *m_unknownEncodingRelease)(void *);
594 PROLOG_STATE m_prologState;
595 Processor *m_processor;
596 enum XML_Error m_errorCode;
597 const char *m_eventPtr;
598 const char *m_eventEndPtr;
599 const char *m_positionPtr;
600 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
601 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
602 XML_Bool m_defaultExpandInternalEntities;
604 ENTITY *m_declEntity;
605 const XML_Char *m_doctypeName;
606 const XML_Char *m_doctypeSysid;
607 const XML_Char *m_doctypePubid;
608 const XML_Char *m_declAttributeType;
609 const XML_Char *m_declNotationName;
610 const XML_Char *m_declNotationPublicId;
611 ELEMENT_TYPE *m_declElementType;
612 ATTRIBUTE_ID *m_declAttributeId;
613 XML_Bool m_declAttributeIsCdata;
614 XML_Bool m_declAttributeIsId;
616 const XML_Char *m_curBase;
619 BINDING *m_inheritedBindings;
620 BINDING *m_freeBindingList;
622 int m_nSpecifiedAtts;
626 unsigned long m_nsAttsVersion;
627 unsigned char m_nsAttsPower;
629 XML_AttrInfo *m_attInfo;
632 STRING_POOL m_tempPool;
633 STRING_POOL m_temp2Pool;
634 char *m_groupConnector;
635 unsigned int m_groupSize;
636 XML_Char m_namespaceSeparator;
637 XML_Parser m_parentParser;
638 XML_ParsingStatus m_parsingStatus;
640 XML_Bool m_isParamEntity;
641 XML_Bool m_useForeignDTD;
642 enum XML_ParamEntityParsing m_paramEntityParsing;
644 unsigned long m_hash_secret_salt;
/* Allocation helpers that dispatch through the memory-handling suite stored
   in the parser's `m_mem` member.  The `parser` parameter is parenthesized
   so that any pointer-valued expression may be passed, not only a plain
   identifier. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p),(s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
653 XML_ParserCreate(const XML_Char *encodingName)
655 return XML_ParserCreate_MM(encodingName, NULL, NULL);
659 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
663 return XML_ParserCreate_MM(encodingName, NULL, tmp);
666 static const XML_Char implicitContext[] = {
667 ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p,
668 ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
669 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,
670 ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9,
671 ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
672 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'
676 /* To avoid warnings about unused functions: */
677 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
679 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
681 /* Obtain entropy on Linux 3.17+ */
683 writeRandomBytes_getrandom_nonblock(void * target, size_t count) {
684 int success = 0; /* full count bytes written? */
685 size_t bytesWrittenTotal = 0;
686 const unsigned int getrandomFlags = GRND_NONBLOCK;
689 void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
690 const size_t bytesToWrite = count - bytesWrittenTotal;
692 const int bytesWrittenMore =
693 #if defined(HAVE_GETRANDOM)
694 getrandom(currentTarget, bytesToWrite, getrandomFlags);
696 syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
699 if (bytesWrittenMore > 0) {
700 bytesWrittenTotal += bytesWrittenMore;
701 if (bytesWrittenTotal >= count)
704 } while (! success && (errno == EINTR));
709 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
712 #if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
714 /* Extract entropy from /dev/urandom */
716 writeRandomBytes_dev_urandom(void * target, size_t count) {
717 int success = 0; /* full count bytes written? */
718 size_t bytesWrittenTotal = 0;
720 const int fd = open("/dev/urandom", O_RDONLY);
726 void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
727 const size_t bytesToWrite = count - bytesWrittenTotal;
729 const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);
731 if (bytesWrittenMore > 0) {
732 bytesWrittenTotal += bytesWrittenMore;
733 if (bytesWrittenTotal >= count)
736 } while (! success && (errno == EINTR));
742 #endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
744 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
747 #if defined(HAVE_ARC4RANDOM)
750 writeRandomBytes_arc4random(void * target, size_t count) {
751 size_t bytesWrittenTotal = 0;
753 while (bytesWrittenTotal < count) {
754 const uint32_t random32 = arc4random();
757 for (; (i < sizeof(random32)) && (bytesWrittenTotal < count);
758 i++, bytesWrittenTotal++) {
759 const uint8_t random8 = (uint8_t)(random32 >> (i * 8));
760 ((uint8_t *)target)[bytesWrittenTotal] = random8;
765 #endif /* defined(HAVE_ARC4RANDOM) */
770 typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG);
771 HMODULE _Expat_LoadLibrary(LPCTSTR filename); /* see loadlibrary.c */
773 /* Obtain entropy on Windows XP / Windows Server 2003 and later.
774 * Hint on RtlGenRandom and the following article from libsodium.
776 * Michael Howard: Cryptographically Secure Random number on Windows without using CryptoAPI
777 * https://blogs.msdn.microsoft.com/michael_howard/2005/01/14/cryptographically-secure-random-number-on-windows-without-using-cryptoapi/
780 writeRandomBytes_RtlGenRandom(void * target, size_t count) {
781 int success = 0; /* full count bytes written? */
782 const HMODULE advapi32 = _Expat_LoadLibrary(TEXT("ADVAPI32.DLL"));
785 const RTLGENRANDOM_FUNC RtlGenRandom
786 = (RTLGENRANDOM_FUNC)GetProcAddress(advapi32, "SystemFunction036");
788 if (RtlGenRandom((PVOID)target, (ULONG)count) == TRUE) {
792 FreeLibrary(advapi32);
801 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
804 gather_time_entropy(void)
808 GetSystemTimeAsFileTime(&ft); /* never fails */
809 return ft.dwHighDateTime ^ ft.dwLowDateTime;
812 int gettimeofday_res;
814 gettimeofday_res = gettimeofday(&tv, NULL);
817 (void)gettimeofday_res;
819 assert (gettimeofday_res == 0);
820 #endif /* defined(NDEBUG) */
822 /* Microseconds time is <20 bits entropy */
827 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
831 ENTROPY_DEBUG(const char * label, unsigned long entropy) {
832 const char * const EXPAT_ENTROPY_DEBUG = getenv("EXPAT_ENTROPY_DEBUG");
833 if (EXPAT_ENTROPY_DEBUG && ! strcmp(EXPAT_ENTROPY_DEBUG, "1")) {
834 fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n",
836 (int)sizeof(entropy) * 2, entropy,
837 (unsigned long)sizeof(entropy));
843 generate_hash_secret_salt(XML_Parser parser)
845 unsigned long entropy;
848 /* "Failproof" high quality providers: */
849 #if defined(HAVE_ARC4RANDOM_BUF)
850 arc4random_buf(&entropy, sizeof(entropy));
851 return ENTROPY_DEBUG("arc4random_buf", entropy);
852 #elif defined(HAVE_ARC4RANDOM)
853 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
854 return ENTROPY_DEBUG("arc4random", entropy);
856 /* Try high quality providers first .. */
858 if (writeRandomBytes_RtlGenRandom((void *)&entropy, sizeof(entropy))) {
859 return ENTROPY_DEBUG("RtlGenRandom", entropy);
861 #elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
862 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
863 return ENTROPY_DEBUG("getrandom", entropy);
866 #if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
867 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
868 return ENTROPY_DEBUG("/dev/urandom", entropy);
870 #endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
871 /* .. and self-made low quality for backup: */
873 /* Process ID is 0 bits entropy if attacker has local access */
874 entropy = gather_time_entropy() ^ getpid();
876 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
877 if (sizeof(unsigned long) == 4) {
878 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
880 return ENTROPY_DEBUG("fallback(8)",
881 entropy * (unsigned long)2305843009213693951ULL);
887 get_hash_secret_salt(XML_Parser parser) {
888 if (parser->m_parentParser != NULL)
889 return get_hash_secret_salt(parser->m_parentParser);
890 return parser->m_hash_secret_salt;
893 static XML_Bool /* only valid for root parser */
894 startParsing(XML_Parser parser)
896 /* hash functions must be initialized before setContext() is called */
897 if (parser->m_hash_secret_salt == 0)
898 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
900 /* implicit context only set for root parser, since child
901 parsers (i.e. external entity parsers) will inherit it
903 return setContext(parser, implicitContext);
909 XML_ParserCreate_MM(const XML_Char *encodingName,
910 const XML_Memory_Handling_Suite *memsuite,
911 const XML_Char *nameSep)
913 return parserCreate(encodingName, memsuite, nameSep, NULL);
917 parserCreate(const XML_Char *encodingName,
918 const XML_Memory_Handling_Suite *memsuite,
919 const XML_Char *nameSep,
925 XML_Memory_Handling_Suite *mtemp;
926 parser = (XML_Parser)
927 memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
928 if (parser != NULL) {
929 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
930 mtemp->malloc_fcn = memsuite->malloc_fcn;
931 mtemp->realloc_fcn = memsuite->realloc_fcn;
932 mtemp->free_fcn = memsuite->free_fcn;
936 XML_Memory_Handling_Suite *mtemp;
937 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
938 if (parser != NULL) {
939 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
940 mtemp->malloc_fcn = malloc;
941 mtemp->realloc_fcn = realloc;
942 mtemp->free_fcn = free;
949 parser->m_buffer = NULL;
950 parser->m_bufferLim = NULL;
952 parser->m_attsSize = INIT_ATTS_SIZE;
953 parser->m_atts = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
954 if (parser->m_atts == NULL) {
955 FREE(parser, parser);
959 parser->m_attInfo = (XML_AttrInfo*)MALLOC(parser, parser->m_attsSize * sizeof(XML_AttrInfo));
960 if (parser->m_attInfo == NULL) {
961 FREE(parser, parser->m_atts);
962 FREE(parser, parser);
966 parser->m_dataBuf = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
967 if (parser->m_dataBuf == NULL) {
968 FREE(parser, parser->m_atts);
970 FREE(parser, parser->m_attInfo);
972 FREE(parser, parser);
975 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
980 parser->m_dtd = dtdCreate(&parser->m_mem);
981 if (parser->m_dtd == NULL) {
982 FREE(parser, parser->m_dataBuf);
983 FREE(parser, parser->m_atts);
985 FREE(parser, parser->m_attInfo);
987 FREE(parser, parser);
992 parser->m_freeBindingList = NULL;
993 parser->m_freeTagList = NULL;
994 parser->m_freeInternalEntities = NULL;
996 parser->m_groupSize = 0;
997 parser->m_groupConnector = NULL;
999 parser->m_unknownEncodingHandler = NULL;
1000 parser->m_unknownEncodingHandlerData = NULL;
1002 parser->m_namespaceSeparator = ASCII_EXCL;
1003 parser->m_ns = XML_FALSE;
1004 parser->m_ns_triplets = XML_FALSE;
1006 parser->m_nsAtts = NULL;
1007 parser->m_nsAttsVersion = 0;
1008 parser->m_nsAttsPower = 0;
1010 parser->m_protocolEncodingName = NULL;
1012 poolInit(&parser->m_tempPool, &(parser->m_mem));
1013 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1014 parserInit(parser, encodingName);
1016 if (encodingName && !parser->m_protocolEncodingName) {
1017 XML_ParserFree(parser);
1022 parser->m_ns = XML_TRUE;
1023 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1024 parser->m_namespaceSeparator = *nameSep;
1027 parser->m_internalEncoding = XmlGetInternalEncoding();
1034 parserInit(XML_Parser parser, const XML_Char *encodingName)
1036 parser->m_processor = prologInitProcessor;
1037 XmlPrologStateInit(&parser->m_prologState);
1038 if (encodingName != NULL) {
1039 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1041 parser->m_curBase = NULL;
1042 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1043 parser->m_userData = NULL;
1044 parser->m_handlerArg = NULL;
1045 parser->m_startElementHandler = NULL;
1046 parser->m_endElementHandler = NULL;
1047 parser->m_characterDataHandler = NULL;
1048 parser->m_processingInstructionHandler = NULL;
1049 parser->m_commentHandler = NULL;
1050 parser->m_startCdataSectionHandler = NULL;
1051 parser->m_endCdataSectionHandler = NULL;
1052 parser->m_defaultHandler = NULL;
1053 parser->m_startDoctypeDeclHandler = NULL;
1054 parser->m_endDoctypeDeclHandler = NULL;
1055 parser->m_unparsedEntityDeclHandler = NULL;
1056 parser->m_notationDeclHandler = NULL;
1057 parser->m_startNamespaceDeclHandler = NULL;
1058 parser->m_endNamespaceDeclHandler = NULL;
1059 parser->m_notStandaloneHandler = NULL;
1060 parser->m_externalEntityRefHandler = NULL;
1061 parser->m_externalEntityRefHandlerArg = parser;
1062 parser->m_skippedEntityHandler = NULL;
1063 parser->m_elementDeclHandler = NULL;
1064 parser->m_attlistDeclHandler = NULL;
1065 parser->m_entityDeclHandler = NULL;
1066 parser->m_xmlDeclHandler = NULL;
1067 parser->m_bufferPtr = parser->m_buffer;
1068 parser->m_bufferEnd = parser->m_buffer;
1069 parser->m_parseEndByteIndex = 0;
1070 parser->m_parseEndPtr = NULL;
1071 parser->m_declElementType = NULL;
1072 parser->m_declAttributeId = NULL;
1073 parser->m_declEntity = NULL;
1074 parser->m_doctypeName = NULL;
1075 parser->m_doctypeSysid = NULL;
1076 parser->m_doctypePubid = NULL;
1077 parser->m_declAttributeType = NULL;
1078 parser->m_declNotationName = NULL;
1079 parser->m_declNotationPublicId = NULL;
1080 parser->m_declAttributeIsCdata = XML_FALSE;
1081 parser->m_declAttributeIsId = XML_FALSE;
1082 memset(&parser->m_position, 0, sizeof(POSITION));
1083 parser->m_errorCode = XML_ERROR_NONE;
1084 parser->m_eventPtr = NULL;
1085 parser->m_eventEndPtr = NULL;
1086 parser->m_positionPtr = NULL;
1087 parser->m_openInternalEntities = NULL;
1088 parser->m_defaultExpandInternalEntities = XML_TRUE;
1089 parser->m_tagLevel = 0;
1090 parser->m_tagStack = NULL;
1091 parser->m_inheritedBindings = NULL;
1092 parser->m_nSpecifiedAtts = 0;
1093 parser->m_unknownEncodingMem = NULL;
1094 parser->m_unknownEncodingRelease = NULL;
1095 parser->m_unknownEncodingData = NULL;
1096 parser->m_parentParser = NULL;
1097 parser->m_parsingStatus.parsing = XML_INITIALIZED;
1099 parser->m_isParamEntity = XML_FALSE;
1100 parser->m_useForeignDTD = XML_FALSE;
1101 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1103 parser->m_hash_secret_salt = 0;
1106 /* moves list of bindings to m_freeBindingList */
1107 static void FASTCALL
1108 moveToFreeBindingList(XML_Parser parser, BINDING *bindings)
1111 BINDING *b = bindings;
1112 bindings = bindings->nextTagBinding;
1113 b->nextTagBinding = parser->m_freeBindingList;
1114 parser->m_freeBindingList = b;
/* Puts an existing parser back into its initial state.
   The open tag stack, the open internal entities and the inherited
   bindings are moved onto the corresponding free lists of the parser,
   the unknown-encoding memory and the protocol encoding name are freed,
   both temporary pools are cleared, and finally parserInit() and
   dtdReset() are run with `encodingName`.
   NOTE(review): the statement executed when m_parentParser is set, and
   the return statements, are not visible in this excerpt - presumably
   the reset is refused for parsers that have a parent; confirm. */
1119 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName)
1122 OPEN_INTERNAL_ENTITY *openEntityList;
1127 if (parser->m_parentParser)
1129 /* move m_tagStack to m_freeTagList */
1130 tStk = parser->m_tagStack;
1133 tStk = tStk->parent;
1134 tag->parent = parser->m_freeTagList;
/* The bindings of the tag go to the binding free list before the tag
   itself is pushed onto m_freeTagList. */
1135 moveToFreeBindingList(parser, tag->bindings);
1136 tag->bindings = NULL;
1137 parser->m_freeTagList = tag;
1139 /* move m_openInternalEntities to m_freeInternalEntities */
1140 openEntityList = parser->m_openInternalEntities;
1141 while (openEntityList) {
1142 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1143 openEntityList = openEntity->next;
1144 openEntity->next = parser->m_freeInternalEntities;
1145 parser->m_freeInternalEntities = openEntity;
1147 moveToFreeBindingList(parser, parser->m_inheritedBindings);
/* Drop the unknown-encoding state; the release callback, when set, is
   called with m_unknownEncodingData. */
1148 FREE(parser, parser->m_unknownEncodingMem);
1149 if (parser->m_unknownEncodingRelease)
1150 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1151 poolClear(&parser->m_tempPool);
1152 poolClear(&parser->m_temp2Pool);
/* The old encoding name is freed and cleared before parserInit() runs
   with the new one. */
1153 FREE(parser, (void *)parser->m_protocolEncodingName);
1154 parser->m_protocolEncodingName = NULL;
1155 parserInit(parser, encodingName);
1156 dtdReset(parser->m_dtd, &parser->m_mem);
/* Replaces the protocol encoding name stored in the parser.
   The previous name is freed. When `encodingName` is NULL the stored
   name becomes NULL; otherwise a copy made with copyString() using the
   memory suite of the parser is stored.
   Returns XML_STATUS_ERROR when the parsing status is XML_PARSING or
   XML_SUSPENDED, or when the copy could not be made, and XML_STATUS_OK
   otherwise.
   NOTE(review): the condition guarding the first XML_STATUS_ERROR return
   is not visible in this excerpt - presumably a NULL parser check. */
1160 enum XML_Status XMLCALL
1161 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
1164 return XML_STATUS_ERROR;
1165 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1166 XXX There's no way for the caller to determine which of the
1167 XXX possible error cases caused the XML_STATUS_ERROR return.
1169 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1170 return XML_STATUS_ERROR;
1172 /* Get rid of any previous encoding name */
1173 FREE(parser, (void *)parser->m_protocolEncodingName);
1175 if (encodingName == NULL)
1176 /* No new encoding name */
1177 parser->m_protocolEncodingName = NULL;
1179 /* Copy the new encoding name into allocated memory */
1180 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1181 if (!parser->m_protocolEncodingName)
1182 return XML_STATUS_ERROR;
1184 return XML_STATUS_OK;
/* Creates a new parser for an external entity, derived from `oldParser`.
   The handlers, user data, handler argument and a number of settings of
   oldParser are first saved in locals; a new parser is then obtained from
   parserCreate() with the memory suite of oldParser, and the saved values
   are installed on it. The new parser records oldParser as its
   m_parentParser and uses the same hash secret salt.
   Two set-ups are visible at the end: one copies the DTD with dtdCopy(),
   applies `context` with setContext() and selects
   externalEntityInitProcessor; the other marks the parser as a parameter
   entity parser and selects externalParEntInitProcessor.
   NOTE(review): the condition choosing between the two set-ups, the
   return statements and several declarations are not visible in this
   excerpt. */
1188 XML_ExternalEntityParserCreate(XML_Parser oldParser,
1189 const XML_Char *context,
1190 const XML_Char *encodingName)
1192 XML_Parser parser = oldParser;
1195 XML_StartElementHandler oldStartElementHandler;
1196 XML_EndElementHandler oldEndElementHandler;
1197 XML_CharacterDataHandler oldCharacterDataHandler;
1198 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1199 XML_CommentHandler oldCommentHandler;
1200 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1201 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1202 XML_DefaultHandler oldDefaultHandler;
1203 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1204 XML_NotationDeclHandler oldNotationDeclHandler;
1205 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1206 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1207 XML_NotStandaloneHandler oldNotStandaloneHandler;
1208 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1209 XML_SkippedEntityHandler oldSkippedEntityHandler;
1210 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1211 XML_ElementDeclHandler oldElementDeclHandler;
1212 XML_AttlistDeclHandler oldAttlistDeclHandler;
1213 XML_EntityDeclHandler oldEntityDeclHandler;
1214 XML_XmlDeclHandler oldXmlDeclHandler;
1215 ELEMENT_TYPE * oldDeclElementType;
1218 void *oldHandlerArg;
1219 XML_Bool oldDefaultExpandInternalEntities;
1220 XML_Parser oldExternalEntityRefHandlerArg;
1222 enum XML_ParamEntityParsing oldParamEntityParsing;
1223 int oldInEntityValue;
1225 XML_Bool oldns_triplets;
1226 /* Note that the new parser shares the same hash secret as the old
1227 parser, so that dtdCopy and copyEntityTable can lookup values
1228 from hash tables associated with either parser without us having
1229 to worry which hash secrets each table has.
1231 unsigned long oldhash_secret_salt;
1233 /* Validate the oldParser parameter before we pull everything out of it */
1234 if (oldParser == NULL)
1237 /* Stash the original parser contents on the stack */
1238 oldDtd = parser->m_dtd;
1239 oldStartElementHandler = parser->m_startElementHandler;
1240 oldEndElementHandler = parser->m_endElementHandler;
1241 oldCharacterDataHandler = parser->m_characterDataHandler;
1242 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1243 oldCommentHandler = parser->m_commentHandler;
1244 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1245 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1246 oldDefaultHandler = parser->m_defaultHandler;
1247 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1248 oldNotationDeclHandler = parser->m_notationDeclHandler;
1249 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1250 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1251 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1252 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1253 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1254 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1255 oldElementDeclHandler = parser->m_elementDeclHandler;
1256 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1257 oldEntityDeclHandler = parser->m_entityDeclHandler;
1258 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1259 oldDeclElementType = parser->m_declElementType;
/* User data and handler argument are saved separately so that their
   relationship can be reproduced on the new parser further down. */
1261 oldUserData = parser->m_userData;
1262 oldHandlerArg = parser->m_handlerArg;
1263 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1264 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1266 oldParamEntityParsing = parser->m_paramEntityParsing;
1267 oldInEntityValue = parser->m_prologState.inEntityValue;
1269 oldns_triplets = parser->m_ns_triplets;
1270 /* Note that the new parser shares the same hash secret as the old
1271 parser, so that dtdCopy and copyEntityTable can lookup values
1272 from hash tables associated with either parser without us having
1273 to worry which hash secrets each table has.
1275 oldhash_secret_salt = parser->m_hash_secret_salt;
1280 #endif /* XML_DTD */
1282 /* Note that the magical uses of the pre-processor to make field
1283 access look more like C++ require that `parser' be overwritten
1284 here. This makes this function more painful to follow than it
1289 *tmp = parser->m_namespaceSeparator;
1290 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1293 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1299 parser->m_startElementHandler = oldStartElementHandler;
1300 parser->m_endElementHandler = oldEndElementHandler;
1301 parser->m_characterDataHandler = oldCharacterDataHandler;
1302 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1303 parser->m_commentHandler = oldCommentHandler;
1304 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1305 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1306 parser->m_defaultHandler = oldDefaultHandler;
1307 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1308 parser->m_notationDeclHandler = oldNotationDeclHandler;
1309 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1310 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1311 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1312 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1313 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1314 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1315 parser->m_elementDeclHandler = oldElementDeclHandler;
1316 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1317 parser->m_entityDeclHandler = oldEntityDeclHandler;
1318 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1319 parser->m_declElementType = oldDeclElementType;
1320 parser->m_userData = oldUserData;
1321 if (oldUserData == oldHandlerArg)
1322 parser->m_handlerArg = parser->m_userData;
1324 parser->m_handlerArg = parser;
1325 if (oldExternalEntityRefHandlerArg != oldParser)
1326 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1327 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1328 parser->m_ns_triplets = oldns_triplets;
1329 parser->m_hash_secret_salt = oldhash_secret_salt;
1330 parser->m_parentParser = oldParser;
1332 parser->m_paramEntityParsing = oldParamEntityParsing;
1333 parser->m_prologState.inEntityValue = oldInEntityValue;
1335 #endif /* XML_DTD */
/* Copy the DTD of the old parser into the new one and apply `context`;
   if either step fails the freshly created parser is freed again. */
1336 if (!dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1337 || !setContext(parser, context)) {
1338 XML_ParserFree(parser);
1341 parser->m_processor = externalEntityInitProcessor;
1345 /* The DTD instance referenced by parser->m_dtd is shared between the document's
1346 root parser and external PE parsers, therefore one does not need to
1347 call setContext. In addition, one also *must* not call setContext,
1348 because this would overwrite existing prefix->binding pointers in
1349 parser->m_dtd with ones that get destroyed with the external PE parser.
1350 This would leave those prefixes with dangling pointers.
1352 parser->m_isParamEntity = XML_TRUE;
1353 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1354 parser->m_processor = externalParEntInitProcessor;
1356 #endif /* XML_DTD */
/* Walks the `bindings` list through nextTagBinding and releases the uri
   buffer of each node with FREE() on behalf of `parser`.
   NOTE(review): the loop header and any release of the node itself are
   not visible in this excerpt. */
1360 static void FASTCALL
1361 destroyBindings(BINDING *bindings, XML_Parser parser)
1364 BINDING *b = bindings;
/* Advance the list head before the current node is torn down. */
1367 bindings = b->nextTagBinding;
1368 FREE(parser, b->uri);
/* Releases a parser together with the storage it owns.
   In order: the tags on m_tagStack and m_freeTagList (buffer and
   bindings of each), the entries on m_openInternalEntities and
   m_freeInternalEntities, the free and inherited binding lists, both
   temporary pools, the protocol encoding name, the DTD, the attribute,
   group connector, input, data and namespace attribute buffers, the
   unknown-encoding memory, and last the parser structure itself. */
1374 XML_ParserFree(XML_Parser parser)
1377 OPEN_INTERNAL_ENTITY *entityList;
1380 /* free m_tagStack and m_freeTagList */
1381 tagList = parser->m_tagStack;
/* When the current list is exhausted the walk switches over to
   m_freeTagList, which is cleared so it is only visited once. */
1384 if (tagList == NULL) {
1385 if (parser->m_freeTagList == NULL)
1387 tagList = parser->m_freeTagList;
1388 parser->m_freeTagList = NULL;
1391 tagList = tagList->parent;
1392 FREE(parser, p->buf);
1393 destroyBindings(p->bindings, parser);
1396 /* free m_openInternalEntities and m_freeInternalEntities */
1397 entityList = parser->m_openInternalEntities;
1399 OPEN_INTERNAL_ENTITY *openEntity;
/* Same two-list walk as for the tags above. */
1400 if (entityList == NULL) {
1401 if (parser->m_freeInternalEntities == NULL)
1403 entityList = parser->m_freeInternalEntities;
1404 parser->m_freeInternalEntities = NULL;
1406 openEntity = entityList;
1407 entityList = entityList->next;
1408 FREE(parser, openEntity);
1411 destroyBindings(parser->m_freeBindingList, parser);
1412 destroyBindings(parser->m_inheritedBindings, parser);
1413 poolDestroy(&parser->m_tempPool);
1414 poolDestroy(&parser->m_temp2Pool);
1415 FREE(parser, (void *)parser->m_protocolEncodingName);
1417 /* external parameter entity parsers share the DTD structure
1418 parser->m_dtd with the root parser, so we must not destroy it
1420 if (!parser->m_isParamEntity && parser->m_dtd)
1423 #endif /* XML_DTD */
/* The second argument tells dtdDestroy() whether this parser has no
   parent parser. */
1424 dtdDestroy(parser->m_dtd, (XML_Bool)!parser->m_parentParser, &parser->m_mem);
1425 FREE(parser, (void *)parser->m_atts);
1426 #ifdef XML_ATTR_INFO
1427 FREE(parser, (void *)parser->m_attInfo);
1429 FREE(parser, parser->m_groupConnector);
1430 FREE(parser, parser->m_buffer);
1431 FREE(parser, parser->m_dataBuf);
1432 FREE(parser, parser->m_nsAtts);
1433 FREE(parser, parser->m_unknownEncodingMem);
/* The release callback, when set, is called with
   m_unknownEncodingData. */
1434 if (parser->m_unknownEncodingRelease)
1435 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
/* The parser structure itself goes last. */
1436 FREE(parser, parser);
/* Makes the parser itself the value stored in m_handlerArg, replacing
   whatever handler argument was set before. */
1440 XML_UseParserAsHandlerArg(XML_Parser parser)
1443 parser->m_handlerArg = parser;
/* Records in m_useForeignDTD whether a foreign DTD is to be used.
   Returns XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING when the parsing
   status is XML_PARSING or XML_SUSPENDED and XML_ERROR_NONE after the
   flag has been stored.
   NOTE(review): the conditions leading to the
   XML_ERROR_INVALID_ARGUMENT and XML_ERROR_FEATURE_REQUIRES_XML_DTD
   returns are not visible in this excerpt - presumably a NULL parser
   check and a build without XML_DTD respectively; confirm. */
1446 enum XML_Error XMLCALL
1447 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD)
1450 return XML_ERROR_INVALID_ARGUMENT;
1452 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1453 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1454 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1455 parser->m_useForeignDTD = useDTD;
1456 return XML_ERROR_NONE;
1458 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
/* Stores `do_nst`, normalised to XML_TRUE or XML_FALSE, in
   m_ns_triplets. The store is preceded by a test for the XML_PARSING and
   XML_SUSPENDED states.
   NOTE(review): the statement executed when that test succeeds is not
   visible in this excerpt - presumably an early return. */
1463 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst)
1467 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1468 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1470 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
/* Sets the user data pointer of the parser to `p`. If the handler
   argument currently equals the old user data it is updated to `p` as
   well; otherwise only m_userData changes. */
1474 XML_SetUserData(XML_Parser parser, void *p)
1478 if (parser->m_handlerArg == parser->m_userData)
1479 parser->m_handlerArg = parser->m_userData = p;
1481 parser->m_userData = p;
/* Sets the base stored in m_curBase. One path copies `p` into the pool
   of the DTD with poolCopyString() and stores the copy; another path
   stores NULL. XML_STATUS_OK is returned after either store;
   XML_STATUS_ERROR is returned on the two failure paths.
   NOTE(review): the conditions selecting these paths are not visible in
   this excerpt - presumably a NULL parser check, a test of `p`, and a
   test of the copy result; confirm. */
1484 enum XML_Status XMLCALL
1485 XML_SetBase(XML_Parser parser, const XML_Char *p)
1488 return XML_STATUS_ERROR;
1490 p = poolCopyString(&parser->m_dtd->pool, p);
1492 return XML_STATUS_ERROR;
1493 parser->m_curBase = p;
1496 parser->m_curBase = NULL;
1497 return XML_STATUS_OK;
/* Returns the base currently stored in m_curBase. */
1500 const XML_Char * XMLCALL
1501 XML_GetBase(XML_Parser parser)
1505 return parser->m_curBase;
/* Returns the value of m_nSpecifiedAtts. */
1509 XML_GetSpecifiedAttributeCount(XML_Parser parser)
1513 return parser->m_nSpecifiedAtts;
/* Returns the value of m_idAttIndex. */
1517 XML_GetIdAttributeIndex(XML_Parser parser)
1521 return parser->m_idAttIndex;
1524 #ifdef XML_ATTR_INFO
/* Only compiled when XML_ATTR_INFO is defined. Returns the m_attInfo
   pointer of the parser. */
1525 const XML_AttrInfo * XMLCALL
1526 XML_GetAttributeInfo(XML_Parser parser)
1530 return parser->m_attInfo;
/* Installs both element handlers in one call: `start` becomes
   m_startElementHandler and `end` becomes m_endElementHandler. */
1535 XML_SetElementHandler(XML_Parser parser,
1536 XML_StartElementHandler start,
1537 XML_EndElementHandler end)
1541 parser->m_startElementHandler = start;
1542 parser->m_endElementHandler = end;
/* Installs `start` as m_startElementHandler; the end element handler is
   left as it is. */
1546 XML_SetStartElementHandler(XML_Parser parser,
1547 XML_StartElementHandler start) {
1549 parser->m_startElementHandler = start;
/* Installs `end` as m_endElementHandler; the start element handler is
   left as it is. */
1553 XML_SetEndElementHandler(XML_Parser parser,
1554 XML_EndElementHandler end) {
1556 parser->m_endElementHandler = end;
/* Installs `handler` as m_characterDataHandler. */
1560 XML_SetCharacterDataHandler(XML_Parser parser,
1561 XML_CharacterDataHandler handler)
1564 parser->m_characterDataHandler = handler;
/* Installs `handler` as m_processingInstructionHandler. */
1568 XML_SetProcessingInstructionHandler(XML_Parser parser,
1569 XML_ProcessingInstructionHandler handler)
1572 parser->m_processingInstructionHandler = handler;
/* Installs `handler` as m_commentHandler. */
1576 XML_SetCommentHandler(XML_Parser parser,
1577 XML_CommentHandler handler)
1580 parser->m_commentHandler = handler;
/* Installs both CDATA section handlers in one call: `start` becomes
   m_startCdataSectionHandler and `end` becomes
   m_endCdataSectionHandler. */
1584 XML_SetCdataSectionHandler(XML_Parser parser,
1585 XML_StartCdataSectionHandler start,
1586 XML_EndCdataSectionHandler end)
1590 parser->m_startCdataSectionHandler = start;
1591 parser->m_endCdataSectionHandler = end;
/* Installs `start` as m_startCdataSectionHandler only. */
1595 XML_SetStartCdataSectionHandler(XML_Parser parser,
1596 XML_StartCdataSectionHandler start) {
1598 parser->m_startCdataSectionHandler = start;
/* Installs `end` as m_endCdataSectionHandler only. */
1602 XML_SetEndCdataSectionHandler(XML_Parser parser,
1603 XML_EndCdataSectionHandler end) {
1605 parser->m_endCdataSectionHandler = end;
/* Installs `handler` as m_defaultHandler and sets
   m_defaultExpandInternalEntities to XML_FALSE. Compare
   XML_SetDefaultHandlerExpand, which sets the flag to XML_TRUE. */
1609 XML_SetDefaultHandler(XML_Parser parser,
1610 XML_DefaultHandler handler)
1614 parser->m_defaultHandler = handler;
1615 parser->m_defaultExpandInternalEntities = XML_FALSE;
/* Installs `handler` as m_defaultHandler and sets
   m_defaultExpandInternalEntities to XML_TRUE. Compare
   XML_SetDefaultHandler, which sets the flag to XML_FALSE. */
1619 XML_SetDefaultHandlerExpand(XML_Parser parser,
1620 XML_DefaultHandler handler)
1624 parser->m_defaultHandler = handler;
1625 parser->m_defaultExpandInternalEntities = XML_TRUE;
/* Installs both doctype declaration handlers in one call: `start`
   becomes m_startDoctypeDeclHandler and `end` becomes
   m_endDoctypeDeclHandler. */
1629 XML_SetDoctypeDeclHandler(XML_Parser parser,
1630 XML_StartDoctypeDeclHandler start,
1631 XML_EndDoctypeDeclHandler end)
1635 parser->m_startDoctypeDeclHandler = start;
1636 parser->m_endDoctypeDeclHandler = end;
/* Installs `start` as m_startDoctypeDeclHandler only. */
1640 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1641 XML_StartDoctypeDeclHandler start) {
1643 parser->m_startDoctypeDeclHandler = start;
/* Installs `end` as m_endDoctypeDeclHandler only. */
1647 XML_SetEndDoctypeDeclHandler(XML_Parser parser,
1648 XML_EndDoctypeDeclHandler end) {
1650 parser->m_endDoctypeDeclHandler = end;
/* Installs `handler` as m_unparsedEntityDeclHandler. */
1654 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1655 XML_UnparsedEntityDeclHandler handler)
1658 parser->m_unparsedEntityDeclHandler = handler;
/* Installs `handler` as m_notationDeclHandler. */
1662 XML_SetNotationDeclHandler(XML_Parser parser,
1663 XML_NotationDeclHandler handler)
1666 parser->m_notationDeclHandler = handler;
/* Installs both namespace declaration handlers in one call: `start`
   becomes m_startNamespaceDeclHandler and `end` becomes
   m_endNamespaceDeclHandler. */
1670 XML_SetNamespaceDeclHandler(XML_Parser parser,
1671 XML_StartNamespaceDeclHandler start,
1672 XML_EndNamespaceDeclHandler end)
1676 parser->m_startNamespaceDeclHandler = start;
1677 parser->m_endNamespaceDeclHandler = end;
/* Installs `start` as m_startNamespaceDeclHandler only. */
1681 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1682 XML_StartNamespaceDeclHandler start) {
1684 parser->m_startNamespaceDeclHandler = start;
/* Installs `end` as m_endNamespaceDeclHandler only. */
1688 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1689 XML_EndNamespaceDeclHandler end) {
1691 parser->m_endNamespaceDeclHandler = end;
/* Installs `handler` as m_notStandaloneHandler. */
1695 XML_SetNotStandaloneHandler(XML_Parser parser,
1696 XML_NotStandaloneHandler handler)
1699 parser->m_notStandaloneHandler = handler;
/* Installs `handler` as m_externalEntityRefHandler. */
1703 XML_SetExternalEntityRefHandler(XML_Parser parser,
1704 XML_ExternalEntityRefHandler handler)
1707 parser->m_externalEntityRefHandler = handler;
/* Sets m_externalEntityRefHandlerArg. One assignment stores `arg` (cast
   to XML_Parser), the other stores the parser itself.
   NOTE(review): the condition choosing between the two assignments is
   not visible in this excerpt - presumably the parser is used when `arg`
   is NULL; confirm. */
1711 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
1716 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1718 parser->m_externalEntityRefHandlerArg = parser;
/* Installs `handler` as m_skippedEntityHandler. */
1722 XML_SetSkippedEntityHandler(XML_Parser parser,
1723 XML_SkippedEntityHandler handler)
1726 parser->m_skippedEntityHandler = handler;
/* Installs `handler` as m_unknownEncodingHandler and stores `data` in
   m_unknownEncodingHandlerData.
   NOTE(review): the declaration of the `data` parameter is not visible
   in this excerpt. */
1730 XML_SetUnknownEncodingHandler(XML_Parser parser,
1731 XML_UnknownEncodingHandler handler,
1736 parser->m_unknownEncodingHandler = handler;
1737 parser->m_unknownEncodingHandlerData = data;
/* Installs `eldecl` as m_elementDeclHandler. */
1741 XML_SetElementDeclHandler(XML_Parser parser,
1742 XML_ElementDeclHandler eldecl)
1745 parser->m_elementDeclHandler = eldecl;
/* Installs `attdecl` as m_attlistDeclHandler. */
1749 XML_SetAttlistDeclHandler(XML_Parser parser,
1750 XML_AttlistDeclHandler attdecl)
1753 parser->m_attlistDeclHandler = attdecl;
/* Installs `handler` as m_entityDeclHandler. */
1757 XML_SetEntityDeclHandler(XML_Parser parser,
1758 XML_EntityDeclHandler handler)
1761 parser->m_entityDeclHandler = handler;
/* Installs `handler` as m_xmlDeclHandler. */
1765 XML_SetXmlDeclHandler(XML_Parser parser,
1766 XML_XmlDeclHandler handler) {
1768 parser->m_xmlDeclHandler = handler;
/* Selects the parameter entity parsing mode by storing `peParsing` in
   m_paramEntityParsing. The store is preceded by a test for the
   XML_PARSING and XML_SUSPENDED states. The last visible return yields
   non-zero only when `peParsing` is XML_PARAM_ENTITY_PARSING_NEVER.
   NOTE(review): the value returned after the store, the statement taken
   when the state test succeeds, and the preprocessor conditions around
   the two paths are not visible in this excerpt - presumably the final
   return belongs to builds without XML_DTD; confirm. */
1772 XML_SetParamEntityParsing(XML_Parser parser,
1773 enum XML_ParamEntityParsing peParsing)
1777 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1778 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1781 parser->m_paramEntityParsing = peParsing;
1784 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
/* Stores `hash_salt` in m_hash_secret_salt. If the parser has a parent
   parser the call is forwarded to that parent instead, so the value ends
   up on the parser at the top of the parent chain. The store is preceded
   by a test for the XML_PARSING and XML_SUSPENDED states.
   NOTE(review): the return statements for the blocked and the successful
   case are not visible in this excerpt. */
1789 XML_SetHashSalt(XML_Parser parser,
1790 unsigned long hash_salt)
1794 if (parser->m_parentParser)
1795 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1796 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1797 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1799 parser->m_hash_secret_salt = hash_salt;
/* Feeds `len` bytes starting at `s` to the parser; `isFinal` is stored
   in m_parsingStatus.finalBuffer.
   A NULL parser, a negative `len`, or a NULL `s` with a non-zero `len`
   is rejected with XML_STATUS_ERROR. The error codes
   XML_ERROR_SUSPENDED and XML_ERROR_FINISHED are reported from the
   switch on the parsing status, and XML_ERROR_NO_MEMORY when
   startParsing() fails for a parser without a parent.
   Three ways of processing the data are visible:
   - a path that runs the processor over the bytes already held between
     m_bufferPtr and m_bufferEnd;
   - when XML_CONTEXT_BYTES is not defined and nothing is buffered, a
     path that runs the processor directly on the memory of the caller
     and saves unconsumed bytes in m_buffer;
   - otherwise a path that copies the input into the buffer returned by
     XML_GetBuffer() and continues in XML_ParseBuffer().
   NOTE(review): several case labels, conditions and braces are not
   visible in this excerpt, so the exact nesting of these paths should be
   confirmed against the complete file. */
1803 enum XML_Status XMLCALL
1804 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
/* Argument validation. */
1806 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1808 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
1809 return XML_STATUS_ERROR;
/* Dispatch on the current parsing status. */
1811 switch (parser->m_parsingStatus.parsing) {
1813 parser->m_errorCode = XML_ERROR_SUSPENDED;
1814 return XML_STATUS_ERROR;
1816 parser->m_errorCode = XML_ERROR_FINISHED;
1817 return XML_STATUS_ERROR;
1818 case XML_INITIALIZED:
1819 if (parser->m_parentParser == NULL && !startParsing(parser)) {
1820 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1821 return XML_STATUS_ERROR;
1824 parser->m_parsingStatus.parsing = XML_PARSING;
/* Path that works only on bytes already held in the buffer (the
   remark further down ties it to len == 0). */
1828 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1830 return XML_STATUS_OK;
1831 parser->m_positionPtr = parser->m_bufferPtr;
1832 parser->m_parseEndPtr = parser->m_bufferEnd;
1834 /* If data are left over from last buffer, and we now know that these
1835 data are the final chunk of input, then we have to check them again
1836 to detect errors based on that fact.
1838 parser->m_errorCode = parser->m_processor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
1840 if (parser->m_errorCode == XML_ERROR_NONE) {
1841 switch (parser->m_parsingStatus.parsing) {
1843 /* It is hard to be certain, but it seems that this case
1844 * cannot occur. This code is cleaning up a previous parse
1845 * with no new data (since len == 0). Changing the parsing
1846 * state requires getting to execute a handler function, and
1847 * there doesn't seem to be an opportunity for that while in
1848 * this circumstance.
1850 * Given the uncertainty, we retain the code but exclude it
1851 * from coverage tests.
1855 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
1856 parser->m_positionPtr = parser->m_bufferPtr;
1857 return XML_STATUS_SUSPENDED;
1858 /* LCOV_EXCL_STOP */
1859 case XML_INITIALIZED:
1861 parser->m_parsingStatus.parsing = XML_FINISHED;
1864 return XML_STATUS_OK;
/* Error exit: the event end is set equal to the event start and
   errorProcessor is installed as the processor. */
1867 parser->m_eventEndPtr = parser->m_eventPtr;
1868 parser->m_processor = errorProcessor;
1869 return XML_STATUS_ERROR;
1871 #ifndef XML_CONTEXT_BYTES
/* Nothing is buffered (m_bufferPtr == m_bufferEnd): the processor is run
   directly on the memory supplied by the caller. */
1872 else if (parser->m_bufferPtr == parser->m_bufferEnd) {
1875 enum XML_Status result;
1876 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1877 if (len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1878 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1879 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1880 parser->m_processor = errorProcessor;
1881 return XML_STATUS_ERROR;
1883 parser->m_parseEndByteIndex += len;
1884 parser->m_positionPtr = s;
1885 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
/* m_parseEndPtr is set to s + len as part of the call; `end` receives
   the position up to which the processor consumed input. */
1887 parser->m_errorCode = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
1889 if (parser->m_errorCode != XML_ERROR_NONE) {
1890 parser->m_eventEndPtr = parser->m_eventPtr;
1891 parser->m_processor = errorProcessor;
1892 return XML_STATUS_ERROR;
1895 switch (parser->m_parsingStatus.parsing) {
1897 result = XML_STATUS_SUSPENDED;
1899 case XML_INITIALIZED:
1902 parser->m_parsingStatus.parsing = XML_FINISHED;
1903 return XML_STATUS_OK;
1907 result = XML_STATUS_OK;
/* Bring the line and column bookkeeping up to `end`, then save the
   bytes between `end` and s + len in m_buffer. */
1911 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end, &parser->m_position);
1912 nLeftOver = s + len - end;
/* m_buffer is reallocated to twice `len` bytes when it does not exist
   yet or is smaller than the leftover. */
1914 if (parser->m_buffer == NULL || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
1915 /* avoid _signed_ integer overflow */
1917 const int bytesToAllocate = (int)((unsigned)len * 2U);
1918 if (bytesToAllocate > 0) {
1919 temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
1922 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1923 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1924 parser->m_processor = errorProcessor;
1925 return XML_STATUS_ERROR;
1927 parser->m_buffer = temp;
1928 parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
1930 memcpy(parser->m_buffer, end, nLeftOver);
/* All buffer, position and event pointers now refer to the saved
   leftover at the start of m_buffer. */
1932 parser->m_bufferPtr = parser->m_buffer;
1933 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
1934 parser->m_positionPtr = parser->m_bufferPtr;
1935 parser->m_parseEndPtr = parser->m_bufferEnd;
1936 parser->m_eventPtr = parser->m_bufferPtr;
1937 parser->m_eventEndPtr = parser->m_bufferPtr;
1940 #endif /* not defined XML_CONTEXT_BYTES */
/* General path: copy the input into the buffer of the parser and let
   XML_ParseBuffer() do the work. */
1942 void *buff = XML_GetBuffer(parser, len);
1944 return XML_STATUS_ERROR;
1946 memcpy(buff, s, len);
1947 return XML_ParseBuffer(parser, len, isFinal);
/* Parses `len` bytes that have already been placed in the buffer of the
   parser, starting at the old m_bufferEnd (XML_Parse does this through
   XML_GetBuffer() and memcpy()). `isFinal` is stored in
   m_parsingStatus.finalBuffer.
   The error codes XML_ERROR_SUSPENDED and XML_ERROR_FINISHED are
   reported from the switch on the parsing status, and
   XML_ERROR_NO_MEMORY when startParsing() fails for a parser without a
   parent. If the processor reports an error, errorProcessor is installed
   and XML_STATUS_ERROR is returned.
   NOTE(review): several case labels, the leading argument check and the
   final return are not visible in this excerpt. */
1952 enum XML_Status XMLCALL
1953 XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
1956 enum XML_Status result = XML_STATUS_OK;
1959 return XML_STATUS_ERROR;
1960 switch (parser->m_parsingStatus.parsing) {
1962 parser->m_errorCode = XML_ERROR_SUSPENDED;
1963 return XML_STATUS_ERROR;
1965 parser->m_errorCode = XML_ERROR_FINISHED;
1966 return XML_STATUS_ERROR;
1967 case XML_INITIALIZED:
1968 if (parser->m_parentParser == NULL && !startParsing(parser)) {
1969 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1970 return XML_STATUS_ERROR;
1973 parser->m_parsingStatus.parsing = XML_PARSING;
/* Extend the buffered range by the new bytes and account for them in the
   running byte index. */
1976 start = parser->m_bufferPtr;
1977 parser->m_positionPtr = start;
1978 parser->m_bufferEnd += len;
1979 parser->m_parseEndPtr = parser->m_bufferEnd;
1980 parser->m_parseEndByteIndex += len;
1981 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
/* The processor advances m_bufferPtr past the input it consumed. */
1983 parser->m_errorCode = parser->m_processor(parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
1985 if (parser->m_errorCode != XML_ERROR_NONE) {
1986 parser->m_eventEndPtr = parser->m_eventPtr;
1987 parser->m_processor = errorProcessor;
1988 return XML_STATUS_ERROR;
1991 switch (parser->m_parsingStatus.parsing) {
1993 result = XML_STATUS_SUSPENDED;
1995 case XML_INITIALIZED:
1998 parser->m_parsingStatus.parsing = XML_FINISHED;
2001 default: ; /* should not happen */
/* Bring the line and column bookkeeping up to the new m_bufferPtr. */
2005 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
2006 parser->m_positionPtr = parser->m_bufferPtr;
/* Returns m_bufferEnd, the position in the buffer of the parser at which
   the caller may store up to `len` bytes of input.
   If fewer than `len` bytes are free after m_bufferEnd, the unparsed
   data is either moved towards the start of the existing buffer or
   copied into a newly allocated, larger buffer. When XML_CONTEXT_BYTES
   is defined, up to XML_CONTEXT_BYTES bytes preceding m_bufferPtr are
   kept as well.
   The error codes XML_ERROR_NO_MEMORY, XML_ERROR_SUSPENDED and
   XML_ERROR_FINISHED are set on the failure paths.
   NOTE(review): the return statements of the failure paths, some
   conditions and the adjustment of neededSize for the kept context are
   not visible in this excerpt - presumably NULL is returned on failure;
   confirm. */
2011 XML_GetBuffer(XML_Parser parser, int len)
2016 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2019 switch (parser->m_parsingStatus.parsing) {
2021 parser->m_errorCode = XML_ERROR_SUSPENDED;
2024 parser->m_errorCode = XML_ERROR_FINISHED;
/* Not enough room behind m_bufferEnd for `len` more bytes. */
2029 if (len > parser->m_bufferLim - parser->m_bufferEnd) {
2030 #ifdef XML_CONTEXT_BYTES
2032 #endif /* defined XML_CONTEXT_BYTES */
2033 /* Do not invoke signed arithmetic overflow: */
2034 int neededSize = (int) ((unsigned)len + (unsigned)(parser->m_bufferEnd - parser->m_bufferPtr));
/* A negative result means the unsigned sum does not fit in an int. */
2035 if (neededSize < 0) {
2036 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2039 #ifdef XML_CONTEXT_BYTES
/* Number of already parsed bytes to keep, capped at
   XML_CONTEXT_BYTES. */
2040 keep = (int)(parser->m_bufferPtr - parser->m_buffer);
2041 if (keep > XML_CONTEXT_BYTES)
2042 keep = XML_CONTEXT_BYTES;
2044 #endif /* defined XML_CONTEXT_BYTES */
/* The existing allocation is large enough: slide the data towards the
   start of the buffer instead of allocating. */
2045 if (neededSize <= parser->m_bufferLim - parser->m_buffer) {
2046 #ifdef XML_CONTEXT_BYTES
2047 if (keep < parser->m_bufferPtr - parser->m_buffer) {
2048 int offset = (int)(parser->m_bufferPtr - parser->m_buffer) - keep;
2049 memmove(parser->m_buffer, &parser->m_buffer[offset], parser->m_bufferEnd - parser->m_bufferPtr + keep);
2050 parser->m_bufferEnd -= offset;
2051 parser->m_bufferPtr -= offset;
2054 memmove(parser->m_buffer, parser->m_bufferPtr, parser->m_bufferEnd - parser->m_bufferPtr);
2055 parser->m_bufferEnd = parser->m_buffer + (parser->m_bufferEnd - parser->m_bufferPtr);
2056 parser->m_bufferPtr = parser->m_buffer;
2057 #endif /* not defined XML_CONTEXT_BYTES */
/* Otherwise a new size is found by repeated doubling, starting from the
   span between m_bufferPtr and m_bufferLim, or from INIT_BUFFER_SIZE
   when that span is zero. */
2061 int bufferSize = (int)(parser->m_bufferLim - parser->m_bufferPtr);
2062 if (bufferSize == 0)
2063 bufferSize = INIT_BUFFER_SIZE;
2065 /* Do not invoke signed arithmetic overflow: */
2066 bufferSize = (int) (2U * (unsigned) bufferSize);
2067 } while (bufferSize < neededSize && bufferSize > 0);
/* A non-positive size means the doubling wrapped around. */
2068 if (bufferSize <= 0) {
2069 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2072 newBuf = (char *)MALLOC(parser, bufferSize);
2074 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2077 parser->m_bufferLim = newBuf + bufferSize;
2078 #ifdef XML_CONTEXT_BYTES
/* Copy the kept context plus the unparsed data into the new buffer, free
   the old one and rebase the pointers. */
2079 if (parser->m_bufferPtr) {
2080 int keep = (int)(parser->m_bufferPtr - parser->m_buffer);
2081 if (keep > XML_CONTEXT_BYTES)
2082 keep = XML_CONTEXT_BYTES;
2083 memcpy(newBuf, &parser->m_bufferPtr[-keep], parser->m_bufferEnd - parser->m_bufferPtr + keep);
2084 FREE(parser, parser->m_buffer);
2085 parser->m_buffer = newBuf;
2086 parser->m_bufferEnd = parser->m_buffer + (parser->m_bufferEnd - parser->m_bufferPtr) + keep;
2087 parser->m_bufferPtr = parser->m_buffer + keep;
2090 parser->m_bufferEnd = newBuf + (parser->m_bufferEnd - parser->m_bufferPtr);
2091 parser->m_bufferPtr = parser->m_buffer = newBuf;
/* Variant without kept context: only the unparsed data is copied. */
2094 if (parser->m_bufferPtr) {
2095 memcpy(newBuf, parser->m_bufferPtr, parser->m_bufferEnd - parser->m_bufferPtr);
2096 FREE(parser, parser->m_buffer);
2098 parser->m_bufferEnd = newBuf + (parser->m_bufferEnd - parser->m_bufferPtr);
2099 parser->m_bufferPtr = parser->m_buffer = newBuf;
2100 #endif /* not defined XML_CONTEXT_BYTES */
/* The cached event and position pointers are cleared. */
2102 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2103 parser->m_positionPtr = NULL;
2105 return parser->m_bufferEnd;
/* Requests that parsing stop. Depending on the current parsing status
   and on `resumable`, the status becomes XML_SUSPENDED or XML_FINISHED,
   or an error is reported: XML_ERROR_SUSPENDED, XML_ERROR_FINISHED, or
   XML_ERROR_SUSPEND_PE when the parser is a parameter entity parser.
   Returns XML_STATUS_ERROR together with those error codes and
   XML_STATUS_OK otherwise.
   NOTE(review): the case labels and the tests of `resumable` are not
   visible in this excerpt, so the mapping of states to outcomes should
   be confirmed against the complete file. */
2108 enum XML_Status XMLCALL
2109 XML_StopParser(XML_Parser parser, XML_Bool resumable)
2112 return XML_STATUS_ERROR;
2113 switch (parser->m_parsingStatus.parsing) {
2116 parser->m_errorCode = XML_ERROR_SUSPENDED;
2117 return XML_STATUS_ERROR;
2119 parser->m_parsingStatus.parsing = XML_FINISHED;
2122 parser->m_errorCode = XML_ERROR_FINISHED;
2123 return XML_STATUS_ERROR;
/* A parameter entity parser reports XML_ERROR_SUSPEND_PE instead of
   reaching the XML_SUSPENDED assignment below. */
2127 if (parser->m_isParamEntity) {
2128 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2129 return XML_STATUS_ERROR;
2132 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2135 parser->m_parsingStatus.parsing = XML_FINISHED;
2137 return XML_STATUS_OK;
/* Continues a parse that was suspended. If the parsing status is not
   XML_SUSPENDED, XML_ERROR_NOT_SUSPENDED is set and XML_STATUS_ERROR is
   returned. Otherwise the status becomes XML_PARSING and the processor
   is run over the data between m_bufferPtr and m_parseEndPtr; a
   processor error installs errorProcessor and yields XML_STATUS_ERROR.
   NOTE(review): the leading argument check, some case labels and the
   final return are not visible in this excerpt. */
2140 enum XML_Status XMLCALL
2141 XML_ResumeParser(XML_Parser parser)
2143 enum XML_Status result = XML_STATUS_OK;
2146 return XML_STATUS_ERROR;
2147 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2148 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2149 return XML_STATUS_ERROR;
2151 parser->m_parsingStatus.parsing = XML_PARSING;
2153 parser->m_errorCode = parser->m_processor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2155 if (parser->m_errorCode != XML_ERROR_NONE) {
2156 parser->m_eventEndPtr = parser->m_eventPtr;
2157 parser->m_processor = errorProcessor;
2158 return XML_STATUS_ERROR;
2161 switch (parser->m_parsingStatus.parsing) {
2163 result = XML_STATUS_SUSPENDED;
2165 case XML_INITIALIZED:
/* The parse is marked finished only if the last buffer handed in was
   flagged as final. */
2167 if (parser->m_parsingStatus.finalBuffer) {
2168 parser->m_parsingStatus.parsing = XML_FINISHED;
/* Bring the line and column bookkeeping up to the new m_bufferPtr. */
2175 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
2176 parser->m_positionPtr = parser->m_bufferPtr;
/* Copies the m_parsingStatus structure of the parser into `*status`.
   `status` must not be NULL; this is enforced with assert() only. */
2181 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status)
2185 assert(status != NULL);
2186 *status = parser->m_parsingStatus;
/* Returns the m_errorCode value of the parser.
   NOTE(review): the condition guarding the XML_ERROR_INVALID_ARGUMENT
   return is not visible in this excerpt - presumably a NULL parser
   check. */
2189 enum XML_Error XMLCALL
2190 XML_GetErrorCode(XML_Parser parser)
2193 return XML_ERROR_INVALID_ARGUMENT;
2194 return parser->m_errorCode;
/* When an event pointer is set, returns the byte index of the current
   event: the byte index of the end of the parsed data
   (m_parseEndByteIndex) minus the distance from m_eventPtr to
   m_parseEndPtr.
   NOTE(review): the value returned when m_eventPtr is not set is not
   visible in this excerpt. */
2198 XML_GetCurrentByteIndex(XML_Parser parser)
2202 if (parser->m_eventPtr)
2203 return (XML_Index)(parser->m_parseEndByteIndex - (parser->m_parseEndPtr - parser->m_eventPtr));
/* When both event pointers are set, returns the length in bytes of the
   current event, i.e. m_eventEndPtr - m_eventPtr.
   NOTE(review): the value returned otherwise is not visible in this
   excerpt. */
2208 XML_GetCurrentByteCount(XML_Parser parser)
2212 if (parser->m_eventEndPtr && parser->m_eventPtr)
2213 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
/* Gives access to the buffered input around the current event. When
   XML_CONTEXT_BYTES is defined and both m_eventPtr and m_buffer are set,
   `*offset` receives the offset of the event within m_buffer, `*size`
   receives the number of bytes between m_buffer and m_bufferEnd, and
   m_buffer is returned.
   NOTE(review): any NULL checks on `offset` and `size` and the value
   returned on the other paths are not visible in this excerpt. */
2217 const char * XMLCALL
2218 XML_GetInputContext(XML_Parser parser, int *offset, int *size)
2220 #ifdef XML_CONTEXT_BYTES
2223 if (parser->m_eventPtr && parser->m_buffer) {
2225 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2227 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2228 return parser->m_buffer;
2234 #endif /* defined XML_CONTEXT_BYTES */
/* Returns m_position.lineNumber + 1. If an event pointer is set and lies
   at or beyond m_positionPtr, the stored position is first advanced up to
   the event with XmlUpdatePosition() and m_positionPtr is moved to the
   event. */
2239 XML_GetCurrentLineNumber(XML_Parser parser)
2243 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2244 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_eventPtr, &parser->m_position);
2245 parser->m_positionPtr = parser->m_eventPtr;
2247 return parser->m_position.lineNumber + 1;
/* Returns m_position.columnNumber unchanged (no + 1, unlike the line
   number). If an event pointer is set and lies at or beyond
   m_positionPtr, the stored position is first advanced up to the event
   with XmlUpdatePosition() and m_positionPtr is moved to the event. */
2251 XML_GetCurrentColumnNumber(XML_Parser parser)
2255 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2256 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_eventPtr, &parser->m_position);
2257 parser->m_positionPtr = parser->m_eventPtr;
2259 return parser->m_position.columnNumber;
/* Releases `model` with FREE() on behalf of `parser`. */
2263 XML_FreeContentModel(XML_Parser parser, XML_Content *model)
2266 FREE(parser, model);
/* Allocates `size` bytes with MALLOC() on behalf of `parser` and returns
   the result. */
2270 XML_MemMalloc(XML_Parser parser, size_t size)
2274 return MALLOC(parser, size);
/* Resizes the block at `ptr` to `size` bytes with REALLOC() on behalf of
   `parser` and returns the result. */
2278 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size)
2282 return REALLOC(parser, ptr, size);
/* NOTE(review): only the signature is visible in this excerpt; by
   analogy with XML_MemMalloc and XML_MemRealloc this presumably releases
   `ptr` on behalf of `parser` - confirm against the complete file. */
2286 XML_MemFree(XML_Parser parser, void *ptr)
/* Passes the current event to reportDefault(), provided a default
   handler is installed. While internal entities are open, the event
   range and encoding come from the first entry of
   m_openInternalEntities (internalEventPtr, internalEventEndPtr,
   m_internalEncoding); the other call uses m_eventPtr, m_eventEndPtr and
   m_encoding. */
2293 XML_DefaultCurrent(XML_Parser parser)
2297 if (parser->m_defaultHandler) {
2298 if (parser->m_openInternalEntities)
2299 reportDefault(parser,
2300 parser->m_internalEncoding,
2301 parser->m_openInternalEntities->internalEventPtr,
2302 parser->m_openInternalEntities->internalEventEndPtr);
2304 reportDefault(parser, parser->m_encoding, parser->m_eventPtr, parser->m_eventEndPtr);
/* Translate an XML_Error code into a fixed English message.  Every message
   is a string literal wrapped in XML_L(), so the caller must not modify or
   free the result.
   NOTE(review): XML_ERROR_NONE presumably yields no message (NULL) rather
   than sharing the text of the label that follows it - confirm. */
2308 const XML_LChar * XMLCALL
2309 XML_ErrorString(enum XML_Error code)
2312 case XML_ERROR_NONE:
2314 case XML_ERROR_NO_MEMORY:
2315 return XML_L("out of memory");
2316 case XML_ERROR_SYNTAX:
2317 return XML_L("syntax error");
2318 case XML_ERROR_NO_ELEMENTS:
2319 return XML_L("no element found");
2320 case XML_ERROR_INVALID_TOKEN:
2321 return XML_L("not well-formed (invalid token)");
2322 case XML_ERROR_UNCLOSED_TOKEN:
2323 return XML_L("unclosed token");
2324 case XML_ERROR_PARTIAL_CHAR:
2325 return XML_L("partial character");
2326 case XML_ERROR_TAG_MISMATCH:
2327 return XML_L("mismatched tag");
2328 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2329 return XML_L("duplicate attribute");
2330 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2331 return XML_L("junk after document element");
2332 case XML_ERROR_PARAM_ENTITY_REF:
2333 return XML_L("illegal parameter entity reference");
2334 case XML_ERROR_UNDEFINED_ENTITY:
2335 return XML_L("undefined entity");
2336 case XML_ERROR_RECURSIVE_ENTITY_REF:
2337 return XML_L("recursive entity reference");
2338 case XML_ERROR_ASYNC_ENTITY:
2339 return XML_L("asynchronous entity");
2340 case XML_ERROR_BAD_CHAR_REF:
2341 return XML_L("reference to invalid character number");
2342 case XML_ERROR_BINARY_ENTITY_REF:
2343 return XML_L("reference to binary entity");
2344 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2345 return XML_L("reference to external entity in attribute");
2346 case XML_ERROR_MISPLACED_XML_PI:
2347 return XML_L("XML or text declaration not at start of entity");
2348 case XML_ERROR_UNKNOWN_ENCODING:
2349 return XML_L("unknown encoding");
2350 case XML_ERROR_INCORRECT_ENCODING:
2351 return XML_L("encoding specified in XML declaration is incorrect");
2352 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2353 return XML_L("unclosed CDATA section");
2354 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2355 return XML_L("error in processing external entity reference");
2356 case XML_ERROR_NOT_STANDALONE:
2357 return XML_L("document is not standalone");
2358 case XML_ERROR_UNEXPECTED_STATE:
2359 return XML_L("unexpected parser state - please send a bug report");
2360 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2361 return XML_L("entity declared in parameter entity");
2362 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2363 return XML_L("requested feature requires XML_DTD support in Expat");
2364 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2365 return XML_L("cannot change setting once parsing has begun");
2366 /* Added in 1.95.7. */
2367 case XML_ERROR_UNBOUND_PREFIX:
2368 return XML_L("unbound prefix");
2369 /* Added in 1.95.8. */
2370 case XML_ERROR_UNDECLARING_PREFIX:
2371 return XML_L("must not undeclare prefix");
2372 case XML_ERROR_INCOMPLETE_PE:
2373 return XML_L("incomplete markup in parameter entity");
2374 case XML_ERROR_XML_DECL:
2375 return XML_L("XML declaration not well-formed");
2376 case XML_ERROR_TEXT_DECL:
2377 return XML_L("text declaration not well-formed");
2378 case XML_ERROR_PUBLICID:
2379 return XML_L("illegal character(s) in public id");
2380 case XML_ERROR_SUSPENDED:
2381 return XML_L("parser suspended");
2382 case XML_ERROR_NOT_SUSPENDED:
2383 return XML_L("parser not suspended");
2384 case XML_ERROR_ABORTED:
2385 return XML_L("parsing aborted");
2386 case XML_ERROR_FINISHED:
2387 return XML_L("parsing finished");
2388 case XML_ERROR_SUSPEND_PE:
2389 return XML_L("cannot suspend in external parameter entity");
2390 /* Added in 2.0.0. */
2391 case XML_ERROR_RESERVED_PREFIX_XML:
2392 return XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name");
2393 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2394 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2395 case XML_ERROR_RESERVED_NAMESPACE_URI:
2396 return XML_L("prefix must not be bound to one of the reserved namespace names");
2397 /* Added in 2.2.5. */
2398 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2399 return XML_L("invalid argument");
/* Return the library version as a string literal of the form
   expat_MAJOR.MINOR.MICRO.  The text is assembled by the preprocessor from
   XML_MAJOR_VERSION, XML_MINOR_VERSION and XML_MICRO_VERSION, so no work
   happens at run time. */
2404 const XML_LChar * XMLCALL
2405 XML_ExpatVersion(void) {
2407 /* V1 is used to string-ize the version number. However, it would
2408 string-ize the actual version macro *names* unless we get them
2409 substituted before being passed to V1. CPP is defined to expand
2410 a macro, then rescan for more expansions. Thus, we use V2 to expand
2411 the version macros, then CPP will expand the resulting V1() macro
2412 with the correct numerals. */
2413 /* ### I'm assuming cpp is portable in this respect... */
/* V1 stringizes its three arguments and joins them with dots; V2 prepends
   the expat_ prefix after its arguments have been macro-expanded. */
2415 #define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c)
2416 #define V2(a,b,c) XML_L("expat_")V1(a,b,c)
2418 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
/* Numeric form of the library version: fills an XML_Expat_Version with the
   compile-time values of XML_MAJOR_VERSION, XML_MINOR_VERSION and
   XML_MICRO_VERSION.  The declared return type is the structure itself. */
2424 XML_Expat_Version XMLCALL
2425 XML_ExpatVersionInfo(void)
2427 XML_Expat_Version version;
2429 version.major = XML_MAJOR_VERSION;
2430 version.minor = XML_MINOR_VERSION;
2431 version.micro = XML_MICRO_VERSION;
/* Describe the compile-time configuration of the library as a table of
   XML_Feature records (feature id, display name, numeric value).  The table
   has static storage duration and is const.  Entries that sit behind an
   #ifdef are present only when the corresponding macro is defined. */
2436 const XML_Feature * XMLCALL
2437 XML_GetFeatureList(void)
2439 static const XML_Feature features[] = {
/* The two sizeof entries carry a value on the line that follows the name. */
2440 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2442 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2445 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2447 #ifdef XML_UNICODE_WCHAR_T
2448 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2451 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2453 #ifdef XML_CONTEXT_BYTES
2454 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2458 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2461 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2463 #ifdef XML_LARGE_SIZE
2464 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2466 #ifdef XML_ATTR_INFO
2467 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
/* Terminator record: XML_FEATURE_END with a NULL name marks the end. */
2469 {XML_FEATURE_END, NULL, 0}
/* storeRawNames: starting at parser->m_tagStack, copy the raw name of each
   open tag into the buffer owned by that tag, directly behind the converted
   name, growing the buffer with REALLOC() when it is too small.
   Callers in this file treat a false result as an out-of-memory error. */
2475 /* Initially tag->rawName always points into the parse buffer;
2476 for those TAG instances opened while the current parse buffer was
2477 processed, and not yet closed, we need to store tag->rawName in a more
2478 permanent location, since the parse buffer is about to be discarded.
2481 storeRawNames(XML_Parser parser)
2483 TAG *tag = parser->m_tagStack;
// nameLen is the byte size of the converted name plus one extra XML_Char;
// the raw name is placed immediately after that region inside tag->buf.
2486 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2487 char *rawNameBuf = tag->buf + nameLen;
2488 /* Stop if already stored. Since m_tagStack is a stack, we can stop
2489 at the first entry that has already been copied; everything
2490 below it in the stack is already been accounted for in a
2491 previous call to this function.
2493 if (tag->rawName == rawNameBuf)
2495 /* For re-use purposes we need to ensure that the
2496 size of tag->buf is a multiple of sizeof(XML_Char).
// NOTE(review): nameLen is an int; confirm that nameLen plus the rounded
// raw name length cannot overflow for very long tag names.
2498 bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2499 if (bufSize > tag->bufEnd - tag->buf) {
2500 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
// The block may have moved, so pointers into the old buffer are rebased
// onto temp before the old address is forgotten.
2503 /* if tag->name.str points to tag->buf (only when namespace
2504 processing is off) then we have to update it
2506 if (tag->name.str == (XML_Char *)tag->buf)
2507 tag->name.str = (XML_Char *)temp;
2508 /* if tag->name.localPart is set (when namespace processing is on)
2509 then update it as well, since it will always point into tag->buf
2511 if (tag->name.localPart)
2512 tag->name.localPart = (XML_Char *)temp + (tag->name.localPart -
2513 (XML_Char *)tag->buf);
2515 tag->bufEnd = temp + bufSize;
2516 rawNameBuf = temp + nameLen;
// Copy the raw name out of the parse buffer and repoint tag->rawName at
// the copy, which is also what the already-stored test above detects.
2518 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2519 tag->rawName = rawNameBuf;
/* Processor for document content.  Runs doContent() with the parser
   encoding and 0 as its second argument (presumably the start tag level -
   confirm), asking for more input whenever this is not the final buffer.
   After a successful pass the raw names of still-open tags are saved with
   storeRawNames(); a failure there is reported as XML_ERROR_NO_MEMORY. */
2525 static enum XML_Error PTRCALL
2526 contentProcessor(XML_Parser parser,
2529 const char **endPtr)
2531 enum XML_Error result = doContent(parser, 0, parser->m_encoding, start, end,
2532 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
2533 if (result == XML_ERROR_NONE) {
2534 if (!storeRawNames(parser))
2535 return XML_ERROR_NO_MEMORY;
/* First stage of external entity processing: initialize the encoding, then
   install externalEntityInitProcessor2 as the active processor and hand the
   same input range straight to it. */
2540 static enum XML_Error PTRCALL
2541 externalEntityInitProcessor(XML_Parser parser,
2544 const char **endPtr)
2546 enum XML_Error result = initializeEncoding(parser);
2547 if (result != XML_ERROR_NONE)
2549 parser->m_processor = externalEntityInitProcessor2;
2550 return externalEntityInitProcessor2(parser, start, end, endPtr);
/* Second stage of external entity processing: look at the first token of
   the entity with XmlContentTok().  Incomplete input yields XML_ERROR_NONE
   while more data may still arrive and an error once the final buffer has
   been seen; otherwise control moves on to externalEntityInitProcessor3. */
2553 static enum XML_Error PTRCALL
2554 externalEntityInitProcessor2(XML_Parser parser,
2557 const char **endPtr)
2559 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2560 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2563 /* If we are at the end of the buffer, this would cause the next stage,
2564 i.e. externalEntityInitProcessor3, to pass control directly to
2565 doContent (by detecting XML_TOK_NONE) without processing any xml text
2566 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2568 if (next == end && !parser->m_parsingStatus.finalBuffer) {
2570 return XML_ERROR_NONE;
// A partial token is only an error in the final buffer; the event pointer
// is then set to the start of the input before the error is returned.
2574 case XML_TOK_PARTIAL:
2575 if (!parser->m_parsingStatus.finalBuffer) {
2577 return XML_ERROR_NONE;
2579 parser->m_eventPtr = start;
2580 return XML_ERROR_UNCLOSED_TOKEN;
2581 case XML_TOK_PARTIAL_CHAR:
2582 if (!parser->m_parsingStatus.finalBuffer) {
2584 return XML_ERROR_NONE;
2586 parser->m_eventPtr = start;
2587 return XML_ERROR_PARTIAL_CHAR;
// Switch the active processor to the third stage and continue there.
2589 parser->m_processor = externalEntityInitProcessor3;
2590 return externalEntityInitProcessor3(parser, start, end, endPtr);
/* Third stage of external entity processing: tokenize the start of the
   entity, record the token bounds in m_eventPtr and m_eventEndPtr, and let
   processXmlDecl() handle a leading declaration token.  Afterwards the
   processor becomes externalEntityContentProcessor with m_tagLevel set
   to 1. */
2593 static enum XML_Error PTRCALL
2594 externalEntityInitProcessor3(XML_Parser parser,
2597 const char **endPtr)
2600 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2601 parser->m_eventPtr = start;
2602 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2603 parser->m_eventEndPtr = next;
2606 case XML_TOK_XML_DECL:
2608 enum XML_Error result;
2609 result = processXmlDecl(parser, 1, start, next);
2610 if (result != XML_ERROR_NONE)
/* The parsing status is examined after the declaration has been processed
   and can end this call with XML_ERROR_NONE or XML_ERROR_ABORTED. */
2612 switch (parser->m_parsingStatus.parsing) {
2615 return XML_ERROR_NONE;
2617 return XML_ERROR_ABORTED;
/* Partial tokens: wait for more data unless this is the final buffer. */
2623 case XML_TOK_PARTIAL:
2624 if (!parser->m_parsingStatus.finalBuffer) {
2626 return XML_ERROR_NONE;
2628 return XML_ERROR_UNCLOSED_TOKEN;
2629 case XML_TOK_PARTIAL_CHAR:
2630 if (!parser->m_parsingStatus.finalBuffer) {
2632 return XML_ERROR_NONE;
2634 return XML_ERROR_PARTIAL_CHAR;
2636 parser->m_processor = externalEntityContentProcessor;
2637 parser->m_tagLevel = 1;
2638 return externalEntityContentProcessor(parser, start, end, endPtr);
/* Processor for the content of an external entity.  Same shape as
   contentProcessor() except that doContent() receives 1 instead of 0 as its
   second argument, matching the m_tagLevel of 1 set by
   externalEntityInitProcessor3.  Raw tag names are saved after a successful
   pass; failure to save them is reported as XML_ERROR_NO_MEMORY. */
2641 static enum XML_Error PTRCALL
2642 externalEntityContentProcessor(XML_Parser parser,
2645 const char **endPtr)
2647 enum XML_Error result = doContent(parser, 1, parser->m_encoding, start, end,
2648 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
2649 if (result == XML_ERROR_NONE) {
2650 if (!storeRawNames(parser))
2651 return XML_ERROR_NO_MEMORY;
/* doContent: tokenize element content with XmlContentTok() and dispatch
   each token to the matching application handler (start and end element,
   character data, CDATA section start, processing instruction, comment,
   skipped entity, external entity reference) or, failing that, to the
   default handler through reportDefault().
   Event pointers are the parser's own when enc is the parser encoding and
   those of parser->m_openInternalEntities otherwise.
   Returns XML_ERROR_NONE, or the XML_Error that describes why processing
   stopped. */
2656 static enum XML_Error
2657 doContent(XML_Parser parser,
2659 const ENCODING *enc,
2662 const char **nextPtr,
2665 /* save one level of indirection */
2666 DTD * const dtd = parser->m_dtd;
/* Choose which pair of event pointers this invocation keeps up to date. */
2668 const char **eventPP;
2669 const char **eventEndPP;
2670 if (enc == parser->m_encoding) {
2671 eventPP = &parser->m_eventPtr;
2672 eventEndPP = &parser->m_eventEndPtr;
2675 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2676 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2681 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2682 int tok = XmlContentTok(enc, s, end, &next);
2685 case XML_TOK_TRAILING_CR:
2688 return XML_ERROR_NONE;
2691 if (parser->m_characterDataHandler) {
2693 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2695 else if (parser->m_defaultHandler)
2696 reportDefault(parser, enc, s, end);
2697 /* We are at the end of the final buffer, should we check for
2698 XML_SUSPENDED, XML_FINISHED?
2700 if (startTagLevel == 0)
2701 return XML_ERROR_NO_ELEMENTS;
2702 if (parser->m_tagLevel != startTagLevel)
2703 return XML_ERROR_ASYNC_ENTITY;
2705 return XML_ERROR_NONE;
2709 return XML_ERROR_NONE;
2711 if (startTagLevel > 0) {
2712 if (parser->m_tagLevel != startTagLevel)
2713 return XML_ERROR_ASYNC_ENTITY;
2715 return XML_ERROR_NONE;
2717 return XML_ERROR_NO_ELEMENTS;
2718 case XML_TOK_INVALID:
2720 return XML_ERROR_INVALID_TOKEN;
2721 case XML_TOK_PARTIAL:
2724 return XML_ERROR_NONE;
2726 return XML_ERROR_UNCLOSED_TOKEN;
2727 case XML_TOK_PARTIAL_CHAR:
2730 return XML_ERROR_NONE;
2732 return XML_ERROR_PARTIAL_CHAR;
// Entity reference: a predefined entity is delivered as a single character
// of data; any other name is looked up in dtd->generalEntities.
2733 case XML_TOK_ENTITY_REF:
2735 const XML_Char *name;
2737 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
2738 s + enc->minBytesPerChar,
2739 next - enc->minBytesPerChar);
2741 if (parser->m_characterDataHandler)
2742 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2743 else if (parser->m_defaultHandler)
2744 reportDefault(parser, enc, s, next);
2747 name = poolStoreString(&dtd->pool, enc,
2748 s + enc->minBytesPerChar,
2749 next - enc->minBytesPerChar);
2751 return XML_ERROR_NO_MEMORY;
2752 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2753 poolDiscard(&dtd->pool);
2754 /* First, determine if a check for an existing declaration is needed;
2755 if yes, check that the entity exists, and that it is internal,
2756 otherwise call the skipped entity or default handler.
2758 if (!dtd->hasParamEntityRefs || dtd->standalone) {
2760 return XML_ERROR_UNDEFINED_ENTITY;
2761 else if (!entity->is_internal)
2762 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2765 if (parser->m_skippedEntityHandler)
2766 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2767 else if (parser->m_defaultHandler)
2768 reportDefault(parser, enc, s, next);
2772 return XML_ERROR_RECURSIVE_ENTITY_REF;
2773 if (entity->notation)
2774 return XML_ERROR_BINARY_ENTITY_REF;
// Internal entity (textPtr set): expand it unless expansion is disabled,
// in which case it is reported as skipped.
2775 if (entity->textPtr) {
2776 enum XML_Error result;
2777 if (!parser->m_defaultExpandInternalEntities) {
2778 if (parser->m_skippedEntityHandler)
2779 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, 0);
2780 else if (parser->m_defaultHandler)
2781 reportDefault(parser, enc, s, next);
2784 result = processInternalEntity(parser, entity, XML_FALSE);
2785 if (result != XML_ERROR_NONE)
// No replacement text: offer the reference to the external entity handler.
// The entity is flagged open only while getContext() builds the context.
2788 else if (parser->m_externalEntityRefHandler) {
2789 const XML_Char *context;
2790 entity->open = XML_TRUE;
2791 context = getContext(parser);
2792 entity->open = XML_FALSE;
2794 return XML_ERROR_NO_MEMORY;
2795 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
2800 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2801 poolDiscard(&parser->m_tempPool);
2803 else if (parser->m_defaultHandler)
2804 reportDefault(parser, enc, s, next);
2807 case XML_TOK_START_TAG_NO_ATTS:
2809 case XML_TOK_START_TAG_WITH_ATTS:
2812 enum XML_Error result;
// Reuse a TAG from the free list when one is available, otherwise allocate
// a new one together with an initial name buffer of INIT_TAG_BUF_SIZE.
2814 if (parser->m_freeTagList) {
2815 tag = parser->m_freeTagList;
2816 parser->m_freeTagList = parser->m_freeTagList->parent;
2819 tag = (TAG *)MALLOC(parser, sizeof(TAG));
2821 return XML_ERROR_NO_MEMORY;
2822 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2825 return XML_ERROR_NO_MEMORY;
2827 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
// Push the tag; rawName still points into the input at this stage.
2829 tag->bindings = NULL;
2830 tag->parent = parser->m_tagStack;
2831 parser->m_tagStack = tag;
2832 tag->name.localPart = NULL;
2833 tag->name.prefix = NULL;
2834 tag->rawName = s + enc->minBytesPerChar;
2835 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2836 ++parser->m_tagLevel;
2838 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2839 const char *fromPtr = tag->rawName;
2840 toPtr = (XML_Char *)tag->buf;
// Convert the raw name into tag->buf, leaving room for one trailing
// XML_Char, and enlarge the buffer when the name does not fit.
2844 const enum XML_Convert_Result convert_res = XmlConvert(enc,
2845 &fromPtr, rawNameEnd,
2846 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
2847 convLen = (int)(toPtr - (XML_Char *)tag->buf);
2848 if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
2849 tag->name.strLen = convLen;
// NOTE(review): the buffer size is doubled with a shift on an int value;
// confirm that it cannot overflow for very long names.
2852 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
2854 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2856 return XML_ERROR_NO_MEMORY;
2858 tag->bufEnd = temp + bufSize;
2859 toPtr = (XML_Char *)temp + convLen;
2863 tag->name.str = (XML_Char *)tag->buf;
2864 *toPtr = XML_T('\0');
2865 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
2868 if (parser->m_startElementHandler)
2869 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
2870 (const XML_Char **)parser->m_atts);
2871 else if (parser->m_defaultHandler)
2872 reportDefault(parser, enc, s, next);
2873 poolClear(&parser->m_tempPool);
2876 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
2878 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
// Empty element: no TAG is pushed; the name lives in m_tempPool and the
// bindings are released again before leaving this case.
2880 const char *rawName = s + enc->minBytesPerChar;
2881 enum XML_Error result;
2882 BINDING *bindings = NULL;
2883 XML_Bool noElmHandlers = XML_TRUE;
2885 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
2886 rawName + XmlNameLength(enc, rawName));
2888 return XML_ERROR_NO_MEMORY;
2889 poolFinish(&parser->m_tempPool);
2890 result = storeAtts(parser, enc, s, &name, &bindings);
2891 if (result != XML_ERROR_NONE) {
2892 freeBindings(parser, bindings);
2895 poolFinish(&parser->m_tempPool);
2896 if (parser->m_startElementHandler) {
2897 parser->m_startElementHandler(parser->m_handlerArg, name.str, (const XML_Char **)parser->m_atts);
2898 noElmHandlers = XML_FALSE;
2900 if (parser->m_endElementHandler) {
// When both handlers exist, the event start is moved to the event end
// before the end handler runs.
2901 if (parser->m_startElementHandler)
2902 *eventPP = *eventEndPP;
2903 parser->m_endElementHandler(parser->m_handlerArg, name.str);
2904 noElmHandlers = XML_FALSE;
2906 if (noElmHandlers && parser->m_defaultHandler)
2907 reportDefault(parser, enc, s, next);
2908 poolClear(&parser->m_tempPool);
2909 freeBindings(parser, bindings);
// At tag level 0 the remaining input is handed to epilogProcessor unless
// parsing has been finished or suspended.
2911 if ((parser->m_tagLevel == 0) &&
2912 !((parser->m_parsingStatus.parsing == XML_FINISHED) || (parser->m_parsingStatus.parsing == XML_SUSPENDED))) {
2913 return epilogProcessor(parser, next, end, nextPtr);
2916 case XML_TOK_END_TAG:
2917 if (parser->m_tagLevel == startTagLevel)
2918 return XML_ERROR_ASYNC_ENTITY;
2921 const char *rawName;
// Pop the tag and put it on the free list, then compare its stored raw
// name byte for byte with the name in the end tag.
2922 TAG *tag = parser->m_tagStack;
2923 parser->m_tagStack = tag->parent;
2924 tag->parent = parser->m_freeTagList;
2925 parser->m_freeTagList = tag;
2926 rawName = s + enc->minBytesPerChar*2;
2927 len = XmlNameLength(enc, rawName);
2928 if (len != tag->rawNameLength
2929 || memcmp(tag->rawName, rawName, len) != 0) {
2931 return XML_ERROR_TAG_MISMATCH;
2933 --parser->m_tagLevel;
2934 if (parser->m_endElementHandler) {
2935 const XML_Char *localPart;
2936 const XML_Char *prefix;
2938 localPart = tag->name.localPart;
2939 if (parser->m_ns && localPart) {
2940 /* localPart and prefix may have been overwritten in
2941 tag->name.str, since this points to the binding->uri
2942 buffer which gets re-used; so we have to add them again
2944 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
2945 /* don't need to check for space - already done in storeAtts() */
2946 while (*localPart) *uri++ = *localPart++;
2947 prefix = (XML_Char *)tag->name.prefix;
2948 if (parser->m_ns_triplets && prefix) {
2949 *uri++ = parser->m_namespaceSeparator;
2950 while (*prefix) *uri++ = *prefix++;
2954 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
2956 else if (parser->m_defaultHandler)
2957 reportDefault(parser, enc, s, next);
// Report the end of every namespace binding of the closed tag, move each
// binding to the free list and restore the previous binding of its prefix.
2958 while (tag->bindings) {
2959 BINDING *b = tag->bindings;
2960 if (parser->m_endNamespaceDeclHandler)
2961 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
2962 tag->bindings = tag->bindings->nextTagBinding;
2963 b->nextTagBinding = parser->m_freeBindingList;
2964 parser->m_freeBindingList = b;
2965 b->prefix->binding = b->prevPrefixBinding;
2967 if (parser->m_tagLevel == 0)
2968 return epilogProcessor(parser, next, end, nextPtr);
2971 case XML_TOK_CHAR_REF:
// Character reference: the number is encoded with XmlEncode() into a local
// buffer and delivered as character data.
2973 int n = XmlCharRefNumber(enc, s);
2975 return XML_ERROR_BAD_CHAR_REF;
2976 if (parser->m_characterDataHandler) {
2977 XML_Char buf[XML_ENCODE_MAX];
2978 parser->m_characterDataHandler(parser->m_handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
2980 else if (parser->m_defaultHandler)
2981 reportDefault(parser, enc, s, next);
2984 case XML_TOK_XML_DECL:
2985 return XML_ERROR_MISPLACED_XML_PI;
2986 case XML_TOK_DATA_NEWLINE:
2987 if (parser->m_characterDataHandler) {
2989 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2991 else if (parser->m_defaultHandler)
2992 reportDefault(parser, enc, s, next);
2994 case XML_TOK_CDATA_SECT_OPEN:
2996 enum XML_Error result;
2997 if (parser->m_startCdataSectionHandler)
2998 parser->m_startCdataSectionHandler(parser->m_handlerArg);
3000 /* Suppose you doing a transformation on a document that involves
3001 changing only the character data. You set up a defaultHandler
3002 and a characterDataHandler. The defaultHandler simply copies
3003 characters through. The characterDataHandler does the
3004 transformation and writes the characters out escaping them as
3005 necessary. This case will fail to work if we leave out the
3006 following two lines (because & and < inside CDATA sections will
3007 be incorrectly escaped).
3009 However, now we have a start/endCdataSectionHandler, so it seems
3010 easier to let the user deal with this.
3012 else if (parser->m_characterDataHandler)
3013 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0);
3015 else if (parser->m_defaultHandler)
3016 reportDefault(parser, enc, s, next);
3017 result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
3018 if (result != XML_ERROR_NONE)
3021 parser->m_processor = cdataSectionProcessor;
3026 case XML_TOK_TRAILING_RSQB:
3029 return XML_ERROR_NONE;
3031 if (parser->m_characterDataHandler) {
3032 if (MUST_CONVERT(enc, s)) {
3033 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3034 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3035 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3036 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3039 parser->m_characterDataHandler(parser->m_handlerArg,
3041 (int)((XML_Char *)end - (XML_Char *)s));
3043 else if (parser->m_defaultHandler)
3044 reportDefault(parser, enc, s, end);
3045 /* We are at the end of the final buffer, should we check for
3046 XML_SUSPENDED, XML_FINISHED?
3048 if (startTagLevel == 0) {
3050 return XML_ERROR_NO_ELEMENTS;
3052 if (parser->m_tagLevel != startTagLevel) {
3054 return XML_ERROR_ASYNC_ENTITY;
3057 return XML_ERROR_NONE;
3058 case XML_TOK_DATA_CHARS:
3060 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3061 if (charDataHandler) {
3062 if (MUST_CONVERT(enc, s)) {
3064 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3065 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3067 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3068 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3069 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3075 charDataHandler(parser->m_handlerArg,
3077 (int)((XML_Char *)next - (XML_Char *)s));
3079 else if (parser->m_defaultHandler)
3080 reportDefault(parser, enc, s, next);
3084 if (!reportProcessingInstruction(parser, enc, s, next))
3085 return XML_ERROR_NO_MEMORY;
3087 case XML_TOK_COMMENT:
3088 if (!reportComment(parser, enc, s, next))
3089 return XML_ERROR_NO_MEMORY;
3092 /* All of the tokens produced by XmlContentTok() have their own
3093 * explicit cases, so this default is not strictly necessary.
3094 * However it is a useful safety net, so we retain the code and
3095 * simply exclude it from the coverage tests.
3099 if (parser->m_defaultHandler)
3100 reportDefault(parser, enc, s, next);
3102 /* LCOV_EXCL_STOP */
// Advance the event pointer and the read position past the token, then
// consult the parsing status, which can end the call with XML_ERROR_NONE
// or XML_ERROR_ABORTED.
3104 *eventPP = s = next;
3105 switch (parser->m_parsingStatus.parsing) {
3108 return XML_ERROR_NONE;
3110 return XML_ERROR_ABORTED;
/* freeBindings: retire a list of namespace bindings linked through
   nextTagBinding.  For each binding the end-namespace handler is invoked if
   one is installed, the binding is moved to parser->m_freeBindingList, and
   the prefix is pointed back at its previous binding. */
3117 /* This function does not call free() on the allocated memory, merely
3118 * moving it to the parser's m_freeBindingList where it can be freed or
3119 * reused as appropriate.
3122 freeBindings(XML_Parser parser, BINDING *bindings)
3125 BINDING *b = bindings;
3127 /* m_startNamespaceDeclHandler will have been called for this
3128 * binding in addBindings(), so call the end handler now.
3130 if (parser->m_endNamespaceDeclHandler)
3131 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
// Step to the next binding before b is relinked onto the free list.
3133 bindings = bindings->nextTagBinding;
3134 b->nextTagBinding = parser->m_freeBindingList;
3135 parser->m_freeBindingList = b;
3136 b->prefix->binding = b->prevPrefixBinding;
3140 /* Precondition: all arguments must be non-NULL;
3142 - normalize attributes
3143 - check attributes for well-formedness
3144 - generate namespace aware attribute names (URI, prefix)
3145 - build list of attributes for startElementHandler
3146 - default attributes
3147 - process namespace declarations (check and report them)
3148 - generate namespace aware element name (URI, prefix)
3150 static enum XML_Error
3151 storeAtts(XML_Parser parser, const ENCODING *enc,
3152 const char *attStr, TAG_NAME *tagNamePtr,
3153 BINDING **bindingsPtr)
3155 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
3156 ELEMENT_TYPE *elementType;
3158 const XML_Char **appAtts; /* the attribute list for the application */
3166 const XML_Char *localPart;
3168 /* lookup the element type name */
3169 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0);
3171 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3173 return XML_ERROR_NO_MEMORY;
3174 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3175 sizeof(ELEMENT_TYPE));
3177 return XML_ERROR_NO_MEMORY;
3178 if (parser->m_ns && !setElementTypePrefix(parser, elementType))
3179 return XML_ERROR_NO_MEMORY;
3181 nDefaultAtts = elementType->nDefaultAtts;
3183 /* get the attributes from the tokenizer */
3184 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3185 if (n + nDefaultAtts > parser->m_attsSize) {
3186 int oldAttsSize = parser->m_attsSize;
3188 #ifdef XML_ATTR_INFO
3189 XML_AttrInfo *temp2;
3191 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3192 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts, parser->m_attsSize * sizeof(ATTRIBUTE));
3194 parser->m_attsSize = oldAttsSize;
3195 return XML_ERROR_NO_MEMORY;
3197 parser->m_atts = temp;
3198 #ifdef XML_ATTR_INFO
3199 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo, parser->m_attsSize * sizeof(XML_AttrInfo));
3200 if (temp2 == NULL) {
3201 parser->m_attsSize = oldAttsSize;
3202 return XML_ERROR_NO_MEMORY;
3204 parser->m_attInfo = temp2;
3206 if (n > oldAttsSize)
3207 XmlGetAttributes(enc, attStr, n, parser->m_atts);
3210 appAtts = (const XML_Char **)parser->m_atts;
3211 for (i = 0; i < n; i++) {
3212 ATTRIBUTE *currAtt = &parser->m_atts[i];
3213 #ifdef XML_ATTR_INFO
3214 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3216 /* add the name and value to the attribute list */
3217 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name,
3219 + XmlNameLength(enc, currAtt->name));
3221 return XML_ERROR_NO_MEMORY;
3222 #ifdef XML_ATTR_INFO
3223 currAttInfo->nameStart = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3224 currAttInfo->nameEnd = currAttInfo->nameStart +
3225 XmlNameLength(enc, currAtt->name);
3226 currAttInfo->valueStart = parser->m_parseEndByteIndex -
3227 (parser->m_parseEndPtr - currAtt->valuePtr);
3228 currAttInfo->valueEnd = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->valueEnd);
3230 /* Detect duplicate attributes by their QNames. This does not work when
3231 namespace processing is turned on and different prefixes for the same
3232 namespace are used. For this case we have a check further down.
3234 if ((attId->name)[-1]) {
3235 if (enc == parser->m_encoding)
3236 parser->m_eventPtr = parser->m_atts[i].name;
3237 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3239 (attId->name)[-1] = 1;
3240 appAtts[attIndex++] = attId->name;
3241 if (!parser->m_atts[i].normalized) {
3242 enum XML_Error result;
3243 XML_Bool isCdata = XML_TRUE;
3245 /* figure out whether declared as other than CDATA */
3246 if (attId->maybeTokenized) {
3248 for (j = 0; j < nDefaultAtts; j++) {
3249 if (attId == elementType->defaultAtts[j].id) {
3250 isCdata = elementType->defaultAtts[j].isCdata;
3256 /* normalize the attribute value */
3257 result = storeAttributeValue(parser, enc, isCdata,
3258 parser->m_atts[i].valuePtr, parser->m_atts[i].valueEnd,
3259 &parser->m_tempPool);
3262 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3263 poolFinish(&parser->m_tempPool);
3266 /* the value did not need normalizing */
3267 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc, parser->m_atts[i].valuePtr,
3268 parser->m_atts[i].valueEnd);
3269 if (appAtts[attIndex] == 0)
3270 return XML_ERROR_NO_MEMORY;
3271 poolFinish(&parser->m_tempPool);
3273 /* handle prefixed attribute names */
3274 if (attId->prefix) {
3276 /* deal with namespace declarations here */
3277 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3278 appAtts[attIndex], bindingsPtr);
3284 /* deal with other prefixed names later */
3287 (attId->name)[-1] = 2;
3294 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3295 parser->m_nSpecifiedAtts = attIndex;
3296 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3297 for (i = 0; i < attIndex; i += 2)
3298 if (appAtts[i] == elementType->idAtt->name) {
3299 parser->m_idAttIndex = i;
3304 parser->m_idAttIndex = -1;
3306 /* do attribute defaulting */
3307 for (i = 0; i < nDefaultAtts; i++) {
3308 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3309 if (!(da->id->name)[-1] && da->value) {
3310 if (da->id->prefix) {
3311 if (da->id->xmlns) {
3312 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3313 da->value, bindingsPtr);
3318 (da->id->name)[-1] = 2;
3320 appAtts[attIndex++] = da->id->name;
3321 appAtts[attIndex++] = da->value;
3325 (da->id->name)[-1] = 1;
3326 appAtts[attIndex++] = da->id->name;
3327 appAtts[attIndex++] = da->value;
3331 appAtts[attIndex] = 0;
3333 /* expand prefixed attribute names, check for duplicates,
3334 and clear flags that say whether attributes were specified */
3337 int j; /* hash table index */
3338 unsigned long version = parser->m_nsAttsVersion;
3339 int nsAttsSize = (int)1 << parser->m_nsAttsPower;
3340 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3341 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3342 if ((nPrefixes << 1) >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3344 /* hash table size must also be a power of 2 and >= 8 */
3345 while (nPrefixes >> parser->m_nsAttsPower++);
3346 if (parser->m_nsAttsPower < 3)
3347 parser->m_nsAttsPower = 3;
3348 nsAttsSize = (int)1 << parser->m_nsAttsPower;
3349 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT));
3351 /* Restore actual size of memory in m_nsAtts */
3352 parser->m_nsAttsPower = oldNsAttsPower;
3353 return XML_ERROR_NO_MEMORY;
3355 parser->m_nsAtts = temp;
3356 version = 0; /* force re-initialization of m_nsAtts hash table */
3358 /* using a version flag saves us from initializing m_nsAtts every time */
3359 if (!version) { /* initialize version flags when version wraps around */
3360 version = INIT_ATTS_VERSION;
3361 for (j = nsAttsSize; j != 0; )
3362 parser->m_nsAtts[--j].version = version;
3364 parser->m_nsAttsVersion = --version;
3366 /* expand prefixed names and check for duplicates */
3367 for (; i < attIndex; i += 2) {
3368 const XML_Char *s = appAtts[i];
3369 if (s[-1] == 2) { /* prefixed */
3372 unsigned long uriHash;
3373 struct siphash sip_state;
3374 struct sipkey sip_key;
3376 copy_salt_to_sipkey(parser, &sip_key);
3377 sip24_init(&sip_state, &sip_key);
3379 ((XML_Char *)s)[-1] = 0; /* clear flag */
3380 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3381 if (!id || !id->prefix) {
3382 /* This code is walking through the appAtts array, dealing
3383 * with (in this case) a prefixed attribute name. To be in
3384 * the array, the attribute must have already been bound, so
3385 * has to have passed through the hash table lookup once
3386 * already. That implies that an entry for it already
3387 * exists, so the lookup above will return a pointer to
3388 * already allocated memory. There is no opportunity for
3389 * the allocator to fail, so the condition above cannot be
3392 * Since it is difficult to be certain that the above
3393 * analysis is complete, we retain the test and merely
3394 * remove the code from coverage tests.
3396 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3398 b = id->prefix->binding;
3400 return XML_ERROR_UNBOUND_PREFIX;
3402 for (j = 0; j < b->uriLen; j++) {
3403 const XML_Char c = b->uri[j];
3404 if (!poolAppendChar(&parser->m_tempPool, c))
3405 return XML_ERROR_NO_MEMORY;
3408 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3410 while (*s++ != XML_T(ASCII_COLON))
3413 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3415 do { /* copies null terminator */
3416 if (!poolAppendChar(&parser->m_tempPool, *s))
3417 return XML_ERROR_NO_MEMORY;
3420 uriHash = (unsigned long)sip24_final(&sip_state);
3422 { /* Check hash table for duplicate of expanded name (uriName).
3423 Derived from code in lookup(parser, HASH_TABLE *table, ...).
3425 unsigned char step = 0;
3426 unsigned long mask = nsAttsSize - 1;
3427 j = uriHash & mask; /* index into hash table */
3428 while (parser->m_nsAtts[j].version == version) {
3429 /* for speed we compare stored hash values first */
3430 if (uriHash == parser->m_nsAtts[j].hash) {
3431 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3432 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3433 /* s1 is null terminated, but not s2 */
3434 for (; *s1 == *s2 && *s1 != 0; s1++, s2++);
3436 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3439 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3440 j < step ? (j += nsAttsSize - step) : (j -= step);
3444 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3445 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3446 s = b->prefix->name;
3448 if (!poolAppendChar(&parser->m_tempPool, *s))
3449 return XML_ERROR_NO_MEMORY;
3453 /* store expanded name in attribute list */
3454 s = poolStart(&parser->m_tempPool);
3455 poolFinish(&parser->m_tempPool);
3458 /* fill empty slot with new version, uriName and hash value */
3459 parser->m_nsAtts[j].version = version;
3460 parser->m_nsAtts[j].hash = uriHash;
3461 parser->m_nsAtts[j].uriName = s;
3468 else /* not prefixed */
3469 ((XML_Char *)s)[-1] = 0; /* clear flag */
3472 /* clear flags for the remaining attributes */
3473 for (; i < attIndex; i += 2)
3474 ((XML_Char *)(appAtts[i]))[-1] = 0;
3475 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3476 binding->attId->name[-1] = 0;
3479 return XML_ERROR_NONE;
3481 /* expand the element type name */
3482 if (elementType->prefix) {
3483 binding = elementType->prefix->binding;
3485 return XML_ERROR_UNBOUND_PREFIX;
3486 localPart = tagNamePtr->str;
3487 while (*localPart++ != XML_T(ASCII_COLON))
3490 else if (dtd->defaultPrefix.binding) {
3491 binding = dtd->defaultPrefix.binding;
3492 localPart = tagNamePtr->str;
3495 return XML_ERROR_NONE;
3497 if (parser->m_ns_triplets && binding->prefix->name) {
3498 for (; binding->prefix->name[prefixLen++];)
3499 ; /* prefixLen includes null terminator */
3501 tagNamePtr->localPart = localPart;
3502 tagNamePtr->uriLen = binding->uriLen;
3503 tagNamePtr->prefix = binding->prefix->name;
3504 tagNamePtr->prefixLen = prefixLen;
3505 for (i = 0; localPart[i++];)
3506 ; /* i includes null terminator */
3507 n = i + binding->uriLen + prefixLen;
3508 if (n > binding->uriAlloc) {
3510 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3512 return XML_ERROR_NO_MEMORY;
3513 binding->uriAlloc = n + EXPAND_SPARE;
3514 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3515 for (p = parser->m_tagStack; p; p = p->parent)
3516 if (p->name.str == binding->uri)
3518 FREE(parser, binding->uri);
3521 /* if m_namespaceSeparator != '\0' then uri includes it already */
3522 uri = binding->uri + binding->uriLen;
3523 memcpy(uri, localPart, i * sizeof(XML_Char));
3524 /* we always have a namespace separator between localPart and prefix */
3527 *uri = parser->m_namespaceSeparator; /* replace null terminator */
3528 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3530 tagNamePtr->str = binding->uri;
3531 return XML_ERROR_NONE;
3534 /* addBinding() overwrites the value of prefix->binding without checking.
3535 Therefore one must keep track of the old value outside of addBinding().
/* Creates a namespace binding of `prefix` to `uri`.

   Checks visible in this function:
   - an empty uri combined with a named prefix is rejected with
     XML_ERROR_UNDECLARING_PREFIX;
   - a prefix spelled `xmlns` is rejected with
     XML_ERROR_RESERVED_PREFIX_XMLNS;
   - the prefix `xml` sets mustBeXML, and a disagreement between
     mustBeXML and isXML (uri equal to xmlNamespace) is rejected with
     XML_ERROR_RESERVED_PREFIX_XML or XML_ERROR_RESERVED_NAMESPACE_URI;
   - NOTE(review): the second XML_ERROR_RESERVED_NAMESPACE_URI return
     presumably covers uri equal to xmlnsNamespace - confirm.

   The BINDING record is taken from parser->m_freeBindingList when that
   list is non-empty (its uri buffer is grown with REALLOC when len
   exceeds uriAlloc); otherwise the record and its uri buffer are
   obtained with MALLOC.  Allocation failure returns
   XML_ERROR_NO_MEMORY.  uri is copied into b->uri and, when
   parser->m_namespaceSeparator is set, the last copied position is
   overwritten with that separator.

   The previous prefix->binding is saved in b->prevPrefixBinding.
   prefix->binding becomes NULL when uri is empty and prefix is the
   default prefix of the DTD, and becomes the new record otherwise.
   b->nextTagBinding is pointed at the current *bindingsPtr.

   When attId is non-NULL and a start-namespace-declaration handler is
   installed, the handler receives prefix->name and uri (0 instead of
   uri when prefix->binding ended up NULL).

   Returns XML_ERROR_NONE on success.
*/
3537 static enum XML_Error
3538 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3539 const XML_Char *uri, BINDING **bindingsPtr)
3541 static const XML_Char xmlNamespace[] = {
3542 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3543 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3544 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
3545 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH,
3546 ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3549 static const int xmlLen =
3550 (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1;
3551 static const XML_Char xmlnsNamespace[] = {
3552 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3553 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3554 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0,
3555 ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s,
3558 static const int xmlnsLen =
3559 (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1;
3561 XML_Bool mustBeXML = XML_FALSE;
3562 XML_Bool isXML = XML_TRUE;
3563 XML_Bool isXMLNS = XML_TRUE;
3568 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3569 if (*uri == XML_T('\0') && prefix->name)
3570 return XML_ERROR_UNDECLARING_PREFIX;
3573 && prefix->name[0] == XML_T(ASCII_x)
3574 && prefix->name[1] == XML_T(ASCII_m)
3575 && prefix->name[2] == XML_T(ASCII_l)) {
3577 /* Not allowed to bind xmlns */
3578 if (prefix->name[3] == XML_T(ASCII_n)
3579 && prefix->name[4] == XML_T(ASCII_s)
3580 && prefix->name[5] == XML_T('\0'))
3581 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3583 if (prefix->name[3] == XML_T('\0'))
3584 mustBeXML = XML_TRUE;
/* Walk uri once: len ends up as its length while each character is
   compared against the two reserved namespace URIs. */
3587 for (len = 0; uri[len]; len++) {
3588 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3591 if (!mustBeXML && isXMLNS
3592 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3593 isXMLNS = XML_FALSE;
/* A match only counts when the lengths agree exactly. */
3595 isXML = isXML && len == xmlLen;
3596 isXMLNS = isXMLNS && len == xmlnsLen;
3598 if (mustBeXML != isXML)
3599 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3600 : XML_ERROR_RESERVED_NAMESPACE_URI;
3603 return XML_ERROR_RESERVED_NAMESPACE_URI;
3605 if (parser->m_namespaceSeparator)
/* Recycle a BINDING from the free list when one is available. */
3607 if (parser->m_freeBindingList) {
3608 b = parser->m_freeBindingList;
3609 if (len > b->uriAlloc) {
3610 XML_Char *temp = (XML_Char *)REALLOC(parser, b->uri,
3611 sizeof(XML_Char) * (len + EXPAND_SPARE));
3613 return XML_ERROR_NO_MEMORY;
3615 b->uriAlloc = len + EXPAND_SPARE;
3617 parser->m_freeBindingList = b->nextTagBinding;
3620 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
3622 return XML_ERROR_NO_MEMORY;
3623 b->uri = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
3626 return XML_ERROR_NO_MEMORY;
3628 b->uriAlloc = len + EXPAND_SPARE;
3631 memcpy(b->uri, uri, len * sizeof(XML_Char));
3632 if (parser->m_namespaceSeparator)
3633 b->uri[len - 1] = parser->m_namespaceSeparator;
/* Remember the binding being shadowed so that it can be restored later. */
3636 b->prevPrefixBinding = prefix->binding;
3637 /* NULL binding when default namespace undeclared */
3638 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
3639 prefix->binding = NULL;
3641 prefix->binding = b;
3642 b->nextTagBinding = *bindingsPtr;
3644 /* if attId == NULL then we are not starting a namespace scope */
3645 if (attId && parser->m_startNamespaceDeclHandler)
3646 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
3647 prefix->binding ? uri : 0);
3648 return XML_ERROR_NONE;
3651 /* The idea here is to avoid using stack for each CDATA section when
3652 the whole file is parsed with one call.
/* Processor entry point used while a CDATA section is being parsed.

   Runs doCdataSection() with the parser encoding, passing
   !parser->m_parsingStatus.finalBuffer as the last argument.  The code
   that follows installs and immediately calls the next processor:
   externalEntityContentProcessor when parser->m_parentParser is set,
   contentProcessor otherwise.

   NOTE(review): the handling of a non-XML_ERROR_NONE result and the
   condition under which the processor switch happens should be
   confirmed against the full function.
*/
3654 static enum XML_Error PTRCALL
3655 cdataSectionProcessor(XML_Parser parser,
3658 const char **endPtr)
3660 enum XML_Error result = doCdataSection(parser, parser->m_encoding, &start, end,
3661 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
3662 if (result != XML_ERROR_NONE)
3665 if (parser->m_parentParser) { /* we are parsing an external entity */
3666 parser->m_processor = externalEntityContentProcessor;
3667 return externalEntityContentProcessor(parser, start, end, endPtr);
3670 parser->m_processor = contentProcessor;
3671 return contentProcessor(parser, start, end, endPtr);
3677 /* startPtr gets set to non-null if the section is closed, and to null if
3678 the section is not yet closed.
/* Tokenizes the contents of a CDATA section beginning at *startPtr with
   XmlCdataSectionTok() and reports them to the installed handlers.

   Token handling visible here:
   - XML_TOK_CDATA_SECT_CLOSE: the end-CDATA-section handler is called
     when installed, with the default handler as a later alternative;
     the function then returns XML_ERROR_ABORTED if parsing is
     XML_FINISHED and XML_ERROR_NONE otherwise.
   - XML_TOK_DATA_NEWLINE: one character is passed to the character
     data handler, or the token goes to the default handler.
   - XML_TOK_DATA_CHARS: when MUST_CONVERT(enc, s) holds the data is
     converted with XmlConvert() into parser->m_dataBuf before it is
     handed to the character data handler; otherwise the handler is
     called on the input directly.  Without a character data handler
     the token goes to the default handler.
   - XML_TOK_INVALID gives XML_ERROR_INVALID_TOKEN.
   - XML_TOK_PARTIAL_CHAR gives XML_ERROR_NONE or
     XML_ERROR_PARTIAL_CHAR, and XML_TOK_PARTIAL gives XML_ERROR_NONE
     or XML_ERROR_UNCLOSED_CDATA_SECTION.
     NOTE(review): presumably XML_ERROR_NONE is the outcome when more
     input is expected - confirm against the guarding condition.
   - Any other token gives XML_ERROR_UNEXPECTED_STATE.

   After a token has been handled, *eventPP and s are advanced to next
   and a switch on parser->m_parsingStatus.parsing can return
   XML_ERROR_NONE or XML_ERROR_ABORTED.

   Event positions are kept in parser->m_eventPtr and m_eventEndPtr
   when enc is the parser encoding, and in
   parser->m_openInternalEntities otherwise.
*/
3680 static enum XML_Error
3681 doCdataSection(XML_Parser parser,
3682 const ENCODING *enc,
3683 const char **startPtr,
3685 const char **nextPtr,
3688 const char *s = *startPtr;
3689 const char **eventPP;
3690 const char **eventEndPP;
3691 if (enc == parser->m_encoding) {
3692 eventPP = &parser->m_eventPtr;
3694 eventEndPP = &parser->m_eventEndPtr;
3697 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3698 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
3705 int tok = XmlCdataSectionTok(enc, s, end, &next);
3708 case XML_TOK_CDATA_SECT_CLOSE:
3709 if (parser->m_endCdataSectionHandler)
3710 parser->m_endCdataSectionHandler(parser->m_handlerArg);
3712 /* see comment under XML_TOK_CDATA_SECT_OPEN */
3713 else if (parser->m_characterDataHandler)
3714 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0);
3716 else if (parser->m_defaultHandler)
3717 reportDefault(parser, enc, s, next);
3720 if (parser->m_parsingStatus.parsing == XML_FINISHED)
3721 return XML_ERROR_ABORTED;
3723 return XML_ERROR_NONE;
3724 case XML_TOK_DATA_NEWLINE:
3725 if (parser->m_characterDataHandler) {
3727 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3729 else if (parser->m_defaultHandler)
3730 reportDefault(parser, enc, s, next);
3732 case XML_TOK_DATA_CHARS:
3734 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3735 if (charDataHandler) {
3736 if (MUST_CONVERT(enc, s)) {
3738 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3739 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3741 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3742 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3743 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3749 charDataHandler(parser->m_handlerArg,
3751 (int)((XML_Char *)next - (XML_Char *)s));
3753 else if (parser->m_defaultHandler)
3754 reportDefault(parser, enc, s, next);
3757 case XML_TOK_INVALID:
3759 return XML_ERROR_INVALID_TOKEN;
3760 case XML_TOK_PARTIAL_CHAR:
3763 return XML_ERROR_NONE;
3765 return XML_ERROR_PARTIAL_CHAR;
3766 case XML_TOK_PARTIAL:
3770 return XML_ERROR_NONE;
3772 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3774 /* Every token returned by XmlCdataSectionTok() has its own
3775 * explicit case, so this default case will never be executed.
3776 * We retain it as a safety net and exclude it from the coverage
3782 return XML_ERROR_UNEXPECTED_STATE;
3783 /* LCOV_EXCL_STOP */
3786 *eventPP = s = next;
3787 switch (parser->m_parsingStatus.parsing) {
3790 return XML_ERROR_NONE;
3792 return XML_ERROR_ABORTED;
3801 /* The idea here is to avoid using stack for each IGNORE section when
3802 the whole file is parsed with one call.
/* Processor entry point used while an IGNORE section is being parsed.

   Runs doIgnoreSection() with the parser encoding, passing
   !parser->m_parsingStatus.finalBuffer as the last argument, then
   installs prologProcessor as the current processor and continues
   with it on the remaining input.

   NOTE(review): the handling of a non-XML_ERROR_NONE result and the
   condition guarding the switch to prologProcessor should be confirmed
   against the full function.
*/
3804 static enum XML_Error PTRCALL
3805 ignoreSectionProcessor(XML_Parser parser,
3808 const char **endPtr)
3810 enum XML_Error result = doIgnoreSection(parser, parser->m_encoding, &start, end,
3811 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
3812 if (result != XML_ERROR_NONE)
3815 parser->m_processor = prologProcessor;
3816 return prologProcessor(parser, start, end, endPtr);
3821 /* startPtr gets set to non-null if the section is closed, and to null
3822 if the section is not yet closed.
/* Tokenizes an IGNORE section beginning at *startPtr with
   XmlIgnoreSectionTok().

   Token handling visible here:
   - XML_TOK_IGNORE_SECT: the token is passed to the default handler
     when one is installed; the function then returns XML_ERROR_ABORTED
     if parsing is XML_FINISHED and XML_ERROR_NONE otherwise.
   - XML_TOK_INVALID gives XML_ERROR_INVALID_TOKEN.
   - XML_TOK_PARTIAL_CHAR gives XML_ERROR_NONE or
     XML_ERROR_PARTIAL_CHAR, and XML_TOK_PARTIAL gives XML_ERROR_NONE
     or XML_ERROR_SYNTAX.
     NOTE(review): presumably XML_ERROR_NONE is the outcome when more
     input is expected - confirm against the guarding condition.
   - Any other token gives XML_ERROR_UNEXPECTED_STATE.

   Event positions are kept in parser->m_eventPtr and m_eventEndPtr
   when enc is the parser encoding, and in
   parser->m_openInternalEntities otherwise.
*/
3824 static enum XML_Error
3825 doIgnoreSection(XML_Parser parser,
3826 const ENCODING *enc,
3827 const char **startPtr,
3829 const char **nextPtr,
3834 const char *s = *startPtr;
3835 const char **eventPP;
3836 const char **eventEndPP;
3837 if (enc == parser->m_encoding) {
3838 eventPP = &parser->m_eventPtr;
3840 eventEndPP = &parser->m_eventEndPtr;
3843 /* It's not entirely clear, but it seems the following two lines
3844 * of code cannot be executed. The only occasions on which 'enc'
3845 * is not 'encoding' are when this function is called
3846 * from the internal entity processing, and IGNORE sections are an
3847 * error in internal entities.
3849 * Since it really isn't clear that this is true, we keep the code
3850 * and just remove it from our coverage tests.
3854 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3855 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
3856 /* LCOV_EXCL_STOP */
3860 tok = XmlIgnoreSectionTok(enc, s, end, &next);
3863 case XML_TOK_IGNORE_SECT:
3864 if (parser->m_defaultHandler)
3865 reportDefault(parser, enc, s, next);
3868 if (parser->m_parsingStatus.parsing == XML_FINISHED)
3869 return XML_ERROR_ABORTED;
3871 return XML_ERROR_NONE;
3872 case XML_TOK_INVALID:
3874 return XML_ERROR_INVALID_TOKEN;
3875 case XML_TOK_PARTIAL_CHAR:
3878 return XML_ERROR_NONE;
3880 return XML_ERROR_PARTIAL_CHAR;
3881 case XML_TOK_PARTIAL:
3885 return XML_ERROR_NONE;
3887 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3889 /* All of the tokens that XmlIgnoreSectionTok() returns have
3890 * explicit cases to handle them, so this default case is never
3891 * executed. We keep it as a safety net anyway, and remove it
3892 * from our test coverage statistics.
3897 return XML_ERROR_UNEXPECTED_STATE;
3898 /* LCOV_EXCL_STOP */
3903 #endif /* XML_DTD */
/* Sets up parser->m_initEncoding and parser->m_encoding from the
   protocol encoding name, if there is one.

   The name handed to XmlInitEncodingNS (when parser->m_ns is set) or
   XmlInitEncoding is s.  The visible code fills a 128-byte encodingBuf
   from parser->m_protocolEncodingName, emptying the buffer when the
   name does not fit or contains a character outside the 7-bit range,
   and also assigns parser->m_protocolEncodingName to s directly.
   NOTE(review): these look like alternatives chosen per build
   configuration - confirm.

   Returns XML_ERROR_NONE when the init call reports success; otherwise
   returns whatever handleUnknownEncoding() yields for the protocol
   encoding name.
*/
3905 static enum XML_Error
3906 initializeEncoding(XML_Parser parser)
3910 char encodingBuf[128];
3911 /* See comments about `protocolEncodingName` in parserInit() */
3912 if (!parser->m_protocolEncodingName)
3916 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
3917 if (i == sizeof(encodingBuf) - 1
3918 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
3919 encodingBuf[0] = '\0';
3922 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
3924 encodingBuf[i] = '\0';
3928 s = parser->m_protocolEncodingName;
3930 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(&parser->m_initEncoding, &parser->m_encoding, s))
3931 return XML_ERROR_NONE;
3932 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
/* Handles the declaration spanning s to next: an XML declaration, or a
   text declaration when isGeneralTextEntity is non-zero.

   Steps visible here:
   - The declaration is parsed through XmlParseXmlDecl (or the
     alternative selected alongside it).  The early returns
     XML_ERROR_TEXT_DECL and XML_ERROR_XML_DECL, chosen by
     isGeneralTextEntity, presumably report a failed parse - confirm.
   - standalone == 1 outside a text declaration marks the DTD as
     standalone and downgrades
     XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE to
     XML_PARAM_ENTITY_PARSING_NEVER.
   - With an XML declaration handler installed, the encoding name and
     version are copied into parser->m_temp2Pool and passed to the
     handler together with standalone.  Otherwise the declaration goes
     to the default handler when one is installed.
   - Only when no protocol encoding name is set does the declared
     encoding take effect: a recognised encoding must agree with the
     current one in minBytesPerChar (and be the same object for 2-byte
     encodings) or XML_ERROR_INCORRECT_ENCODING is returned with the
     event pointer on the encoding name; an encoding name without a
     recognised encoding is forwarded to handleUnknownEncoding().
   - parser->m_temp2Pool is cleared before returning XML_ERROR_NONE if
     anything was stored in it.

   Pool allocation failures return XML_ERROR_NO_MEMORY.
*/
3935 static enum XML_Error
3936 processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
3937 const char *s, const char *next)
3939 const char *encodingName = NULL;
3940 const XML_Char *storedEncName = NULL;
3941 const ENCODING *newEncoding = NULL;
3942 const char *version = NULL;
3943 const char *versionend;
3944 const XML_Char *storedversion = NULL;
3945 int standalone = -1;
3948 : XmlParseXmlDecl)(isGeneralTextEntity,
3952 &parser->m_eventPtr,
3958 if (isGeneralTextEntity)
3959 return XML_ERROR_TEXT_DECL;
3961 return XML_ERROR_XML_DECL;
/* Record an explicit standalone declaration in the DTD. */
3963 if (!isGeneralTextEntity && standalone == 1) {
3964 parser->m_dtd->standalone = XML_TRUE;
3966 if (parser->m_paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
3967 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
3968 #endif /* XML_DTD */
3970 if (parser->m_xmlDeclHandler) {
3971 if (encodingName != NULL) {
3972 storedEncName = poolStoreString(&parser->m_temp2Pool,
3976 + XmlNameLength(parser->m_encoding, encodingName));
3978 return XML_ERROR_NO_MEMORY;
3979 poolFinish(&parser->m_temp2Pool);
3982 storedversion = poolStoreString(&parser->m_temp2Pool,
3985 versionend - parser->m_encoding->minBytesPerChar);
3987 return XML_ERROR_NO_MEMORY;
3989 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName, standalone);
3991 else if (parser->m_defaultHandler)
3992 reportDefault(parser, parser->m_encoding, s, next);
3993 if (parser->m_protocolEncodingName == NULL) {
3995 /* Check that the specified encoding does not conflict with what
3996 * the parser has already deduced. Do we have the same number
3997 * of bytes in the smallest representation of a character? If
3998 * this is UTF-16, is it the same endianness?
4000 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4001 || (newEncoding->minBytesPerChar == 2 &&
4002 newEncoding != parser->m_encoding)) {
4003 parser->m_eventPtr = encodingName;
4004 return XML_ERROR_INCORRECT_ENCODING;
4006 parser->m_encoding = newEncoding;
4008 else if (encodingName) {
4009 enum XML_Error result;
4010 if (!storedEncName) {
4011 storedEncName = poolStoreString(
4012 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4013 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4015 return XML_ERROR_NO_MEMORY;
4017 result = handleUnknownEncoding(parser, storedEncName);
4018 poolClear(&parser->m_temp2Pool);
4019 if (result == XML_ERROR_UNKNOWN_ENCODING)
4020 parser->m_eventPtr = encodingName;
4025 if (storedEncName || storedversion)
4026 poolClear(&parser->m_temp2Pool);
4028 return XML_ERROR_NONE;
/* Tries to obtain support for the encoding called encodingName from the
   unknown-encoding handler installed on the parser.

   When parser->m_unknownEncodingHandler is set, an info record is
   prepared (its convert and release members are cleared) and the
   handler is called with parser->m_unknownEncodingHandlerData and
   encodingName.  An encoding object of XmlSizeOfUnknownEncoding()
   bytes is then allocated and initialised through
   XmlInitUnknownEncodingNS or XmlInitUnknownEncoding.  On success
   info.data and info.release are stored on the parser and
   parser->m_encoding is switched to the new encoding.

   Returns XML_ERROR_NONE once the encoding has been switched,
   XML_ERROR_NO_MEMORY when the encoding object cannot be allocated
   (info.release is invoked on info.data on that path), and
   XML_ERROR_UNKNOWN_ENCODING on the remaining path.
*/
4031 static enum XML_Error
4032 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
4034 if (parser->m_unknownEncodingHandler) {
4037 for (i = 0; i < 256; i++)
4039 info.convert = NULL;
4041 info.release = NULL;
4042 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, encodingName,
4045 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4046 if (!parser->m_unknownEncodingMem) {
4048 info.release(info.data);
4049 return XML_ERROR_NO_MEMORY;
4052 ? XmlInitUnknownEncodingNS
4053 : XmlInitUnknownEncoding)(parser->m_unknownEncodingMem,
4058 parser->m_unknownEncodingData = info.data;
4059 parser->m_unknownEncodingRelease = info.release;
4060 parser->m_encoding = enc;
4061 return XML_ERROR_NONE;
/* Not returned above: give the handler data back through the release
   callback when one was supplied. */
4064 if (info.release != NULL)
4065 info.release(info.data);
4067 return XML_ERROR_UNKNOWN_ENCODING;
/* First processor for the prolog: calls initializeEncoding(), then
   installs prologProcessor as the current processor and continues with
   it on the same input (s, end, nextPtr).

   NOTE(review): the action taken when initializeEncoding() does not
   return XML_ERROR_NONE should be confirmed against the full function.
*/
4070 static enum XML_Error PTRCALL
4071 prologInitProcessor(XML_Parser parser,
4074 const char **nextPtr)
4076 enum XML_Error result = initializeEncoding(parser);
4077 if (result != XML_ERROR_NONE)
4079 parser->m_processor = prologProcessor;
4080 return prologProcessor(parser, s, end, nextPtr);
/* First processor for an external parameter entity.

   Calls initializeEncoding(), marks the external parameter entity as
   read by setting parser->m_dtd->paramEntityRead, and then hands over
   to entityValueInitProcessor when parser->m_prologState.inEntityValue
   is set, or to externalParEntProcessor otherwise.  In both cases the
   chosen processor is installed on the parser and called immediately
   with the same input.

   NOTE(review): the action taken when initializeEncoding() does not
   return XML_ERROR_NONE should be confirmed against the full function.
*/
4085 static enum XML_Error PTRCALL
4086 externalParEntInitProcessor(XML_Parser parser,
4089 const char **nextPtr)
4091 enum XML_Error result = initializeEncoding(parser);
4092 if (result != XML_ERROR_NONE)
4095 /* we know now that XML_Parse(Buffer) has been called,
4096 so we consider the external parameter entity read */
4097 parser->m_dtd->paramEntityRead = XML_TRUE;
4099 if (parser->m_prologState.inEntityValue) {
4100 parser->m_processor = entityValueInitProcessor;
4101 return entityValueInitProcessor(parser, s, end, nextPtr);
4104 parser->m_processor = externalParEntProcessor;
4105 return externalParEntProcessor(parser, s, end, nextPtr);
/* Processor that scans the start of an entity value with XmlPrologTok(),
   looking for a leading declaration before the value is stored.

   Behaviour visible here:
   - parser->m_eventPtr and parser->m_eventEndPtr are kept at the
     start and end of the token being examined.
   - While this is not the final buffer, any token other than
     XML_TOK_INVALID in the guarded branch makes the function return
     XML_ERROR_NONE.  Otherwise XML_TOK_INVALID, XML_TOK_PARTIAL and
     XML_TOK_PARTIAL_CHAR map to XML_ERROR_INVALID_TOKEN,
     XML_ERROR_UNCLOSED_TOKEN and XML_ERROR_PARTIAL_CHAR, and the
     value from s to end is stored through storeEntityValue().
   - XML_TOK_XML_DECL is processed with processXmlDecl(); if parsing is
     then XML_FINISHED the result is XML_ERROR_ABORTED, otherwise
     entityValueProcessor is installed and called on the input that
     follows the declaration.
   - A byte order mark that ends a non-final buffer returns
     XML_ERROR_NONE.
   - XML_TOK_INSTANCE_START returns XML_ERROR_SYNTAX.

   NOTE(review): the condition selecting the guarded branch and the
   updates of *nextPtr should be confirmed against the full function.
*/
4109 static enum XML_Error PTRCALL
4110 entityValueInitProcessor(XML_Parser parser,
4113 const char **nextPtr)
4116 const char *start = s;
4117 const char *next = start;
4118 parser->m_eventPtr = start;
4121 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4122 parser->m_eventEndPtr = next;
4124 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4126 return XML_ERROR_NONE;
4129 case XML_TOK_INVALID:
4130 return XML_ERROR_INVALID_TOKEN;
4131 case XML_TOK_PARTIAL:
4132 return XML_ERROR_UNCLOSED_TOKEN;
4133 case XML_TOK_PARTIAL_CHAR:
4134 return XML_ERROR_PARTIAL_CHAR;
4135 case XML_TOK_NONE: /* start == end */
4139 /* found end of entity value - can store it now */
4140 return storeEntityValue(parser, parser->m_encoding, s, end);
4142 else if (tok == XML_TOK_XML_DECL) {
4143 enum XML_Error result;
4144 result = processXmlDecl(parser, 0, start, next);
4145 if (result != XML_ERROR_NONE)
4147 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For that
4148 * to happen, a parameter entity parsing handler must have
4149 * attempted to suspend the parser, which fails and raises an
4150 * error. The parser can be aborted, but can't be suspended.
4152 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4153 return XML_ERROR_ABORTED;
4155 /* stop scanning for text declaration - we found one */
4156 parser->m_processor = entityValueProcessor;
4157 return entityValueProcessor(parser, next, end, nextPtr);
4159 /* If we are at the end of the buffer, this would cause XmlPrologTok to
4160 return XML_TOK_NONE on the next call, which would then cause the
4161 function to exit with *nextPtr set to s - that is what we want for other
4162 tokens, but not for the BOM - we would rather like to skip it;
4163 then, when this routine is entered the next time, XmlPrologTok will
4164 return XML_TOK_INVALID, since the BOM is still in the buffer
4166 else if (tok == XML_TOK_BOM && next == end && !parser->m_parsingStatus.finalBuffer) {
4168 return XML_ERROR_NONE;
4170 /* If we get this token, we have the start of what might be a
4171 normal tag, but not a declaration (i.e. it doesn't begin with
4172 "<!"). In a DTD context, that isn't legal.
4174 else if (tok == XML_TOK_INSTANCE_START) {
4176 return XML_ERROR_SYNTAX;
4179 parser->m_eventPtr = start;
/* Processor for the start of an external parameter entity.

   Reads the first token with XmlPrologTok().  In the guarded branch a
   non-final buffer with any token other than XML_TOK_INVALID returns
   XML_ERROR_NONE, and XML_TOK_INVALID, XML_TOK_PARTIAL and
   XML_TOK_PARTIAL_CHAR map to XML_ERROR_INVALID_TOKEN,
   XML_ERROR_UNCLOSED_TOKEN and XML_ERROR_PARTIAL_CHAR.  When the first
   token is XML_TOK_BOM another token is fetched so that doProlog() is
   not handed the byte order mark.

   Finally prologProcessor is installed as the current processor and
   the token is passed on to doProlog(), with
   !parser->m_parsingStatus.finalBuffer as the last argument.

   NOTE(review): the condition selecting the guarded branch should be
   confirmed against the full function.
*/
4183 static enum XML_Error PTRCALL
4184 externalParEntProcessor(XML_Parser parser,
4187 const char **nextPtr)
4189 const char *next = s;
4192 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4194 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4196 return XML_ERROR_NONE;
4199 case XML_TOK_INVALID:
4200 return XML_ERROR_INVALID_TOKEN;
4201 case XML_TOK_PARTIAL:
4202 return XML_ERROR_UNCLOSED_TOKEN;
4203 case XML_TOK_PARTIAL_CHAR:
4204 return XML_ERROR_PARTIAL_CHAR;
4205 case XML_TOK_NONE: /* start == end */
4210 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4211 However, when parsing an external subset, doProlog will not accept a BOM
4212 as valid, and report a syntax error, so we have to skip the BOM
4214 else if (tok == XML_TOK_BOM) {
4216 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4219 parser->m_processor = prologProcessor;
4220 return doProlog(parser, parser->m_encoding, s, end, tok, next,
4221 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
/* Processor that scans an entity value with XmlPrologTok() in the
   parser encoding until the value can be stored.

   In the guarded branch a non-final buffer with any token other than
   XML_TOK_INVALID returns XML_ERROR_NONE, and XML_TOK_INVALID,
   XML_TOK_PARTIAL and XML_TOK_PARTIAL_CHAR map to
   XML_ERROR_INVALID_TOKEN, XML_ERROR_UNCLOSED_TOKEN and
   XML_ERROR_PARTIAL_CHAR.  Once the end of the value has been found,
   the text from s to end is stored through storeEntityValue() and its
   result is returned.

   NOTE(review): the condition selecting the guarded branch and the
   updates of *nextPtr should be confirmed against the full function.
*/
4224 static enum XML_Error PTRCALL
4225 entityValueProcessor(XML_Parser parser,
4228 const char **nextPtr)
4230 const char *start = s;
4231 const char *next = s;
4232 const ENCODING *enc = parser->m_encoding;
4236 tok = XmlPrologTok(enc, start, end, &next);
4238 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4240 return XML_ERROR_NONE;
4243 case XML_TOK_INVALID:
4244 return XML_ERROR_INVALID_TOKEN;
4245 case XML_TOK_PARTIAL:
4246 return XML_ERROR_UNCLOSED_TOKEN;
4247 case XML_TOK_PARTIAL_CHAR:
4248 return XML_ERROR_PARTIAL_CHAR;
4249 case XML_TOK_NONE: /* start == end */
4253 /* found end of entity value - can store it now */
4254 return storeEntityValue(parser, enc, s, end);
4260 #endif /* XML_DTD */
/* Processor for the prolog: fetches the first token at s with
   XmlPrologTok() in the parser encoding and passes it, together with
   the input range and nextPtr, to doProlog().  The last argument tells
   doProlog() whether this is not the final buffer
   (!parser->m_parsingStatus.finalBuffer).  Returns the result of
   doProlog().
*/
4262 static enum XML_Error PTRCALL
4263 prologProcessor(XML_Parser parser,
4266 const char **nextPtr)
4268 const char *next = s;
4269 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4270 return doProlog(parser, parser->m_encoding, s, end, tok, next,
4271 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
4274 static enum XML_Error
4275 doProlog(XML_Parser parser,
4276 const ENCODING *enc,
4281 const char **nextPtr,
4285 static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' };
4286 #endif /* XML_DTD */
4287 static const XML_Char atypeCDATA[] =
4288 { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
4289 static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' };
4290 static const XML_Char atypeIDREF[] =
4291 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
4292 static const XML_Char atypeIDREFS[] =
4293 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
4294 static const XML_Char atypeENTITY[] =
4295 { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
4296 static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N,
4297 ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
4298 static const XML_Char atypeNMTOKEN[] = {
4299 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
4300 static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T,
4301 ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
4302 static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T,
4303 ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' };
4304 static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' };
4305 static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' };
4307 /* save one level of indirection */
4308 DTD * const dtd = parser->m_dtd;
4310 const char **eventPP;
4311 const char **eventEndPP;
4312 enum XML_Content_Quant quant;
4314 if (enc == parser->m_encoding) {
4315 eventPP = &parser->m_eventPtr;
4316 eventEndPP = &parser->m_eventEndPtr;
4319 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4320 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4325 XML_Bool handleDefault = XML_TRUE;
4329 if (haveMore && tok != XML_TOK_INVALID) {
4331 return XML_ERROR_NONE;
4334 case XML_TOK_INVALID:
4336 return XML_ERROR_INVALID_TOKEN;
4337 case XML_TOK_PARTIAL:
4338 return XML_ERROR_UNCLOSED_TOKEN;
4339 case XML_TOK_PARTIAL_CHAR:
4340 return XML_ERROR_PARTIAL_CHAR;
4341 case -XML_TOK_PROLOG_S:
4346 /* for internal PE NOT referenced between declarations */
4347 if (enc != parser->m_encoding && !parser->m_openInternalEntities->betweenDecl) {
4349 return XML_ERROR_NONE;
4351 /* WFC: PE Between Declarations - must check that PE contains
4352 complete markup, not only for external PEs, but also for
4353 internal PEs if the reference occurs between declarations.
4355 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4356 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4358 return XML_ERROR_INCOMPLETE_PE;
4360 return XML_ERROR_NONE;
4362 #endif /* XML_DTD */
4363 return XML_ERROR_NO_ELEMENTS;
4370 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4372 case XML_ROLE_XML_DECL:
4374 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4375 if (result != XML_ERROR_NONE)
4377 enc = parser->m_encoding;
4378 handleDefault = XML_FALSE;
4381 case XML_ROLE_DOCTYPE_NAME:
4382 if (parser->m_startDoctypeDeclHandler) {
4383 parser->m_doctypeName = poolStoreString(&parser->m_tempPool, enc, s, next);
4384 if (!parser->m_doctypeName)
4385 return XML_ERROR_NO_MEMORY;
4386 poolFinish(&parser->m_tempPool);
4387 parser->m_doctypePubid = NULL;
4388 handleDefault = XML_FALSE;
4390 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4392 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4393 if (parser->m_startDoctypeDeclHandler) {
4394 parser->m_startDoctypeDeclHandler(parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4395 parser->m_doctypePubid, 1);
4396 parser->m_doctypeName = NULL;
4397 poolClear(&parser->m_tempPool);
4398 handleDefault = XML_FALSE;
4402 case XML_ROLE_TEXT_DECL:
4404 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4405 if (result != XML_ERROR_NONE)
4407 enc = parser->m_encoding;
4408 handleDefault = XML_FALSE;
4411 #endif /* XML_DTD */
4412 case XML_ROLE_DOCTYPE_PUBLIC_ID:
4414 parser->m_useForeignDTD = XML_FALSE;
4415 parser->m_declEntity = (ENTITY *)lookup(parser,
4416 &dtd->paramEntities,
4419 if (!parser->m_declEntity)
4420 return XML_ERROR_NO_MEMORY;
4421 #endif /* XML_DTD */
4422 dtd->hasParamEntityRefs = XML_TRUE;
4423 if (parser->m_startDoctypeDeclHandler) {
4425 if (!XmlIsPublicId(enc, s, next, eventPP))
4426 return XML_ERROR_PUBLICID;
4427 pubId = poolStoreString(&parser->m_tempPool, enc,
4428 s + enc->minBytesPerChar,
4429 next - enc->minBytesPerChar);
4431 return XML_ERROR_NO_MEMORY;
4432 normalizePublicId(pubId);
4433 poolFinish(&parser->m_tempPool);
4434 parser->m_doctypePubid = pubId;
4435 handleDefault = XML_FALSE;
4436 goto alreadyChecked;
4439 case XML_ROLE_ENTITY_PUBLIC_ID:
4440 if (!XmlIsPublicId(enc, s, next, eventPP))
4441 return XML_ERROR_PUBLICID;
4443 if (dtd->keepProcessing && parser->m_declEntity) {
4444 XML_Char *tem = poolStoreString(&dtd->pool,
4446 s + enc->minBytesPerChar,
4447 next - enc->minBytesPerChar);
4449 return XML_ERROR_NO_MEMORY;
4450 normalizePublicId(tem);
4451 parser->m_declEntity->publicId = tem;
4452 poolFinish(&dtd->pool);
4453 /* Don't suppress the default handler if we fell through from
4454 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4456 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4457 handleDefault = XML_FALSE;
4460 case XML_ROLE_DOCTYPE_CLOSE:
4461 if (parser->m_doctypeName) {
4462 parser->m_startDoctypeDeclHandler(parser->m_handlerArg, parser->m_doctypeName,
4463 parser->m_doctypeSysid, parser->m_doctypePubid, 0);
4464 poolClear(&parser->m_tempPool);
4465 handleDefault = XML_FALSE;
4467 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4468 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4469 was not set, indicating an external subset
4472 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4473 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4474 dtd->hasParamEntityRefs = XML_TRUE;
4475 if (parser->m_paramEntityParsing && parser->m_externalEntityRefHandler) {
4476 ENTITY *entity = (ENTITY *)lookup(parser,
4477 &dtd->paramEntities,
4481 /* The external subset name "#" will have already been
4482 * inserted into the hash table at the start of the
4483 * external entity parsing, so no allocation will happen
4484 * and lookup() cannot fail.
4486 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4488 if (parser->m_useForeignDTD)
4489 entity->base = parser->m_curBase;
4490 dtd->paramEntityRead = XML_FALSE;
4491 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
4496 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4497 if (dtd->paramEntityRead) {
4498 if (!dtd->standalone &&
4499 parser->m_notStandaloneHandler &&
4500 !parser->m_notStandaloneHandler(parser->m_handlerArg))
4501 return XML_ERROR_NOT_STANDALONE;
4503 /* if we didn't read the foreign DTD then this means that there
4504 is no external subset and we must reset dtd->hasParamEntityRefs
4506 else if (!parser->m_doctypeSysid)
4507 dtd->hasParamEntityRefs = hadParamEntityRefs;
4508 /* end of DTD - no need to update dtd->keepProcessing */
4510 parser->m_useForeignDTD = XML_FALSE;
4512 #endif /* XML_DTD */
4513 if (parser->m_endDoctypeDeclHandler) {
4514 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4515 handleDefault = XML_FALSE;
4518 case XML_ROLE_INSTANCE_START:
4520 /* if there is no DOCTYPE declaration then now is the
4521 last chance to read the foreign DTD
4523 if (parser->m_useForeignDTD) {
4524 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4525 dtd->hasParamEntityRefs = XML_TRUE;
4526 if (parser->m_paramEntityParsing && parser->m_externalEntityRefHandler) {
4527 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4531 return XML_ERROR_NO_MEMORY;
4532 entity->base = parser->m_curBase;
4533 dtd->paramEntityRead = XML_FALSE;
4534 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
4539 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4540 if (dtd->paramEntityRead) {
4541 if (!dtd->standalone &&
4542 parser->m_notStandaloneHandler &&
4543 !parser->m_notStandaloneHandler(parser->m_handlerArg))
4544 return XML_ERROR_NOT_STANDALONE;
4546 /* if we didn't read the foreign DTD then this means that there
4547 is no external subset and we must reset dtd->hasParamEntityRefs
4550 dtd->hasParamEntityRefs = hadParamEntityRefs;
4551 /* end of DTD - no need to update dtd->keepProcessing */
4554 #endif /* XML_DTD */
4555 parser->m_processor = contentProcessor;
4556 return contentProcessor(parser, s, end, nextPtr);
4557 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4558 parser->m_declElementType = getElementType(parser, enc, s, next);
4559 if (!parser->m_declElementType)
4560 return XML_ERROR_NO_MEMORY;
4561 goto checkAttListDeclHandler;
4562 case XML_ROLE_ATTRIBUTE_NAME:
4563 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4564 if (!parser->m_declAttributeId)
4565 return XML_ERROR_NO_MEMORY;
4566 parser->m_declAttributeIsCdata = XML_FALSE;
4567 parser->m_declAttributeType = NULL;
4568 parser->m_declAttributeIsId = XML_FALSE;
4569 goto checkAttListDeclHandler;
4570 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4571 parser->m_declAttributeIsCdata = XML_TRUE;
4572 parser->m_declAttributeType = atypeCDATA;
4573 goto checkAttListDeclHandler;
4574 case XML_ROLE_ATTRIBUTE_TYPE_ID:
4575 parser->m_declAttributeIsId = XML_TRUE;
4576 parser->m_declAttributeType = atypeID;
4577 goto checkAttListDeclHandler;
4578 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
4579 parser->m_declAttributeType = atypeIDREF;
4580 goto checkAttListDeclHandler;
4581 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
4582 parser->m_declAttributeType = atypeIDREFS;
4583 goto checkAttListDeclHandler;
4584 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
4585 parser->m_declAttributeType = atypeENTITY;
4586 goto checkAttListDeclHandler;
4587 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
4588 parser->m_declAttributeType = atypeENTITIES;
4589 goto checkAttListDeclHandler;
4590 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
4591 parser->m_declAttributeType = atypeNMTOKEN;
4592 goto checkAttListDeclHandler;
4593 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
4594 parser->m_declAttributeType = atypeNMTOKENS;
4595 checkAttListDeclHandler:
4596 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
4597 handleDefault = XML_FALSE;
4599 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4600 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
4601 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
4602 const XML_Char *prefix;
4603 if (parser->m_declAttributeType) {
4604 prefix = enumValueSep;
4607 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE
4611 if (!poolAppendString(&parser->m_tempPool, prefix))
4612 return XML_ERROR_NO_MEMORY;
4613 if (!poolAppend(&parser->m_tempPool, enc, s, next))
4614 return XML_ERROR_NO_MEMORY;
4615 parser->m_declAttributeType = parser->m_tempPool.start;
4616 handleDefault = XML_FALSE;
4619 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4620 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
4621 if (dtd->keepProcessing) {
4622 if (!defineAttribute(parser->m_declElementType, parser->m_declAttributeId,
4623 parser->m_declAttributeIsCdata, parser->m_declAttributeIsId,
4625 return XML_ERROR_NO_MEMORY;
4626 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4627 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4628 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4629 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
4630 /* Enumerated or Notation type */
4631 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4632 || !poolAppendChar(&parser->m_tempPool, XML_T('\0')))
4633 return XML_ERROR_NO_MEMORY;
4634 parser->m_declAttributeType = parser->m_tempPool.start;
4635 poolFinish(&parser->m_tempPool);
4638 parser->m_attlistDeclHandler(parser->m_handlerArg, parser->m_declElementType->name,
4639 parser->m_declAttributeId->name, parser->m_declAttributeType,
4640 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
4641 poolClear(&parser->m_tempPool);
4642 handleDefault = XML_FALSE;
4646 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4647 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
4648 if (dtd->keepProcessing) {
4649 const XML_Char *attVal;
4650 enum XML_Error result =
4651 storeAttributeValue(parser, enc, parser->m_declAttributeIsCdata,
4652 s + enc->minBytesPerChar,
4653 next - enc->minBytesPerChar,
4657 attVal = poolStart(&dtd->pool);
4658 poolFinish(&dtd->pool);
4659 /* ID attributes aren't allowed to have a default */
4660 if (!defineAttribute(parser->m_declElementType, parser->m_declAttributeId,
4661 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
4662 return XML_ERROR_NO_MEMORY;
4663 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4664 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4665 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4666 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
4667 /* Enumerated or Notation type */
4668 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4669 || !poolAppendChar(&parser->m_tempPool, XML_T('\0')))
4670 return XML_ERROR_NO_MEMORY;
4671 parser->m_declAttributeType = parser->m_tempPool.start;
4672 poolFinish(&parser->m_tempPool);
4675 parser->m_attlistDeclHandler(parser->m_handlerArg, parser->m_declElementType->name,
4676 parser->m_declAttributeId->name, parser->m_declAttributeType,
4678 role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
4679 poolClear(&parser->m_tempPool);
4680 handleDefault = XML_FALSE;
4684 case XML_ROLE_ENTITY_VALUE:
4685 if (dtd->keepProcessing) {
4686 enum XML_Error result = storeEntityValue(parser, enc,
4687 s + enc->minBytesPerChar,
4688 next - enc->minBytesPerChar);
4689 if (parser->m_declEntity) {
4690 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
4691 parser->m_declEntity->textLen = (int)(poolLength(&dtd->entityValuePool));
4692 poolFinish(&dtd->entityValuePool);
4693 if (parser->m_entityDeclHandler) {
4695 parser->m_entityDeclHandler(parser->m_handlerArg,
4696 parser->m_declEntity->name,
4697 parser->m_declEntity->is_param,
4698 parser->m_declEntity->textPtr,
4699 parser->m_declEntity->textLen,
4700 parser->m_curBase, 0, 0, 0);
4701 handleDefault = XML_FALSE;
4705 poolDiscard(&dtd->entityValuePool);
4706 if (result != XML_ERROR_NONE)
4710 case XML_ROLE_DOCTYPE_SYSTEM_ID:
4712 parser->m_useForeignDTD = XML_FALSE;
4713 #endif /* XML_DTD */
4714 dtd->hasParamEntityRefs = XML_TRUE;
4715 if (parser->m_startDoctypeDeclHandler) {
4716 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
4717 s + enc->minBytesPerChar,
4718 next - enc->minBytesPerChar);
4719 if (parser->m_doctypeSysid == NULL)
4720 return XML_ERROR_NO_MEMORY;
4721 poolFinish(&parser->m_tempPool);
4722 handleDefault = XML_FALSE;
4726 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
4727 for the case where no parser->m_startDoctypeDeclHandler is set */
4728 parser->m_doctypeSysid = externalSubsetName;
4729 #endif /* XML_DTD */
4730 if (!dtd->standalone
4732 && !parser->m_paramEntityParsing
4733 #endif /* XML_DTD */
4734 && parser->m_notStandaloneHandler
4735 && !parser->m_notStandaloneHandler(parser->m_handlerArg))
4736 return XML_ERROR_NOT_STANDALONE;
4740 if (!parser->m_declEntity) {
4741 parser->m_declEntity = (ENTITY *)lookup(parser,
4742 &dtd->paramEntities,
4745 if (!parser->m_declEntity)
4746 return XML_ERROR_NO_MEMORY;
4747 parser->m_declEntity->publicId = NULL;
4750 #endif /* XML_DTD */
4751 case XML_ROLE_ENTITY_SYSTEM_ID:
4752 if (dtd->keepProcessing && parser->m_declEntity) {
4753 parser->m_declEntity->systemId = poolStoreString(&dtd->pool, enc,
4754 s + enc->minBytesPerChar,
4755 next - enc->minBytesPerChar);
4756 if (!parser->m_declEntity->systemId)
4757 return XML_ERROR_NO_MEMORY;
4758 parser->m_declEntity->base = parser->m_curBase;
4759 poolFinish(&dtd->pool);
4760 /* Don't suppress the default handler if we fell through from
4761 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
4763 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
4764 handleDefault = XML_FALSE;
4767 case XML_ROLE_ENTITY_COMPLETE:
4768 if (dtd->keepProcessing && parser->m_declEntity && parser->m_entityDeclHandler) {
4770 parser->m_entityDeclHandler(parser->m_handlerArg,
4771 parser->m_declEntity->name,
4772 parser->m_declEntity->is_param,
4774 parser->m_declEntity->base,
4775 parser->m_declEntity->systemId,
4776 parser->m_declEntity->publicId,
4778 handleDefault = XML_FALSE;
4781 case XML_ROLE_ENTITY_NOTATION_NAME:
4782 if (dtd->keepProcessing && parser->m_declEntity) {
4783 parser->m_declEntity->notation = poolStoreString(&dtd->pool, enc, s, next);
4784 if (!parser->m_declEntity->notation)
4785 return XML_ERROR_NO_MEMORY;
4786 poolFinish(&dtd->pool);
4787 if (parser->m_unparsedEntityDeclHandler) {
4789 parser->m_unparsedEntityDeclHandler(parser->m_handlerArg,
4790 parser->m_declEntity->name,
4791 parser->m_declEntity->base,
4792 parser->m_declEntity->systemId,
4793 parser->m_declEntity->publicId,
4794 parser->m_declEntity->notation);
4795 handleDefault = XML_FALSE;
4797 else if (parser->m_entityDeclHandler) {
4799 parser->m_entityDeclHandler(parser->m_handlerArg,
4800 parser->m_declEntity->name,
4802 parser->m_declEntity->base,
4803 parser->m_declEntity->systemId,
4804 parser->m_declEntity->publicId,
4805 parser->m_declEntity->notation);
4806 handleDefault = XML_FALSE;
4810 case XML_ROLE_GENERAL_ENTITY_NAME:
4812 if (XmlPredefinedEntityName(enc, s, next)) {
4813 parser->m_declEntity = NULL;
4816 if (dtd->keepProcessing) {
4817 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4819 return XML_ERROR_NO_MEMORY;
4820 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name,
4822 if (!parser->m_declEntity)
4823 return XML_ERROR_NO_MEMORY;
4824 if (parser->m_declEntity->name != name) {
4825 poolDiscard(&dtd->pool);
4826 parser->m_declEntity = NULL;
4829 poolFinish(&dtd->pool);
4830 parser->m_declEntity->publicId = NULL;
4831 parser->m_declEntity->is_param = XML_FALSE;
4832 /* if we have a parent parser or are reading an internal parameter
4833 entity, then the entity declaration is not considered "internal"
4835 parser->m_declEntity->is_internal = !(parser->m_parentParser || parser->m_openInternalEntities);
4836 if (parser->m_entityDeclHandler)
4837 handleDefault = XML_FALSE;
4841 poolDiscard(&dtd->pool);
4842 parser->m_declEntity = NULL;
4846 case XML_ROLE_PARAM_ENTITY_NAME:
4848 if (dtd->keepProcessing) {
4849 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4851 return XML_ERROR_NO_MEMORY;
4852 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4853 name, sizeof(ENTITY));
4854 if (!parser->m_declEntity)
4855 return XML_ERROR_NO_MEMORY;
4856 if (parser->m_declEntity->name != name) {
4857 poolDiscard(&dtd->pool);
4858 parser->m_declEntity = NULL;
4861 poolFinish(&dtd->pool);
4862 parser->m_declEntity->publicId = NULL;
4863 parser->m_declEntity->is_param = XML_TRUE;
4864 /* if we have a parent parser or are reading an internal parameter
4865 entity, then the entity declaration is not considered "internal"
4867 parser->m_declEntity->is_internal = !(parser->m_parentParser || parser->m_openInternalEntities);
4868 if (parser->m_entityDeclHandler)
4869 handleDefault = XML_FALSE;
4873 poolDiscard(&dtd->pool);
4874 parser->m_declEntity = NULL;
4876 #else /* not XML_DTD */
4877 parser->m_declEntity = NULL;
4878 #endif /* XML_DTD */
4880 case XML_ROLE_NOTATION_NAME:
4881 parser->m_declNotationPublicId = NULL;
4882 parser->m_declNotationName = NULL;
4883 if (parser->m_notationDeclHandler) {
4884 parser->m_declNotationName = poolStoreString(&parser->m_tempPool, enc, s, next);
4885 if (!parser->m_declNotationName)
4886 return XML_ERROR_NO_MEMORY;
4887 poolFinish(&parser->m_tempPool);
4888 handleDefault = XML_FALSE;
4891 case XML_ROLE_NOTATION_PUBLIC_ID:
4892 if (!XmlIsPublicId(enc, s, next, eventPP))
4893 return XML_ERROR_PUBLICID;
4894 if (parser->m_declNotationName) { /* means m_notationDeclHandler != NULL */
4895 XML_Char *tem = poolStoreString(&parser->m_tempPool,
4897 s + enc->minBytesPerChar,
4898 next - enc->minBytesPerChar);
4900 return XML_ERROR_NO_MEMORY;
4901 normalizePublicId(tem);
4902 parser->m_declNotationPublicId = tem;
4903 poolFinish(&parser->m_tempPool);
4904 handleDefault = XML_FALSE;
4907 case XML_ROLE_NOTATION_SYSTEM_ID:
4908 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
4909 const XML_Char *systemId
4910 = poolStoreString(&parser->m_tempPool, enc,
4911 s + enc->minBytesPerChar,
4912 next - enc->minBytesPerChar);
4914 return XML_ERROR_NO_MEMORY;
4916 parser->m_notationDeclHandler(parser->m_handlerArg,
4917 parser->m_declNotationName,
4920 parser->m_declNotationPublicId);
4921 handleDefault = XML_FALSE;
4923 poolClear(&parser->m_tempPool);
4925 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
4926 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
4928 parser->m_notationDeclHandler(parser->m_handlerArg,
4929 parser->m_declNotationName,
4932 parser->m_declNotationPublicId);
4933 handleDefault = XML_FALSE;
4935 poolClear(&parser->m_tempPool);
4937 case XML_ROLE_ERROR:
4939 case XML_TOK_PARAM_ENTITY_REF:
4940 /* PE references in internal subset are
4941 not allowed within declarations. */
4942 return XML_ERROR_PARAM_ENTITY_REF;
4943 case XML_TOK_XML_DECL:
4944 return XML_ERROR_MISPLACED_XML_PI;
4946 return XML_ERROR_SYNTAX;
4949 case XML_ROLE_IGNORE_SECT:
4951 enum XML_Error result;
4952 if (parser->m_defaultHandler)
4953 reportDefault(parser, enc, s, next);
4954 handleDefault = XML_FALSE;
4955 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
4956 if (result != XML_ERROR_NONE)
4959 parser->m_processor = ignoreSectionProcessor;
4964 #endif /* XML_DTD */
4965 case XML_ROLE_GROUP_OPEN:
4966 if (parser->m_prologState.level >= parser->m_groupSize) {
4967 if (parser->m_groupSize) {
4968 char *temp = (char *)REALLOC(parser, parser->m_groupConnector, parser->m_groupSize *= 2);
4970 parser->m_groupSize /= 2;
4971 return XML_ERROR_NO_MEMORY;
4973 parser->m_groupConnector = temp;
4974 if (dtd->scaffIndex) {
4975 int *temp = (int *)REALLOC(parser, dtd->scaffIndex,
4976 parser->m_groupSize * sizeof(int));
4978 return XML_ERROR_NO_MEMORY;
4979 dtd->scaffIndex = temp;
4983 parser->m_groupConnector = (char *)MALLOC(parser, parser->m_groupSize = 32);
4984 if (!parser->m_groupConnector) {
4985 parser->m_groupSize = 0;
4986 return XML_ERROR_NO_MEMORY;
4990 parser->m_groupConnector[parser->m_prologState.level] = 0;
4991 if (dtd->in_eldecl) {
4992 int myindex = nextScaffoldPart(parser);
4994 return XML_ERROR_NO_MEMORY;
4995 dtd->scaffIndex[dtd->scaffLevel] = myindex;
4997 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
4998 if (parser->m_elementDeclHandler)
4999 handleDefault = XML_FALSE;
5002 case XML_ROLE_GROUP_SEQUENCE:
5003 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5004 return XML_ERROR_SYNTAX;
5005 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5006 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5007 handleDefault = XML_FALSE;
5009 case XML_ROLE_GROUP_CHOICE:
5010 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5011 return XML_ERROR_SYNTAX;
5013 && !parser->m_groupConnector[parser->m_prologState.level]
5014 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5017 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5019 if (parser->m_elementDeclHandler)
5020 handleDefault = XML_FALSE;
5022 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5024 case XML_ROLE_PARAM_ENTITY_REF:
5026 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5027 dtd->hasParamEntityRefs = XML_TRUE;
5028 if (!parser->m_paramEntityParsing)
5029 dtd->keepProcessing = dtd->standalone;
5031 const XML_Char *name;
5033 name = poolStoreString(&dtd->pool, enc,
5034 s + enc->minBytesPerChar,
5035 next - enc->minBytesPerChar);
5037 return XML_ERROR_NO_MEMORY;
5038 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5039 poolDiscard(&dtd->pool);
5040 /* first, determine if a check for an existing declaration is needed;
5041 if yes, check that the entity exists, and that it is internal,
5042 otherwise call the skipped entity handler
5044 if (parser->m_prologState.documentEntity &&
5046 ? !parser->m_openInternalEntities
5047 : !dtd->hasParamEntityRefs)) {
5049 return XML_ERROR_UNDEFINED_ENTITY;
5050 else if (!entity->is_internal) {
5051 /* It's hard to exhaustively search the code to be sure,
5052 * but there doesn't seem to be a way of executing the
5053 * following line. There are two cases:
5055 * If 'standalone' is false, the DTD must have no
5056 * parameter entities or we wouldn't have passed the outer
5057 * 'if' statement. That means the only entity in the hash
5058 * table is the external subset name "#" which cannot be
5059 * given as a parameter entity name in XML syntax, so the
5060 * lookup must have returned NULL and we don't even reach
5061 * the test for an internal entity.
5063 * If 'standalone' is true, it does not seem to be
5064 * possible to create entities taking this code path that
5065 * are not internal entities, so fail the test above.
5067 * Because this analysis is very uncertain, the code is
5068 * being left in place and merely removed from the
5069 * coverage test statistics.
5071 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5075 dtd->keepProcessing = dtd->standalone;
5076 /* cannot report skipped entities in declarations */
5077 if ((role == XML_ROLE_PARAM_ENTITY_REF) && parser->m_skippedEntityHandler) {
5078 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5079 handleDefault = XML_FALSE;
5084 return XML_ERROR_RECURSIVE_ENTITY_REF;
5085 if (entity->textPtr) {
5086 enum XML_Error result;
5087 XML_Bool betweenDecl =
5088 (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5089 result = processInternalEntity(parser, entity, betweenDecl);
5090 if (result != XML_ERROR_NONE)
5092 handleDefault = XML_FALSE;
5095 if (parser->m_externalEntityRefHandler) {
5096 dtd->paramEntityRead = XML_FALSE;
5097 entity->open = XML_TRUE;
5098 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
5102 entity->publicId)) {
5103 entity->open = XML_FALSE;
5104 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5106 entity->open = XML_FALSE;
5107 handleDefault = XML_FALSE;
5108 if (!dtd->paramEntityRead) {
5109 dtd->keepProcessing = dtd->standalone;
5114 dtd->keepProcessing = dtd->standalone;
5118 #endif /* XML_DTD */
5119 if (!dtd->standalone &&
5120 parser->m_notStandaloneHandler &&
5121 !parser->m_notStandaloneHandler(parser->m_handlerArg))
5122 return XML_ERROR_NOT_STANDALONE;
5125 /* Element declaration stuff */
5127 case XML_ROLE_ELEMENT_NAME:
5128 if (parser->m_elementDeclHandler) {
5129 parser->m_declElementType = getElementType(parser, enc, s, next);
5130 if (!parser->m_declElementType)
5131 return XML_ERROR_NO_MEMORY;
5132 dtd->scaffLevel = 0;
5133 dtd->scaffCount = 0;
5134 dtd->in_eldecl = XML_TRUE;
5135 handleDefault = XML_FALSE;
5139 case XML_ROLE_CONTENT_ANY:
5140 case XML_ROLE_CONTENT_EMPTY:
5141 if (dtd->in_eldecl) {
5142 if (parser->m_elementDeclHandler) {
5143 XML_Content * content = (XML_Content *) MALLOC(parser, sizeof(XML_Content));
5145 return XML_ERROR_NO_MEMORY;
5146 content->quant = XML_CQUANT_NONE;
5147 content->name = NULL;
5148 content->numchildren = 0;
5149 content->children = NULL;
5150 content->type = ((role == XML_ROLE_CONTENT_ANY) ?
5154 parser->m_elementDeclHandler(parser->m_handlerArg, parser->m_declElementType->name, content);
5155 handleDefault = XML_FALSE;
5157 dtd->in_eldecl = XML_FALSE;
5161 case XML_ROLE_CONTENT_PCDATA:
5162 if (dtd->in_eldecl) {
5163 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5165 if (parser->m_elementDeclHandler)
5166 handleDefault = XML_FALSE;
5170 case XML_ROLE_CONTENT_ELEMENT:
5171 quant = XML_CQUANT_NONE;
5172 goto elementContent;
5173 case XML_ROLE_CONTENT_ELEMENT_OPT:
5174 quant = XML_CQUANT_OPT;
5175 goto elementContent;
5176 case XML_ROLE_CONTENT_ELEMENT_REP:
5177 quant = XML_CQUANT_REP;
5178 goto elementContent;
5179 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5180 quant = XML_CQUANT_PLUS;
5182 if (dtd->in_eldecl) {
5184 const XML_Char *name;
5186 const char *nxt = (quant == XML_CQUANT_NONE
5188 : next - enc->minBytesPerChar);
5189 int myindex = nextScaffoldPart(parser);
5191 return XML_ERROR_NO_MEMORY;
5192 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5193 dtd->scaffold[myindex].quant = quant;
5194 el = getElementType(parser, enc, s, nxt);
5196 return XML_ERROR_NO_MEMORY;
5198 dtd->scaffold[myindex].name = name;
5200 for (; name[nameLen++]; );
5201 dtd->contentStringLen += nameLen;
5202 if (parser->m_elementDeclHandler)
5203 handleDefault = XML_FALSE;
5207 case XML_ROLE_GROUP_CLOSE:
5208 quant = XML_CQUANT_NONE;
5210 case XML_ROLE_GROUP_CLOSE_OPT:
5211 quant = XML_CQUANT_OPT;
5213 case XML_ROLE_GROUP_CLOSE_REP:
5214 quant = XML_CQUANT_REP;
5216 case XML_ROLE_GROUP_CLOSE_PLUS:
5217 quant = XML_CQUANT_PLUS;
5219 if (dtd->in_eldecl) {
5220 if (parser->m_elementDeclHandler)
5221 handleDefault = XML_FALSE;
5223 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5224 if (dtd->scaffLevel == 0) {
5225 if (!handleDefault) {
5226 XML_Content *model = build_model(parser);
5228 return XML_ERROR_NO_MEMORY;
5230 parser->m_elementDeclHandler(parser->m_handlerArg, parser->m_declElementType->name, model);
5232 dtd->in_eldecl = XML_FALSE;
5233 dtd->contentStringLen = 0;
5237 /* End element declaration stuff */
5240 if (!reportProcessingInstruction(parser, enc, s, next))
5241 return XML_ERROR_NO_MEMORY;
5242 handleDefault = XML_FALSE;
5244 case XML_ROLE_COMMENT:
5245 if (!reportComment(parser, enc, s, next))
5246 return XML_ERROR_NO_MEMORY;
5247 handleDefault = XML_FALSE;
5252 handleDefault = XML_FALSE;
5256 case XML_ROLE_DOCTYPE_NONE:
5257 if (parser->m_startDoctypeDeclHandler)
5258 handleDefault = XML_FALSE;
5260 case XML_ROLE_ENTITY_NONE:
5261 if (dtd->keepProcessing && parser->m_entityDeclHandler)
5262 handleDefault = XML_FALSE;
5264 case XML_ROLE_NOTATION_NONE:
5265 if (parser->m_notationDeclHandler)
5266 handleDefault = XML_FALSE;
5268 case XML_ROLE_ATTLIST_NONE:
5269 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5270 handleDefault = XML_FALSE;
5272 case XML_ROLE_ELEMENT_NONE:
5273 if (parser->m_elementDeclHandler)
5274 handleDefault = XML_FALSE;
5276 } /* end of big switch */
5278 if (handleDefault && parser->m_defaultHandler)
5279 reportDefault(parser, enc, s, next);
5281 switch (parser->m_parsingStatus.parsing) {
5284 return XML_ERROR_NONE;
5286 return XML_ERROR_ABORTED;
5289 tok = XmlPrologTok(enc, s, end, &next);
/* Processor routine that tokenizes input starting at s with
 * XmlPrologTok() in the parser's own encoding and dispatches on the
 * token kind.  Judging by its name and by the
 * XML_ERROR_JUNK_AFTER_DOC_ELEMENT result, it handles the text that
 * follows the document element.
 *
 * Visible behaviour:
 *   - whitespace tokens are forwarded to the default handler when one
 *     is installed;
 *   - processing instructions and comments are passed to
 *     reportProcessingInstruction() / reportComment(), whose failure is
 *     reported as XML_ERROR_NO_MEMORY;
 *   - invalid or incomplete tokens produce the matching XML_Error code
 *     (incomplete tokens only once the final buffer has been supplied).
 *
 * Returns an XML_Error code.
 *
 * NOTE(review): some lines of this definition (braces, the switch
 * header, several case labels and repeated statements) are absent from
 * this listing; the comments below describe only the statements that
 * are visible.
 */
5295 static enum XML_Error PTRCALL
5296 epilogProcessor(XML_Parser parser,
5299 const char **nextPtr)
/* Install this routine as the active processor and record where the
   current event starts. */
5301 parser->m_processor = epilogProcessor;
5302 parser->m_eventPtr = s;
5304 const char *next = NULL;
5305 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5306 parser->m_eventEndPtr = next;
5308 /* report partial linebreak - it might be the last token */
5309 case -XML_TOK_PROLOG_S:
5310 if (parser->m_defaultHandler) {
5311 reportDefault(parser, parser->m_encoding, s, next);
/* reportDefault() has just run; if the parsing status is now
   XML_FINISHED the call is reported as aborted. */
5312 if (parser->m_parsingStatus.parsing == XML_FINISHED)
5313 return XML_ERROR_ABORTED;
5316 return XML_ERROR_NONE;
5319 return XML_ERROR_NONE;
5320 case XML_TOK_PROLOG_S:
5321 if (parser->m_defaultHandler)
5322 reportDefault(parser, parser->m_encoding, s, next);
5325 if (!reportProcessingInstruction(parser, parser->m_encoding, s, next))
5326 return XML_ERROR_NO_MEMORY;
5328 case XML_TOK_COMMENT:
5329 if (!reportComment(parser, parser->m_encoding, s, next))
5330 return XML_ERROR_NO_MEMORY;
/* Invalid token: the event pointer is moved to `next` before the
   error is returned. */
5332 case XML_TOK_INVALID:
5333 parser->m_eventPtr = next;
5334 return XML_ERROR_INVALID_TOKEN;
/* Incomplete token: not an error while more input may still arrive
   (finalBuffer is false); otherwise the token is unclosed. */
5335 case XML_TOK_PARTIAL:
5336 if (!parser->m_parsingStatus.finalBuffer) {
5338 return XML_ERROR_NONE;
5340 return XML_ERROR_UNCLOSED_TOKEN;
/* Incomplete character: same treatment as an incomplete token, with
   its own error code. */
5341 case XML_TOK_PARTIAL_CHAR:
5342 if (!parser->m_parsingStatus.finalBuffer) {
5344 return XML_ERROR_NONE;
5346 return XML_ERROR_PARTIAL_CHAR;
/* NOTE(review): the label guarding this return is not visible here;
   presumably it is the switch default - confirm. */
5348 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
/* Step past the token just handled, then consult the parsing status
   to decide whether to stop. */
5350 parser->m_eventPtr = s = next;
5351 switch (parser->m_parsingStatus.parsing) {
5354 return XML_ERROR_NONE;
5356 return XML_ERROR_ABORTED;
/* Starts processing the stored text of an internal entity.
 *
 * parser      - parser state; its lists of open and free
 *               OPEN_INTERNAL_ENTITY records are updated here.
 * entity      - entity whose textPtr / textLen delimit the text to
 *               process; its open and processed fields are written.
 * betweenDecl - stored unchanged in the open-entity record.
 *
 * An OPEN_INTERNAL_ENTITY record is taken from the free list, or
 * allocated with MALLOC when the list is empty, and pushed on
 * parser->m_openInternalEntities.  The text is then run through
 * doProlog() when entity->is_param is set, and through doContent()
 * otherwise, using the parser's internal encoding.
 *
 * XML_ERROR_NO_MEMORY is returned when the record cannot be allocated.
 *
 * NOTE(review): some lines of this definition (braces, else branches,
 * the XML_DTD conditional opener, the final return) are absent from
 * this listing; the comments describe only the visible statements.
 */
5362 static enum XML_Error
5363 processInternalEntity(XML_Parser parser, ENTITY *entity,
5364 XML_Bool betweenDecl)
5366 const char *textStart, *textEnd;
5368 enum XML_Error result;
5369 OPEN_INTERNAL_ENTITY *openEntity;
/* Reuse a record from the free list when one is available. */
5371 if (parser->m_freeInternalEntities) {
5372 openEntity = parser->m_freeInternalEntities;
5373 parser->m_freeInternalEntities = openEntity->next;
5376 openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5378 return XML_ERROR_NO_MEMORY;
/* Mark the entity as open with nothing consumed yet, and push the
   record on the list of open internal entities. */
5380 entity->open = XML_TRUE;
5381 entity->processed = 0;
5382 openEntity->next = parser->m_openInternalEntities;
5383 parser->m_openInternalEntities = openEntity;
5384 openEntity->entity = entity;
5385 openEntity->startTagLevel = parser->m_tagLevel;
5386 openEntity->betweenDecl = betweenDecl;
5387 openEntity->internalEventPtr = NULL;
5388 openEntity->internalEventEndPtr = NULL;
5389 textStart = (char *)entity->textPtr;
5390 textEnd = (char *)(entity->textPtr + entity->textLen);
5391 /* Set a safe default value in case 'next' does not get set */
/* Parameter entities are tokenized and handed to doProlog(). */
5395 if (entity->is_param) {
5396 int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5397 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok,
5398 next, &next, XML_FALSE);
5401 #endif /* XML_DTD */
5402 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, textStart,
5403 textEnd, &next, XML_FALSE);
5405 if (result == XML_ERROR_NONE) {
/* Text left over while the parser is suspended: remember how far
   processing got and let internalEntityProcessor resume later. */
5406 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5407 entity->processed = (int)(next - textStart);
5408 parser->m_processor = internalEntityProcessor;
/* NOTE(review): the statements below close the entity and recycle its
   record; presumably they form the else branch of the test above -
   the separating line is not visible here. */
5411 entity->open = XML_FALSE;
5412 parser->m_openInternalEntities = openEntity->next;
5413 /* put openEntity back in list of free instances */
5414 openEntity->next = parser->m_freeInternalEntities;
5415 parser->m_freeInternalEntities = openEntity;
/* Processor routine that resumes the internal entity at the head of
 * parser->m_openInternalEntities.
 *
 * Processing restarts at entity->textPtr + entity->processed and runs
 * to the end of the entity text, through doProlog() for parameter
 * entities and doContent() otherwise.  If text is still left over and
 * the parser is suspended, the new offset is stored in
 * entity->processed.  Otherwise the entity is closed, its record is
 * returned to the free list, and parsing of the enclosing input
 * [s, end) continues via doProlog() (with prologProcessor installed)
 * or doContent() (with contentProcessor installed).
 *
 * Returns an XML_Error code.
 *
 * NOTE(review): some lines of this definition (braces, parameter
 * lines, returns, the XML_DTD conditional opener) are absent from this
 * listing; the comments describe only the visible statements.
 */
5421 static enum XML_Error PTRCALL
5422 internalEntityProcessor(XML_Parser parser,
5425 const char **nextPtr)
5428 const char *textStart, *textEnd;
5430 enum XML_Error result;
5431 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
/* NOTE(review): the test guarding this return is not visible;
   presumably it fires when no entity is open - confirm. */
5433 return XML_ERROR_UNEXPECTED_STATE;
5435 entity = openEntity->entity;
/* Resume from the offset recorded when processing last stopped. */
5436 textStart = ((char *)entity->textPtr) + entity->processed;
5437 textEnd = (char *)(entity->textPtr + entity->textLen);
5438 /* Set a safe default value in case 'next' does not get set */
5442 if (entity->is_param) {
5443 int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5444 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok,
5445 next, &next, XML_FALSE);
5448 #endif /* XML_DTD */
5449 result = doContent(parser, openEntity->startTagLevel, parser->m_internalEncoding,
5450 textStart, textEnd, &next, XML_FALSE);
5452 if (result != XML_ERROR_NONE)
/* Still suspended with text remaining: store the offset measured from
   the start of the entity text, not from textStart. */
5454 else if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5455 entity->processed = (int)(next - (char *)entity->textPtr);
5459 entity->open = XML_FALSE;
5460 parser->m_openInternalEntities = openEntity->next;
5461 /* put openEntity back in list of free instances */
5462 openEntity->next = parser->m_freeInternalEntities;
5463 parser->m_freeInternalEntities = openEntity;
/* The entity is finished: hand the enclosing input back to the prolog
   or content machinery, using the parser's own encoding again. */
5467 if (entity->is_param) {
5469 parser->m_processor = prologProcessor;
5470 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5471 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5472 (XML_Bool)!parser->m_parsingStatus.finalBuffer);
5475 #endif /* XML_DTD */
5477 parser->m_processor = contentProcessor;
5478 /* see externalEntityContentProcessor vs contentProcessor */
/* The start tag level passed on is 1 when a parent parser exists and
   0 otherwise. */
5479 return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, s, end,
5480 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
/* Processor routine that consumes no input and simply returns the
 * error code stored in parser->m_errorCode.  The s, end and nextPtr
 * parameters are wrapped in UNUSED_P and never read.
 */
5484 static enum XML_Error PTRCALL
5485 errorProcessor(XML_Parser parser,
5486 const char *UNUSED_P(s),
5487 const char *UNUSED_P(end),
5488 const char **UNUSED_P(nextPtr))
5490 return parser->m_errorCode;
/* Builds a NUL-terminated attribute value in `pool`.
 *
 * The text in [ptr, end) is first passed to appendAttributeValue();
 * afterwards a terminating XML_T('\0') is appended.
 *
 * isCdata - when false, a value that ends in a space (0x20) receives
 *           extra handling before the terminator is added.
 *
 * Returns XML_ERROR_NONE on success and XML_ERROR_NO_MEMORY when the
 * terminator cannot be appended.
 *
 * NOTE(review): some lines of this definition (the pool parameter, the
 * handling of `result`, the statement controlled by the trailing-space
 * test) are absent from this listing.
 */
5493 static enum XML_Error
5494 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5495 const char *ptr, const char *end,
5498 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr,
/* Non-CDATA value whose last stored character is a space; presumably
   the statement that follows in the full file removes that trailing
   space - confirm. */
5502 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5504 if (!poolAppendChar(pool, XML_T('\0')))
5505 return XML_ERROR_NO_MEMORY;
5506 return XML_ERROR_NONE;
/* Appends the text of an attribute value in [ptr, end) to `pool`,
 * token by token, using XmlAttributeValueTok().
 *
 * Visible handling per token kind:
 *   - character references are decoded with XmlCharRefNumber(),
 *     encoded with XmlEncode() and appended;
 *   - plain data is appended with poolAppend();
 *   - whitespace and newline tokens are stored as a single 0x20;
 *   - entity references are either predefined entities (appended as
 *     one character) or general entities looked up in
 *     dtd->generalEntities, whose text is expanded by a recursive call
 *     using the parser's internal encoding.
 *
 * isCdata - when false, a space is tested against the pool being empty
 *           or already ending in 0x20 before it is stored.
 *
 * Returns an XML_Error code.  parser->m_eventPtr is updated on error
 * only when `enc` is the parser's own encoding.
 *
 * NOTE(review): some lines of this definition (braces, break
 * statements, several tests and labels, comment terminators) are absent
 * from this listing; the comments describe only the visible statements.
 */
5509 static enum XML_Error
5510 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5511 const char *ptr, const char *end,
5514 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
5517 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5520 return XML_ERROR_NONE;
/* Invalid token: the event position is moved to `next`, but only when
   the text being scanned is in the parser's own encoding. */
5521 case XML_TOK_INVALID:
5522 if (enc == parser->m_encoding)
5523 parser->m_eventPtr = next;
5524 return XML_ERROR_INVALID_TOKEN;
/* Incomplete token: also reported as XML_ERROR_INVALID_TOKEN, with the
   event position left at the start of the token. */
5525 case XML_TOK_PARTIAL:
5526 if (enc == parser->m_encoding)
5527 parser->m_eventPtr = ptr;
5528 return XML_ERROR_INVALID_TOKEN;
/* Numeric character reference: decode the code point, then encode it
   into buf and append the resulting characters to the pool. */
5529 case XML_TOK_CHAR_REF:
5531 XML_Char buf[XML_ENCODE_MAX];
5533 int n = XmlCharRefNumber(enc, ptr);
5535 if (enc == parser->m_encoding)
5536 parser->m_eventPtr = ptr;
5537 return XML_ERROR_BAD_CHAR_REF;
5540 && n == 0x20 /* space */
5541 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5543 n = XmlEncode(n, (ICHAR *)buf);
5544 /* The XmlEncode() functions can never return 0 here. That
5545 * error return happens if the code point passed in is either
5546 * negative or greater than or equal to 0x110000. The
5547 * XmlCharRefNumber() functions will all return a number
5548 * strictly less than 0x110000 or a negative value if an error
5549 * occurred. The negative value is intercepted above, so
5550 * XmlEncode() is never passed a value it might return an
5553 for (i = 0; i < n; i++) {
5554 if (!poolAppendChar(pool, buf[i]))
5555 return XML_ERROR_NO_MEMORY;
5559 case XML_TOK_DATA_CHARS:
5560 if (!poolAppend(pool, enc, ptr, next))
5561 return XML_ERROR_NO_MEMORY;
5563 case XML_TOK_TRAILING_CR:
5564 next = ptr + enc->minBytesPerChar;
5566 case XML_TOK_ATTRIBUTE_VALUE_S:
5567 case XML_TOK_DATA_NEWLINE:
5568 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5570 if (!poolAppendChar(pool, 0x20))
5571 return XML_ERROR_NO_MEMORY;
5573 case XML_TOK_ENTITY_REF:
5575 const XML_Char *name;
5577 char checkEntityDecl;
5578 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
5579 ptr + enc->minBytesPerChar,
5580 next - enc->minBytesPerChar);
5582 if (!poolAppendChar(pool, ch))
5583 return XML_ERROR_NO_MEMORY;
5586 name = poolStoreString(&parser->m_temp2Pool, enc,
5587 ptr + enc->minBytesPerChar,
5588 next - enc->minBytesPerChar);
5590 return XML_ERROR_NO_MEMORY;
5591 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
5592 poolDiscard(&parser->m_temp2Pool);
5593 /* First, determine if a check for an existing declaration is needed;
5594 if yes, check that the entity exists, and that it is internal.
5596 if (pool == &dtd->pool) /* are we called from prolog? */
5599 parser->m_prologState.documentEntity &&
5600 #endif /* XML_DTD */
5602 ? !parser->m_openInternalEntities
5603 : !dtd->hasParamEntityRefs);
5604 else /* if (pool == &parser->m_tempPool): we are called from content */
5605 checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone;
/* When a declaration check is required, the two returns below reject
   the reference; the second one fires for an entity whose is_internal
   flag is clear.  NOTE(review): the test guarding the first return is
   not visible in this listing. */
5606 if (checkEntityDecl) {
5608 return XML_ERROR_UNDEFINED_ENTITY;
5609 else if (!entity->is_internal)
5610 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5613 /* Cannot report skipped entity here - see comments on
5614 parser->m_skippedEntityHandler.
5615 if (parser->m_skippedEntityHandler)
5616 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
5618 /* Cannot call the default handler because this would be
5619 out of sync with the call to the startElementHandler.
5620 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
5621 reportDefault(parser, enc, ptr, next);
5626 if (enc == parser->m_encoding) {
5627 /* It does not appear that this line can be executed.
5629 * The "if (entity->open)" check catches recursive entity
5630 * definitions. In order to be called with an open
5631 * entity, it must have gone through this code before and
5632 * been through the recursive call to
5633 * appendAttributeValue() some lines below. That call
5634 * sets the local encoding ("enc") to the parser's
5635 * internal encoding (internal_utf8 or internal_utf16),
5636 * which can never be the same as the principal encoding.
5637 * It doesn't appear there is another code path that gets
5638 * here with entity->open being TRUE.
5640 * Since it is not certain that this logic is watertight,
5641 * we keep the line and merely exclude it from coverage
5644 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
5646 return XML_ERROR_RECURSIVE_ENTITY_REF;
/* An entity that carries a notation cannot be referenced from an
   attribute value. */
5648 if (entity->notation) {
5649 if (enc == parser->m_encoding)
5650 parser->m_eventPtr = ptr;
5651 return XML_ERROR_BINARY_ENTITY_REF;
/* No stored text for the entity: reported as a reference to an
   external entity inside an attribute. */
5653 if (!entity->textPtr) {
5654 if (enc == parser->m_encoding)
5655 parser->m_eventPtr = ptr;
5656 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
/* Expand the entity text by recursing with the parser's internal
   encoding; the open flag is set for the duration of the call. */
5659 enum XML_Error result;
5660 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5661 entity->open = XML_TRUE;
5662 result = appendAttributeValue(parser, parser->m_internalEncoding, isCdata,
5663 (char *)entity->textPtr,
5664 (char *)textEnd, pool);
5665 entity->open = XML_FALSE;
5672 /* The only token returned by XmlAttributeValueTok() that does
5673 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
5674 * Getting that would require an entity name to contain an
5675 * incomplete XML character (e.g. \xE2\x82); however previous
5676 * tokenisers will have already recognised and rejected such
5677 * names before XmlAttributeValueTok() gets a look-in. This
5678 * default case should be retained as a safety net, but the code
5679 * excluded from coverage tests.
5683 if (enc == parser->m_encoding)
5684 parser->m_eventPtr = ptr;
5685 return XML_ERROR_UNEXPECTED_STATE;
5686 /* LCOV_EXCL_STOP */
5693 static enum XML_Error
5694 storeEntityValue(XML_Parser parser,
5695 const ENCODING *enc,
5696 const char *entityTextPtr,
5697 const char *entityTextEnd)
5699 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
5700 STRING_POOL *pool = &(dtd->entityValuePool);
5701 enum XML_Error result = XML_ERROR_NONE;
5703 int oldInEntityValue = parser->m_prologState.inEntityValue;
5704 parser->m_prologState.inEntityValue = 1;
5705 #endif /* XML_DTD */
5706 /* never return Null for the value argument in EntityDeclHandler,
5707 since this would indicate an external entity; therefore we
5708 have to make sure that entityValuePool.start is not null */
5709 if (!pool->blocks) {
5710 if (!poolGrow(pool))
5711 return XML_ERROR_NO_MEMORY;
5716 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5718 case XML_TOK_PARAM_ENTITY_REF:
5720 if (parser->m_isParamEntity || enc != parser->m_encoding) {
5721 const XML_Char *name;
5723 name = poolStoreString(&parser->m_tempPool, enc,
5724 entityTextPtr + enc->minBytesPerChar,
5725 next - enc->minBytesPerChar);
5727 result = XML_ERROR_NO_MEMORY;
5728 goto endEntityValue;
5730 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5731 poolDiscard(&parser->m_tempPool);
5733 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5734 /* cannot report skipped entity here - see comments on
5735 parser->m_skippedEntityHandler
5736 if (parser->m_skippedEntityHandler)
5737 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
5739 dtd->keepProcessing = dtd->standalone;
5740 goto endEntityValue;
5743 if (enc == parser->m_encoding)
5744 parser->m_eventPtr = entityTextPtr;
5745 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5746 goto endEntityValue;
5748 if (entity->systemId) {
5749 if (parser->m_externalEntityRefHandler) {
5750 dtd->paramEntityRead = XML_FALSE;
5751 entity->open = XML_TRUE;
5752 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
5756 entity->publicId)) {
5757 entity->open = XML_FALSE;
5758 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5759 goto endEntityValue;
5761 entity->open = XML_FALSE;
5762 if (!dtd->paramEntityRead)
5763 dtd->keepProcessing = dtd->standalone;
5766 dtd->keepProcessing = dtd->standalone;
5769 entity->open = XML_TRUE;
5770 result = storeEntityValue(parser,
5771 parser->m_internalEncoding,
5772 (char *)entity->textPtr,
5773 (char *)(entity->textPtr
5774 + entity->textLen));
5775 entity->open = XML_FALSE;
5777 goto endEntityValue;
5781 #endif /* XML_DTD */
5782 /* In the internal subset, PE references are not legal
5783 within markup declarations, e.g entity values in this case. */
5784 parser->m_eventPtr = entityTextPtr;
5785 result = XML_ERROR_PARAM_ENTITY_REF;
5786 goto endEntityValue;
5788 result = XML_ERROR_NONE;
5789 goto endEntityValue;
5790 case XML_TOK_ENTITY_REF:
5791 case XML_TOK_DATA_CHARS:
5792 if (!poolAppend(pool, enc, entityTextPtr, next)) {
5793 result = XML_ERROR_NO_MEMORY;
5794 goto endEntityValue;
5797 case XML_TOK_TRAILING_CR:
5798 next = entityTextPtr + enc->minBytesPerChar;
5800 case XML_TOK_DATA_NEWLINE:
5801 if (pool->end == pool->ptr && !poolGrow(pool)) {
5802 result = XML_ERROR_NO_MEMORY;
5803 goto endEntityValue;
5805 *(pool->ptr)++ = 0xA;
5807 case XML_TOK_CHAR_REF:
5809 XML_Char buf[XML_ENCODE_MAX];
5811 int n = XmlCharRefNumber(enc, entityTextPtr);
5813 if (enc == parser->m_encoding)
5814 parser->m_eventPtr = entityTextPtr;
5815 result = XML_ERROR_BAD_CHAR_REF;
5816 goto endEntityValue;
5818 n = XmlEncode(n, (ICHAR *)buf);
5819 /* The XmlEncode() functions can never return 0 here. That
5820 * error return happens if the code point passed in is either
5821 * negative or greater than or equal to 0x110000. The
5822 * XmlCharRefNumber() functions will all return a number
5823 * strictly less than 0x110000 or a negative value if an error
5824 * occurred. The negative value is intercepted above, so
5825 * XmlEncode() is never passed a value it might return an
5828 for (i = 0; i < n; i++) {
5829 if (pool->end == pool->ptr && !poolGrow(pool)) {
5830 result = XML_ERROR_NO_MEMORY;
5831 goto endEntityValue;
5833 *(pool->ptr)++ = buf[i];
5837 case XML_TOK_PARTIAL:
5838 if (enc == parser->m_encoding)
5839 parser->m_eventPtr = entityTextPtr;
5840 result = XML_ERROR_INVALID_TOKEN;
5841 goto endEntityValue;
5842 case XML_TOK_INVALID:
5843 if (enc == parser->m_encoding)
5844 parser->m_eventPtr = next;
5845 result = XML_ERROR_INVALID_TOKEN;
5846 goto endEntityValue;
5848 /* This default case should be unnecessary -- all the tokens
5849 * that XmlEntityValueTok() can return have their own explicit
5850 * cases -- but should be retained for safety. We do however
5851 * exclude it from the coverage statistics.
5855 if (enc == parser->m_encoding)
5856 parser->m_eventPtr = entityTextPtr;
5857 result = XML_ERROR_UNEXPECTED_STATE;
5858 goto endEntityValue;
5859 /* LCOV_EXCL_STOP */
5861 entityTextPtr = next;
5865 parser->m_prologState.inEntityValue = oldInEntityValue;
5866 #endif /* XML_DTD */
5870 static void FASTCALL
5871 normalizeLines(XML_Char *s)
5875 if (*s == XML_T('\0'))
5894 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
5895 const char *start, const char *end)
5897 const XML_Char *target;
5900 if (!parser->m_processingInstructionHandler) {
5901 if (parser->m_defaultHandler)
5902 reportDefault(parser, enc, start, end);
5905 start += enc->minBytesPerChar * 2;
5906 tem = start + XmlNameLength(enc, start);
5907 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
5910 poolFinish(&parser->m_tempPool);
5911 data = poolStoreString(&parser->m_tempPool, enc,
5913 end - enc->minBytesPerChar*2);
5916 normalizeLines(data);
5917 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
5918 poolClear(&parser->m_tempPool);
5923 reportComment(XML_Parser parser, const ENCODING *enc,
5924 const char *start, const char *end)
5927 if (!parser->m_commentHandler) {
5928 if (parser->m_defaultHandler)
5929 reportDefault(parser, enc, start, end);
5932 data = poolStoreString(&parser->m_tempPool,
5934 start + enc->minBytesPerChar * 4,
5935 end - enc->minBytesPerChar * 3);
5938 normalizeLines(data);
5939 parser->m_commentHandler(parser->m_handlerArg, data);
5940 poolClear(&parser->m_tempPool);
5945 reportDefault(XML_Parser parser, const ENCODING *enc,
5946 const char *s, const char *end)
5948 if (MUST_CONVERT(enc, s)) {
5949 enum XML_Convert_Result convert_res;
5950 const char **eventPP;
5951 const char **eventEndPP;
5952 if (enc == parser->m_encoding) {
5953 eventPP = &parser->m_eventPtr;
5954 eventEndPP = &parser->m_eventEndPtr;
5957 /* To get here, two things must be true; the parser must be
5958 * using a character encoding that is not the same as the
5959 * encoding passed in, and the encoding passed in must need
5960 * conversion to the internal format (UTF-8 unless XML_UNICODE
5961 * is defined). The only occasions on which the encoding passed
5962 * in is not the same as the parser's encoding are when it is
5963 * the internal encoding (e.g. a previously defined parameter
5964 * entity, already converted to internal format). This by
5965 * definition doesn't need conversion, so the whole branch never
5968 * For safety's sake we don't delete these lines and merely
5969 * exclude them from coverage statistics.
5973 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
5974 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
5975 /* LCOV_EXCL_STOP */
5978 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
5979 convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
5981 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
5983 } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
5986 parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
5991 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
5992 XML_Bool isId, const XML_Char *value, XML_Parser parser)
5994 DEFAULT_ATTRIBUTE *att;
5995 if (value || isId) {
5996 /* The handling of default attributes gets messed up if we have
5997 a default which duplicates a non-default. */
5999 for (i = 0; i < type->nDefaultAtts; i++)
6000 if (attId == type->defaultAtts[i].id)
6002 if (isId && !type->idAtt && !attId->xmlns)
6003 type->idAtt = attId;
6005 if (type->nDefaultAtts == type->allocDefaultAtts) {
6006 if (type->allocDefaultAtts == 0) {
6007 type->allocDefaultAtts = 8;
6008 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(parser, type->allocDefaultAtts
6009 * sizeof(DEFAULT_ATTRIBUTE));
6010 if (!type->defaultAtts) {
6011 type->allocDefaultAtts = 0;
6016 DEFAULT_ATTRIBUTE *temp;
6017 int count = type->allocDefaultAtts * 2;
6018 temp = (DEFAULT_ATTRIBUTE *)
6019 REALLOC(parser, type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE)));
6022 type->allocDefaultAtts = count;
6023 type->defaultAtts = temp;
6026 att = type->defaultAtts + type->nDefaultAtts;
6029 att->isCdata = isCdata;
6031 attId->maybeTokenized = XML_TRUE;
6032 type->nDefaultAtts += 1;
6037 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
6039 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
6040 const XML_Char *name;
6041 for (name = elementType->name; *name; name++) {
6042 if (*name == XML_T(ASCII_COLON)) {
6045 for (s = elementType->name; s != name; s++) {
6046 if (!poolAppendChar(&dtd->pool, *s))
6049 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6051 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6055 if (prefix->name == poolStart(&dtd->pool))
6056 poolFinish(&dtd->pool);
6058 poolDiscard(&dtd->pool);
6059 elementType->prefix = prefix;
6066 static ATTRIBUTE_ID *
6067 getAttributeId(XML_Parser parser, const ENCODING *enc,
6068 const char *start, const char *end)
6070 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
6072 const XML_Char *name;
6073 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6075 name = poolStoreString(&dtd->pool, enc, start, end);
6078 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
6080 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID));
6083 if (id->name != name)
6084 poolDiscard(&dtd->pool);
6086 poolFinish(&dtd->pool);
6089 else if (name[0] == XML_T(ASCII_x)
6090 && name[1] == XML_T(ASCII_m)
6091 && name[2] == XML_T(ASCII_l)
6092 && name[3] == XML_T(ASCII_n)
6093 && name[4] == XML_T(ASCII_s)
6094 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6095 if (name[5] == XML_T('\0'))
6096 id->prefix = &dtd->defaultPrefix;
6098 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX));
6099 id->xmlns = XML_TRUE;
6103 for (i = 0; name[i]; i++) {
6104 /* attributes without prefix are *not* in the default namespace */
6105 if (name[i] == XML_T(ASCII_COLON)) {
6107 for (j = 0; j < i; j++) {
6108 if (!poolAppendChar(&dtd->pool, name[j]))
6111 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6113 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6117 if (id->prefix->name == poolStart(&dtd->pool))
6118 poolFinish(&dtd->pool);
6120 poolDiscard(&dtd->pool);
6129 #define CONTEXT_SEP XML_T(ASCII_FF)
6131 static const XML_Char *
6132 getContext(XML_Parser parser)
6134 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
6135 HASH_TABLE_ITER iter;
6136 XML_Bool needSep = XML_FALSE;
6138 if (dtd->defaultPrefix.binding) {
6141 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6143 len = dtd->defaultPrefix.binding->uriLen;
6144 if (parser->m_namespaceSeparator)
6146 for (i = 0; i < len; i++) {
6147 if (!poolAppendChar(&parser->m_tempPool, dtd->defaultPrefix.binding->uri[i])) {
6148 /* Because of memory caching, I don't believe this line can be
6151 * This is part of a loop copying the default prefix binding
6152 * URI into the parser's temporary string pool. Previously,
6153 * that URI was copied into the same string pool, with a
6154 * terminating NUL character, as part of setContext(). When
6155 * the pool was cleared, that leaves a block definitely big
6156 * enough to hold the URI on the free block list of the pool.
6157 * The URI copy in getContext() therefore cannot run out of
6160 * If the pool is used between the setContext() and
6161 * getContext() calls, the worst it can do is leave a bigger
6162 * block on the front of the free list. Given that this is
6163 * all somewhat inobvious and program logic can be changed, we
6164 * don't delete the line but we do exclude it from the test
6165 * coverage statistics.
6167 return NULL; /* LCOV_EXCL_LINE */
6173 hashTableIterInit(&iter, &(dtd->prefixes));
6178 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6181 if (!prefix->binding) {
6182 /* This test appears to be (justifiable) paranoia. There does
6183 * not seem to be a way of injecting a prefix without a binding
6184 * that doesn't get errored long before this function is called.
6185 * The test should remain for safety's sake, so we instead
6186 * exclude the following line from the coverage statistics.
6188 continue; /* LCOV_EXCL_LINE */
6190 if (needSep && !poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6192 for (s = prefix->name; *s; s++)
6193 if (!poolAppendChar(&parser->m_tempPool, *s))
6195 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6197 len = prefix->binding->uriLen;
6198 if (parser->m_namespaceSeparator)
6200 for (i = 0; i < len; i++)
6201 if (!poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6207 hashTableIterInit(&iter, &(dtd->generalEntities));
6210 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6215 if (needSep && !poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6217 for (s = e->name; *s; s++)
6218 if (!poolAppendChar(&parser->m_tempPool, *s))
6223 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6225 return parser->m_tempPool.start;
6229 setContext(XML_Parser parser, const XML_Char *context)
6231 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
6232 const XML_Char *s = context;
6234 while (*context != XML_T('\0')) {
6235 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6237 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6239 e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&parser->m_tempPool), 0);
6242 if (*s != XML_T('\0'))
6245 poolDiscard(&parser->m_tempPool);
6247 else if (*s == XML_T(ASCII_EQUALS)) {
6249 if (poolLength(&parser->m_tempPool) == 0)
6250 prefix = &dtd->defaultPrefix;
6252 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6254 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&parser->m_tempPool),
6258 if (prefix->name == poolStart(&parser->m_tempPool)) {
6259 prefix->name = poolCopyString(&dtd->pool, prefix->name);
6263 poolDiscard(&parser->m_tempPool);
6265 for (context = s + 1;
6266 *context != CONTEXT_SEP && *context != XML_T('\0');
6268 if (!poolAppendChar(&parser->m_tempPool, *context))
6270 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6272 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
6273 &parser->m_inheritedBindings) != XML_ERROR_NONE)
6275 poolDiscard(&parser->m_tempPool);
6276 if (*context != XML_T('\0'))
6281 if (!poolAppendChar(&parser->m_tempPool, *s))
6289 static void FASTCALL
6290 normalizePublicId(XML_Char *publicId)
6292 XML_Char *p = publicId;
6294 for (s = publicId; *s; s++) {
6299 if (p != publicId && p[-1] != 0x20)
6306 if (p != publicId && p[-1] == 0x20)
6312 dtdCreate(const XML_Memory_Handling_Suite *ms)
6314 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
6317 poolInit(&(p->pool), ms);
6318 poolInit(&(p->entityValuePool), ms);
6319 hashTableInit(&(p->generalEntities), ms);
6320 hashTableInit(&(p->elementTypes), ms);
6321 hashTableInit(&(p->attributeIds), ms);
6322 hashTableInit(&(p->prefixes), ms);
6324 p->paramEntityRead = XML_FALSE;
6325 hashTableInit(&(p->paramEntities), ms);
6326 #endif /* XML_DTD */
6327 p->defaultPrefix.name = NULL;
6328 p->defaultPrefix.binding = NULL;
6330 p->in_eldecl = XML_FALSE;
6331 p->scaffIndex = NULL;
6336 p->contentStringLen = 0;
6338 p->keepProcessing = XML_TRUE;
6339 p->hasParamEntityRefs = XML_FALSE;
6340 p->standalone = XML_FALSE;
6345 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms)
6347 HASH_TABLE_ITER iter;
6348 hashTableIterInit(&iter, &(p->elementTypes));
6350 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6353 if (e->allocDefaultAtts != 0)
6354 ms->free_fcn(e->defaultAtts);
6356 hashTableClear(&(p->generalEntities));
6358 p->paramEntityRead = XML_FALSE;
6359 hashTableClear(&(p->paramEntities));
6360 #endif /* XML_DTD */
6361 hashTableClear(&(p->elementTypes));
6362 hashTableClear(&(p->attributeIds));
6363 hashTableClear(&(p->prefixes));
6364 poolClear(&(p->pool));
6365 poolClear(&(p->entityValuePool));
6366 p->defaultPrefix.name = NULL;
6367 p->defaultPrefix.binding = NULL;
6369 p->in_eldecl = XML_FALSE;
6371 ms->free_fcn(p->scaffIndex);
6372 p->scaffIndex = NULL;
6373 ms->free_fcn(p->scaffold);
6379 p->contentStringLen = 0;
6381 p->keepProcessing = XML_TRUE;
6382 p->hasParamEntityRefs = XML_FALSE;
6383 p->standalone = XML_FALSE;
6387 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms)
6389 HASH_TABLE_ITER iter;
6390 hashTableIterInit(&iter, &(p->elementTypes));
6392 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6395 if (e->allocDefaultAtts != 0)
6396 ms->free_fcn(e->defaultAtts);
6398 hashTableDestroy(&(p->generalEntities));
6400 hashTableDestroy(&(p->paramEntities));
6401 #endif /* XML_DTD */
6402 hashTableDestroy(&(p->elementTypes));
6403 hashTableDestroy(&(p->attributeIds));
6404 hashTableDestroy(&(p->prefixes));
6405 poolDestroy(&(p->pool));
6406 poolDestroy(&(p->entityValuePool));
6408 ms->free_fcn(p->scaffIndex);
6409 ms->free_fcn(p->scaffold);
6414 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6415 The new DTD has already been initialized.
6418 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms)
6420 HASH_TABLE_ITER iter;
6422 /* Copy the prefix table. */
6424 hashTableIterInit(&iter, &(oldDtd->prefixes));
6426 const XML_Char *name;
6427 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6430 name = poolCopyString(&(newDtd->pool), oldP->name);
6433 if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
6437 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6439 /* Copy the attribute id table. */
6443 const XML_Char *name;
6444 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6448 /* Remember to allocate the scratch byte before the name. */
6449 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
6451 name = poolCopyString(&(newDtd->pool), oldA->name);
6455 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
6456 sizeof(ATTRIBUTE_ID));
6459 newA->maybeTokenized = oldA->maybeTokenized;
6461 newA->xmlns = oldA->xmlns;
6462 if (oldA->prefix == &oldDtd->defaultPrefix)
6463 newA->prefix = &newDtd->defaultPrefix;
6465 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6466 oldA->prefix->name, 0);
6470 /* Copy the element type table. */
6472 hashTableIterInit(&iter, &(oldDtd->elementTypes));
6477 const XML_Char *name;
6478 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6481 name = poolCopyString(&(newDtd->pool), oldE->name);
6484 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
6485 sizeof(ELEMENT_TYPE));
6488 if (oldE->nDefaultAtts) {
6489 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)
6490 ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6491 if (!newE->defaultAtts) {
6496 newE->idAtt = (ATTRIBUTE_ID *)
6497 lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0);
6498 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6500 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6501 oldE->prefix->name, 0);
6502 for (i = 0; i < newE->nDefaultAtts; i++) {
6503 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)
6504 lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
6505 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6506 if (oldE->defaultAtts[i].value) {
6507 newE->defaultAtts[i].value
6508 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
6509 if (!newE->defaultAtts[i].value)
6513 newE->defaultAtts[i].value = NULL;
6517 /* Copy the entity tables. */
6518 if (!copyEntityTable(oldParser,
6519 &(newDtd->generalEntities),
6521 &(oldDtd->generalEntities)))
6525 if (!copyEntityTable(oldParser,
6526 &(newDtd->paramEntities),
6528 &(oldDtd->paramEntities)))
6530 newDtd->paramEntityRead = oldDtd->paramEntityRead;
6531 #endif /* XML_DTD */
6533 newDtd->keepProcessing = oldDtd->keepProcessing;
6534 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
6535 newDtd->standalone = oldDtd->standalone;
6537 /* Don't want deep copying for scaffolding */
6538 newDtd->in_eldecl = oldDtd->in_eldecl;
6539 newDtd->scaffold = oldDtd->scaffold;
6540 newDtd->contentStringLen = oldDtd->contentStringLen;
6541 newDtd->scaffSize = oldDtd->scaffSize;
6542 newDtd->scaffLevel = oldDtd->scaffLevel;
6543 newDtd->scaffIndex = oldDtd->scaffIndex;
6549 copyEntityTable(XML_Parser oldParser,
6550 HASH_TABLE *newTable,
6551 STRING_POOL *newPool,
6552 const HASH_TABLE *oldTable)
6554 HASH_TABLE_ITER iter;
6555 const XML_Char *cachedOldBase = NULL;
6556 const XML_Char *cachedNewBase = NULL;
6558 hashTableIterInit(&iter, oldTable);
6562 const XML_Char *name;
6563 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
6566 name = poolCopyString(newPool, oldE->name);
6569 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
6572 if (oldE->systemId) {
6573 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
6576 newE->systemId = tem;
6578 if (oldE->base == cachedOldBase)
6579 newE->base = cachedNewBase;
6581 cachedOldBase = oldE->base;
6582 tem = poolCopyString(newPool, cachedOldBase);
6585 cachedNewBase = newE->base = tem;
6588 if (oldE->publicId) {
6589 tem = poolCopyString(newPool, oldE->publicId);
6592 newE->publicId = tem;
6596 const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr,
6600 newE->textPtr = tem;
6601 newE->textLen = oldE->textLen;
6603 if (oldE->notation) {
6604 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
6607 newE->notation = tem;
6609 newE->is_param = oldE->is_param;
6610 newE->is_internal = oldE->is_internal;
6615 #define INIT_POWER 6
6617 static XML_Bool FASTCALL
6618 keyeq(KEY s1, KEY s2)
6620 for (; *s1 == *s2; s1++, s2++)
6630 for (; *s; s++, len++);
6635 copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key)
6638 key->k[1] = get_hash_secret_salt(parser);
6641 static unsigned long FASTCALL
6642 hash(XML_Parser parser, KEY s)
6644 struct siphash state;
6648 copy_salt_to_sipkey(parser, &key);
6649 sip24_init(&state, &key);
6650 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
6651 return (unsigned long)sip24_final(&state);
6655 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize)
6658 if (table->size == 0) {
6662 table->power = INIT_POWER;
6663 /* table->size is a power of 2 */
6664 table->size = (size_t)1 << INIT_POWER;
6665 tsize = table->size * sizeof(NAMED *);
6666 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
6671 memset(table->v, 0, tsize);
6672 i = hash(parser, name) & ((unsigned long)table->size - 1);
6675 unsigned long h = hash(parser, name);
6676 unsigned long mask = (unsigned long)table->size - 1;
6677 unsigned char step = 0;
6679 while (table->v[i]) {
6680 if (keyeq(name, table->v[i]->name))
6683 step = PROBE_STEP(h, mask, table->power);
6684 i < step ? (i += table->size - step) : (i -= step);
6689 /* check for overflow (table is half full) */
6690 if (table->used >> (table->power - 1)) {
6691 unsigned char newPower = table->power + 1;
6692 size_t newSize = (size_t)1 << newPower;
6693 unsigned long newMask = (unsigned long)newSize - 1;
6694 size_t tsize = newSize * sizeof(NAMED *);
6695 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
6698 memset(newV, 0, tsize);
6699 for (i = 0; i < table->size; i++)
6701 unsigned long newHash = hash(parser, table->v[i]->name);
6702 size_t j = newHash & newMask;
6706 step = PROBE_STEP(newHash, newMask, newPower);
6707 j < step ? (j += newSize - step) : (j -= step);
6709 newV[j] = table->v[i];
6711 table->mem->free_fcn(table->v);
6713 table->power = newPower;
6714 table->size = newSize;
6717 while (table->v[i]) {
6719 step = PROBE_STEP(h, newMask, newPower);
6720 i < step ? (i += newSize - step) : (i -= step);
6724 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
6727 memset(table->v[i], 0, createSize);
6728 table->v[i]->name = name;
6733 static void FASTCALL
6734 hashTableClear(HASH_TABLE *table)
6737 for (i = 0; i < table->size; i++) {
6738 table->mem->free_fcn(table->v[i]);
6744 static void FASTCALL
6745 hashTableDestroy(HASH_TABLE *table)
6748 for (i = 0; i < table->size; i++)
6749 table->mem->free_fcn(table->v[i]);
6750 table->mem->free_fcn(table->v);
6753 static void FASTCALL
6754 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms)
6763 static void FASTCALL
6764 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table)
6767 iter->end = iter->p + table->size;
6770 static NAMED * FASTCALL
6771 hashTableIterNext(HASH_TABLE_ITER *iter)
6773 while (iter->p != iter->end) {
6774 NAMED *tem = *(iter->p)++;
6781 static void FASTCALL
6782 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms)
6784 pool->blocks = NULL;
6785 pool->freeBlocks = NULL;
6792 static void FASTCALL
6793 poolClear(STRING_POOL *pool)
6795 if (!pool->freeBlocks)
6796 pool->freeBlocks = pool->blocks;
6798 BLOCK *p = pool->blocks;
6800 BLOCK *tem = p->next;
6801 p->next = pool->freeBlocks;
6802 pool->freeBlocks = p;
6806 pool->blocks = NULL;
6812 static void FASTCALL
6813 poolDestroy(STRING_POOL *pool)
6815 BLOCK *p = pool->blocks;
6817 BLOCK *tem = p->next;
6818 pool->mem->free_fcn(p);
6821 p = pool->freeBlocks;
6823 BLOCK *tem = p->next;
6824 pool->mem->free_fcn(p);
6830 poolAppend(STRING_POOL *pool, const ENCODING *enc,
6831 const char *ptr, const char *end)
6833 if (!pool->ptr && !poolGrow(pool))
6836 const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
6837 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
6839 if (!poolGrow(pool))
6845 static const XML_Char * FASTCALL
6846 poolCopyString(STRING_POOL *pool, const XML_Char *s)
6849 if (!poolAppendChar(pool, *s))
6857 static const XML_Char *
6858 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
6860 if (!pool->ptr && !poolGrow(pool)) {
6861 /* The following line is unreachable given the current usage of
6862 * poolCopyStringN(). Currently it is called from exactly one
6863 * place to copy the text of a simple general entity. By that
6864 * point, the name of the entity is already stored in the pool, so
6865 * pool->ptr cannot be NULL.
6867 * If poolCopyStringN() is used elsewhere as it well might be,
6868 * this line may well become executable again. Regardless, this
6869 * sort of check shouldn't be removed lightly, so we just exclude
6870 * it from the coverage statistics.
6872 return NULL; /* LCOV_EXCL_LINE */
6874 for (; n > 0; --n, s++) {
6875 if (!poolAppendChar(pool, *s))
6883 static const XML_Char * FASTCALL
6884 poolAppendString(STRING_POOL *pool, const XML_Char *s)
6887 if (!poolAppendChar(pool, *s))
6895 poolStoreString(STRING_POOL *pool, const ENCODING *enc,
6896 const char *ptr, const char *end)
6898 if (!poolAppend(pool, enc, ptr, end))
6900 if (pool->ptr == pool->end && !poolGrow(pool))
6907 poolBytesToAllocateFor(int blockSize)
6909 /* Unprotected math would be:
6910 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
6912 ** Detect overflow, avoiding _signed_ overflow undefined behavior
6913 ** For a + b * c we check b * c in isolation first, so that addition of a
6914 ** on top has no chance of making us accept a small non-negative number
6916 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
6921 if (blockSize > (int)(INT_MAX / stretch))
6925 const int stretchedBlockSize = blockSize * (int)stretch;
6926 const int bytesToAllocate = (int)(
6927 offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
6928 if (bytesToAllocate < 0)
6931 return (size_t)bytesToAllocate;
6935 static XML_Bool FASTCALL
6936 poolGrow(STRING_POOL *pool)
6938 if (pool->freeBlocks) {
6939 if (pool->start == 0) {
6940 pool->blocks = pool->freeBlocks;
6941 pool->freeBlocks = pool->freeBlocks->next;
6942 pool->blocks->next = NULL;
6943 pool->start = pool->blocks->s;
6944 pool->end = pool->start + pool->blocks->size;
6945 pool->ptr = pool->start;
6948 if (pool->end - pool->start < pool->freeBlocks->size) {
6949 BLOCK *tem = pool->freeBlocks->next;
6950 pool->freeBlocks->next = pool->blocks;
6951 pool->blocks = pool->freeBlocks;
6952 pool->freeBlocks = tem;
6953 memcpy(pool->blocks->s, pool->start,
6954 (pool->end - pool->start) * sizeof(XML_Char));
6955 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6956 pool->start = pool->blocks->s;
6957 pool->end = pool->start + pool->blocks->size;
6961 if (pool->blocks && pool->start == pool->blocks->s) {
6963 int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
6964 size_t bytesToAllocate;
6966 /* NOTE: Needs to be calculated prior to calling `realloc`
6967 to avoid dangling pointers: */
6968 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
6970 if (blockSize < 0) {
6971 /* This condition traps a situation where either more than
6972 * INT_MAX/2 bytes have already been allocated. This isn't
6973 * readily testable, since it is unlikely that an average
6974 * machine will have that much memory, so we exclude it from the
6975 * coverage statistics.
6977 return XML_FALSE; /* LCOV_EXCL_LINE */
6980 bytesToAllocate = poolBytesToAllocateFor(blockSize);
6981 if (bytesToAllocate == 0)
6985 pool->mem->realloc_fcn(pool->blocks, (unsigned)bytesToAllocate);
6988 pool->blocks = temp;
6989 pool->blocks->size = blockSize;
6990 pool->ptr = pool->blocks->s + offsetInsideBlock;
6991 pool->start = pool->blocks->s;
6992 pool->end = pool->start + blockSize;
6996 int blockSize = (int)(pool->end - pool->start);
6997 size_t bytesToAllocate;
6999 if (blockSize < 0) {
7000 /* This condition traps a situation where either more than
7001 * INT_MAX bytes have already been allocated (which is prevented
7002 * by various pieces of program logic, not least this one, never
7003 * mind the unlikelihood of actually having that much memory) or
7004 * the pool control fields have been corrupted (which could
7005 * conceivably happen in an extremely buggy user handler
7006 * function). Either way it isn't readily testable, so we
7007 * exclude it from the coverage statistics.
7009 return XML_FALSE; /* LCOV_EXCL_LINE */
7012 if (blockSize < INIT_BLOCK_SIZE)
7013 blockSize = INIT_BLOCK_SIZE;
7015 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7016 if ((int)((unsigned)blockSize * 2U) < 0) {
7022 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7023 if (bytesToAllocate == 0)
7026 tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
7029 tem->size = blockSize;
7030 tem->next = pool->blocks;
7032 if (pool->ptr != pool->start)
7033 memcpy(tem->s, pool->start,
7034 (pool->ptr - pool->start) * sizeof(XML_Char));
7035 pool->ptr = tem->s + (pool->ptr - pool->start);
7036 pool->start = tem->s;
7037 pool->end = tem->s + blockSize;
7043 nextScaffoldPart(XML_Parser parser)
7045 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
7046 CONTENT_SCAFFOLD * me;
7049 if (!dtd->scaffIndex) {
7050 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7051 if (!dtd->scaffIndex)
7053 dtd->scaffIndex[0] = 0;
7056 if (dtd->scaffCount >= dtd->scaffSize) {
7057 CONTENT_SCAFFOLD *temp;
7058 if (dtd->scaffold) {
7059 temp = (CONTENT_SCAFFOLD *)
7060 REALLOC(parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7063 dtd->scaffSize *= 2;
7066 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7067 * sizeof(CONTENT_SCAFFOLD));
7070 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7072 dtd->scaffold = temp;
7074 next = dtd->scaffCount++;
7075 me = &dtd->scaffold[next];
7076 if (dtd->scaffLevel) {
7077 CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]];
7078 if (parent->lastchild) {
7079 dtd->scaffold[parent->lastchild].nextsib = next;
7081 if (!parent->childcnt)
7082 parent->firstchild = next;
7083 parent->lastchild = next;
7086 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7091 build_node(XML_Parser parser,
7094 XML_Content **contpos,
7097 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
7098 dest->type = dtd->scaffold[src_node].type;
7099 dest->quant = dtd->scaffold[src_node].quant;
7100 if (dest->type == XML_CTYPE_NAME) {
7101 const XML_Char *src;
7102 dest->name = *strpos;
7103 src = dtd->scaffold[src_node].name;
7105 *(*strpos)++ = *src;
7110 dest->numchildren = 0;
7111 dest->children = NULL;
7116 dest->numchildren = dtd->scaffold[src_node].childcnt;
7117 dest->children = *contpos;
7118 *contpos += dest->numchildren;
7119 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7120 i < dest->numchildren;
7121 i++, cn = dtd->scaffold[cn].nextsib) {
7122 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
7128 static XML_Content *
7129 build_model (XML_Parser parser)
7131 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
7135 int allocsize = (dtd->scaffCount * sizeof(XML_Content)
7136 + (dtd->contentStringLen * sizeof(XML_Char)));
7138 ret = (XML_Content *)MALLOC(parser, allocsize);
7142 str = (XML_Char *) (&ret[dtd->scaffCount]);
7145 build_node(parser, 0, ret, &cpos, &str);
7149 static ELEMENT_TYPE *
7150 getElementType(XML_Parser parser,
7151 const ENCODING *enc,
7155 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
7156 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7161 ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE));
7164 if (ret->name != name)
7165 poolDiscard(&dtd->pool);
7167 poolFinish(&dtd->pool);
7168 if (!setElementTypePrefix(parser, ret))
7175 copyString(const XML_Char *s,
7176 const XML_Memory_Handling_Suite *memsuite)
7178 int charsRequired = 0;
7181 /* First determine how long the string is */
7182 while (s[charsRequired] != 0) {
7185 /* Include the terminator */
7188 /* Now allocate space for the copy */
7189 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7192 /* Copy the original into place */
7193 memcpy(result, s, charsRequired * sizeof(XML_Char));