2 * pugixml parser - version 1.12
3 * --------------------------------------------------------
4 * Copyright (C) 2006-2022, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5 * Report bugs and download new versions at https://pugixml.org/
7 * This library is distributed under the MIT License. See notice at the end
10 * This work is based on the pugxml parser, which is:
11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
14 #ifndef SOURCE_PUGIXML_CPP
15 #define SOURCE_PUGIXML_CPP
17 #include "pugixml.hpp"
25 #ifdef PUGIXML_WCHAR_MODE
29 #ifndef PUGIXML_NO_XPATH
34 #ifndef PUGIXML_NO_STL
44 # pragma warning(push)
45 # pragma warning(disable: 4127) // conditional expression is constant
46 # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
47 # pragma warning(disable: 4702) // unreachable code
48 # pragma warning(disable: 4996) // this function or variable may be unsafe
51 #if defined(_MSC_VER) && defined(__c2__)
52 # pragma clang diagnostic push
53 # pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe
56 #ifdef __INTEL_COMPILER
57 # pragma warning(disable: 177) // function was declared but never referenced
58 # pragma warning(disable: 279) // controlling expression is constant
59 # pragma warning(disable: 1478 1786) // function was declared "deprecated"
60 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
63 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
64 # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
69 # pragma warn -8008 // condition is always false
70 # pragma warn -8066 // unreachable code
74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
75 # pragma diag_suppress=178 // function was declared but never referenced
76 # pragma diag_suppress=237 // controlling expression is constant
79 #ifdef __TI_COMPILER_VERSION__
80 # pragma diag_suppress 179 // function was declared but never referenced
84 #if defined(_MSC_VER) && _MSC_VER >= 1300
85 # define PUGI__NO_INLINE __declspec(noinline)
86 #elif defined(__GNUC__)
87 # define PUGI__NO_INLINE __attribute__((noinline))
89 # define PUGI__NO_INLINE
92 // Branch weight controls
93 #if defined(__GNUC__) && !defined(__c2__)
94 # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
96 # define PUGI__UNLIKELY(cond) (cond)
99 // Simple static assertion
100 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
102 // Digital Mars C++ bug workaround for passing char loaded from memory via stack
104 # define PUGI__DMC_VOLATILE volatile
106 # define PUGI__DMC_VOLATILE
109 // Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings
110 #if defined(__clang__) && defined(__has_attribute)
111 # if __has_attribute(no_sanitize)
112 # define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow")))
114 # define PUGI__UNSIGNED_OVERFLOW
117 # define PUGI__UNSIGNED_OVERFLOW
120 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
121 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
127 // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations
128 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
129 # define LLONG_MIN (-LLONG_MAX - 1LL)
130 # define LLONG_MAX __LONG_LONG_MAX__
131 # define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
134 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
135 #if defined(_MSC_VER) && !defined(__S3E__) && !defined(_WIN32_WCE)
136 # define PUGI__MSVC_CRT_VERSION _MSC_VER
137 #elif defined(_WIN32_WCE)
138 # define PUGI__MSVC_CRT_VERSION 1310 // MSVC7.1
141 // Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size.
142 #if __cplusplus >= 201103
143 # define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__)
144 #elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
145 # define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__)
147 # define PUGI__SNPRINTF sprintf
150 // We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat.
151 #ifdef PUGIXML_HEADER_ONLY
152 # define PUGI__NS_BEGIN namespace pugi { namespace impl {
153 # define PUGI__NS_END } }
154 # define PUGI__FN inline
155 # define PUGI__FN_NO_INLINE inline
157 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
158 # define PUGI__NS_BEGIN namespace pugi { namespace impl {
159 # define PUGI__NS_END } }
161 # define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
162 # define PUGI__NS_END } } }
165 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE
169 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
172 # ifndef _UINTPTR_T_DEFINED
173 typedef size_t uintptr_t;
176 typedef unsigned __int8 uint8_t;
177 typedef unsigned __int16 uint16_t;
178 typedef unsigned __int32 uint32_t;
186 PUGI__FN void* default_allocate(size_t size)
191 PUGI__FN void default_deallocate(void* ptr)
196 template <typename T>
197 struct xml_memory_management_function_storage
199 static allocation_function allocate;
200 static deallocation_function deallocate;
203 // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
204 // Without a template<> we'll get multiple definitions of the same static
205 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
206 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
208 typedef xml_memory_management_function_storage<int> xml_memory;
214 PUGI__FN size_t strlength(const char_t* s)
218 #ifdef PUGIXML_WCHAR_MODE
225 // Compare two strings
226 PUGI__FN bool strequal(const char_t* src, const char_t* dst)
230 #ifdef PUGIXML_WCHAR_MODE
231 return wcscmp(src, dst) == 0;
233 return strcmp(src, dst) == 0;
237 // Compare lhs with [rhs_begin, rhs_end)
238 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
240 for (size_t i = 0; i < count; ++i)
241 if (lhs[i] != rhs[i])
244 return lhs[count] == 0;
247 // Get length of wide string, even if CRT lacks wide character support
248 PUGI__FN size_t strlength_wide(const wchar_t* s)
252 #ifdef PUGIXML_WCHAR_MODE
255 const wchar_t* end = s;
257 return static_cast<size_t>(end - s);
// auto_ptr-like object for exception recovery
// Owns `data` and passes it to `deleter` on destruction unless ownership was taken back
// with release(). A null `data` is never passed to the deleter.
template <typename T> struct auto_deleter
{
	typedef void (*D)(T*);

	T* data;
	D deleter;

	auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
	{
	}

	~auto_deleter()
	{
		if (data) deleter(data);
	}

	// Gives up ownership: returns the held pointer and disarms the destructor
	T* release()
	{
		T* result = data;
		data = 0;
		return result;
	}
};
289 #ifdef PUGIXML_COMPACT
// Open-addressing hash table keyed by object address (const void* -> void*).
// NOTE(review): this extract omits the short lines between the numbered statements
// (braces, member declarations, short returns); comments describe only what is visible.
291 class compact_hash_table
// Starts empty: no storage, zero capacity, zero stored items.
294 compact_hash_table(): _items(0), _capacity(0), _count(0)
// Item storage is returned through the xml_memory deallocation hook.
302 xml_memory::deallocate(_items);
// Looks up the value stored for `key`; a table with zero capacity yields null immediately.
309 void* find(const void* key)
311 if (_capacity == 0) return 0;
313 item_t* item = get_item(key);
// The probed slot either holds `key` or is a completely empty slot.
315 assert(item->key == key || (item->key == 0 && item->value == 0));
// Stores `value` for `key`. The caller must have reserved room beforehand: the table
// has to stay below 3/4 occupancy (capacity - capacity / 4).
320 void insert(const void* key, void* value)
322 assert(_capacity != 0 && _count < _capacity - _capacity / 4);
324 item_t* item = get_item(key);
// Makes room for `extra` more items, rehashing when that would reach the 3/4 load limit.
336 bool reserve(size_t extra = 16)
338 if (_count + extra >= _capacity - _capacity / 4)
339 return rehash(_count + extra);
// Defined out of line; grows the table to fit at least `count` items.
356 bool rehash(size_t count);
// Finds the slot for `key`: either the slot already holding it or the first empty slot
// on its probe sequence.
358 item_t* get_item(const void* key)
361 assert(_capacity > 0);
// Masking with capacity - 1 assumes the capacity is a power of two — TODO confirm against rehash.
363 size_t hashmod = _capacity - 1;
364 size_t bucket = hash(key) & hashmod;
366 for (size_t probe = 0; probe <= hashmod; ++probe)
368 item_t& probe_item = _items[bucket];
370 if (probe_item.key == key || probe_item.key == 0)
373 // hash collision, quadratic probing
374 bucket = (bucket + probe + 1) & hashmod;
377 assert(false && "Hash table is full"); // unreachable
// Hashes the low 32 bits of the pointer value.
381 static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key)
383 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key) & 0xffffffff);
385 // MurmurHash3 32-bit finalizer
// Rebuilds the table with enough capacity for `count` items.
// NOTE(review): short lines (braces, the capacity growth step, failure return, pointer hand-over)
// are missing from this extract; comments describe only the visible statements.
396 PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count)
// Capacity starts at 32 and the loop runs while `count` reaches the 3/4 load limit.
398 size_t capacity = 32;
399 while (count >= capacity - capacity / 4)
// A temporary table receives the new storage, obtained through the xml_memory hook.
402 compact_hash_table rt;
403 rt._capacity = capacity;
404 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity));
// Zero-filled slots read as empty (key == 0, value == 0).
409 memset(rt._items, 0, sizeof(item_t) * capacity);
// Existing entries are re-inserted into the new storage.
411 for (size_t i = 0; i < _capacity; ++i)
413 rt.insert(_items[i].key, _items[i].value);
// The old storage is released and the new capacity adopted.
416 xml_memory::deallocate(_items);
418 _capacity = capacity;
// Re-insertion must have carried over every item.
421 assert(_count == rt._count);
// Alignment of every block handed out by the page allocator
#ifdef PUGIXML_COMPACT
static const uintptr_t xml_memory_block_alignment = 4;
#else
static const uintptr_t xml_memory_block_alignment = sizeof(void*);
#endif

// extra metadata bits
static const uintptr_t xml_memory_page_contents_shared_mask = 64;
static const uintptr_t xml_memory_page_name_allocated_mask = 32;
static const uintptr_t xml_memory_page_value_allocated_mask = 16;
static const uintptr_t xml_memory_page_type_mask = 15;

// combined masks for string uniqueness
static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;

// Header encoding: in the default mode the header stores the byte distance from the owning page
// in the bits above the low 8, with the flag/type bits below; compact mode delegates to the header object.
#ifdef PUGIXML_COMPACT
	#define PUGI__GETHEADER_IMPL(object, page, flags) // unused
	#define PUGI__GETPAGE_IMPL(header) (header).get_page()
#else
	#define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
	// this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
	#define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8))))
#endif

#define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
#define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
struct xml_allocator;

// Header placed at the start of every allocator page; the page payload follows it in memory.
struct xml_memory_page
{
	// Initializes a page header in raw `memory` and returns it.
	// Every field is zeroed; the caller links the page into a list and sets the allocator.
	static xml_memory_page* construct(void* memory)
	{
		xml_memory_page* result = static_cast<xml_memory_page*>(memory);

		result->allocator = 0;
		result->prev = 0;
		result->next = 0;
		result->busy_size = 0;
		result->freed_size = 0;

	#ifdef PUGIXML_COMPACT
		result->compact_string_base = 0;
		result->compact_shared_parent = 0;
		result->compact_page_marker = 0;
	#endif

		return result;
	}

	xml_allocator* allocator;

	// Doubly-linked list of pages
	xml_memory_page* prev;
	xml_memory_page* next;

	// Bytes handed out from this page, and bytes of those that were given back
	size_t busy_size;
	size_t freed_size;

#ifdef PUGIXML_COMPACT
	char_t* compact_string_base;
	void* compact_shared_parent;
	uint32_t* compact_page_marker;
#endif
};
496 static const size_t xml_memory_page_size =
497 #ifdef PUGIXML_MEMORY_PAGE_SIZE
498 (PUGIXML_MEMORY_PAGE_SIZE)
502 - sizeof(xml_memory_page);
// Bookkeeping record stored immediately before every string allocated from a page.
// Both fields are expressed in units of the allocator block alignment, not bytes.
struct xml_memory_string_header
{
	uint16_t page_offset; // offset from page->data
	uint16_t full_size; // 0 if string occupies whole page
};
512 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
514 #ifdef PUGIXML_COMPACT
519 xml_memory_page* allocate_page(size_t data_size)
521 size_t size = sizeof(xml_memory_page) + data_size;
523 // allocate block with some alignment, leaving memory for worst-case padding
524 void* memory = xml_memory::allocate(size);
525 if (!memory) return 0;
527 // prepare page structure
528 xml_memory_page* page = xml_memory_page::construct(memory);
531 assert(this == _root->allocator);
532 page->allocator = this;
537 static void deallocate_page(xml_memory_page* page)
539 xml_memory::deallocate(page);
542 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
544 void* allocate_memory(size_t size, xml_memory_page*& out_page)
546 if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
547 return allocate_memory_oob(size, out_page);
549 void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
558 #ifdef PUGIXML_COMPACT
559 void* allocate_object(size_t size, xml_memory_page*& out_page)
561 void* result = allocate_memory(size + sizeof(uint32_t), out_page);
562 if (!result) return 0;
565 ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
567 if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
570 uint32_t* marker = static_cast<uint32_t*>(result);
572 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
573 out_page->compact_page_marker = marker;
575 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
576 // this will make sure deallocate_memory correctly tracks the size
577 out_page->freed_size += sizeof(uint32_t);
583 // roll back uint32_t part
584 _busy_size -= sizeof(uint32_t);
590 void* allocate_object(size_t size, xml_memory_page*& out_page)
592 return allocate_memory(size, out_page);
596 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
598 if (page == _root) page->busy_size = _busy_size;
600 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
603 page->freed_size += size;
604 assert(page->freed_size <= page->busy_size);
606 if (page->freed_size == page->busy_size)
610 assert(_root == page);
612 // top page freed, just reset sizes
614 page->freed_size = 0;
616 #ifdef PUGIXML_COMPACT
617 // reset compact state to maximize efficiency
618 page->compact_string_base = 0;
619 page->compact_shared_parent = 0;
620 page->compact_page_marker = 0;
627 assert(_root != page);
630 // remove from the list
631 page->prev->next = page->next;
632 page->next->prev = page->prev;
635 deallocate_page(page);
640 char_t* allocate_string(size_t length)
642 static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
644 PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
646 // allocate memory for string and header block
647 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
649 // round size up to block alignment boundary
650 size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
652 xml_memory_page* page;
653 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
655 if (!header) return 0;
658 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
660 assert(page_offset % xml_memory_block_alignment == 0);
661 assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
662 header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
664 // full_size == 0 for large strings that occupy the whole page
665 assert(full_size % xml_memory_block_alignment == 0);
666 assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
667 header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
669 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
670 // header is guaranteed a pointer-sized alignment, which should be enough for char_t
671 return static_cast<char_t*>(static_cast<void*>(header + 1));
674 void deallocate_string(char_t* string)
676 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
677 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
680 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
684 size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
685 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
687 // if full_size == 0 then this string occupies the whole page
688 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
690 deallocate_memory(header, full_size, page);
695 #ifdef PUGIXML_COMPACT
696 return _hash->reserve();
702 xml_memory_page* _root;
705 #ifdef PUGIXML_COMPACT
706 compact_hash_table* _hash;
710 PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
712 const size_t large_allocation_threshold = xml_memory_page_size / 4;
714 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
719 if (size <= large_allocation_threshold)
721 _root->busy_size = _busy_size;
723 // insert page at the end of linked list
732 // insert page before the end of linked list, so that it is deleted as soon as possible
733 // the last page is not deleted even if it's empty (see deallocate_memory)
736 page->prev = _root->prev;
739 _root->prev->next = page;
742 page->busy_size = size;
745 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
749 #ifdef PUGIXML_COMPACT
// Compact-mode object header: one byte locating the page marker plus one byte of flags.
// NOTE(review): the class header line, braces, short returns and the `_page` member declaration
// are missing from this extract; comments describe only the visible statements.
751 static const uintptr_t compact_alignment_log2 = 2;
752 static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
// Records where this header sits relative to the page's current marker, and the initial flags.
757 compact_header(xml_memory_page* page, unsigned int flags)
759 PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
// Byte distance from the marker; it must be a multiple of compact_alignment and fit in
// one byte once divided by it.
761 ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
762 assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
764 _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
765 _flags = static_cast<unsigned char>(flags);
// Flag manipulation; only the low 8 bits of `mod` take part.
768 void operator&=(uintptr_t mod)
770 _flags &= static_cast<unsigned char>(mod);
773 void operator|=(uintptr_t mod)
775 _flags |= static_cast<unsigned char>(mod);
778 uintptr_t operator&(uintptr_t mod) const
// Recovers the owning page: step back to the marker, then back by the distance the marker stores.
783 xml_memory_page* get_page() const
785 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
786 const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
787 const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
789 return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
794 unsigned char _flags;
797 PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
799 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
801 return header->get_page();
804 template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
806 return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object));
809 template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
811 compact_get_page(object, header_offset)->allocator->_hash->insert(object, value);
// One-byte encoded pointer: stores the target as a small aligned offset from its own address
// when possible, otherwise defers to the hash table via compact_set_value/compact_get_value.
// NOTE(review): braces, else branches and the sentinel assignments to `_data` are missing from this
// extract; the exact sentinel values cannot be confirmed from the visible lines.
814 template <typename T, int header_offset, int start = -126> class compact_pointer
817 compact_pointer(): _data(0)
821 void operator=(const compact_pointer& rhs)
826 void operator=(T* value)
830 // value is guaranteed to be compact-aligned; 'this' is not
831 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
832 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
833 // compensate for arithmetic shift rounding for negative values
834 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
835 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
// Offsets 0..253 are stored inline, biased by one.
837 if (static_cast<uintptr_t>(offset) <= 253)
838 _data = static_cast<unsigned char>(offset + 1);
// Out-of-range targets are registered in the hash table, keyed by this member's address.
841 compact_set_value<header_offset>(this, value);
// Inline decode: undo the bias and `start` shift relative to `this` rounded down to compact_alignment.
856 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
858 return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment);
// Fallback decode through the hash table.
861 return compact_get_value<header_offset, T>(this);
867 T* operator->() const
// Two-byte encoded parent pointer with three representations visible below: an inline backward
// offset, a per-page shared parent, and a hash-table fallback.
// NOTE(review): braces, else branches, several `_data` assignments and the `_data` member declaration
// are missing from this extract; comments describe only the visible statements.
876 template <typename T, int header_offset> class compact_pointer_parent
879 compact_pointer_parent(): _data(0)
883 void operator=(const compact_pointer_parent& rhs)
888 void operator=(T* value)
892 // value is guaranteed to be compact-aligned; 'this' is not
893 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
894 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
895 // compensate for arithmetic shift behavior for negative values
896 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
897 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
// Rebased offsets 0..65533 are stored inline, biased by one.
899 if (static_cast<uintptr_t>(offset) <= 65533)
901 _data = static_cast<unsigned short>(offset + 1);
// Otherwise try the page-wide shared parent slot, claiming it when it is still empty.
905 xml_memory_page* page = compact_get_page(this, header_offset);
907 if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
908 page->compact_shared_parent = value;
910 if (page->compact_shared_parent == value)
// Last resort: register the pointer in the hash table.
916 compact_set_value<header_offset>(this, value);
// Inline decode relative to `this` rounded down to compact_alignment.
934 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
936 return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment);
// 65534 selects the page's shared parent; the remaining case goes through the hash table.
938 else if (_data == 65534)
939 return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
941 return compact_get_value<header_offset, T>(this);
947 T* operator->() const
// One-byte encoded string pointer. The target is expressed as an offset from the page's string base,
// split into a 16-bit part stored `base_offset` bytes before this member and a remainder stored here.
// NOTE(review): braces, else branches, the test selecting between the two encodings and the sentinel
// assignments to `_data` are missing from this extract; comments describe only the visible statements.
956 template <int header_offset, int base_offset> class compact_string
959 compact_string(): _data(0)
963 void operator=(const compact_string& rhs)
968 void operator=(char_t* value)
972 xml_memory_page* page = compact_get_page(this, header_offset);
// The first string assigned on a page becomes that page's string base.
974 if (PUGI__UNLIKELY(page->compact_string_base == 0))
975 page->compact_string_base = value;
// Offset is measured in char_t units from the page's string base.
977 ptrdiff_t offset = value - page->compact_string_base;
979 if (static_cast<uintptr_t>(offset) < (65535 << 7))
981 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
982 uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
// One encoding writes both parts: the offset's upper bits (biased by one) into the 16-bit
// slot and its low 7 bits (biased by one) here.
986 *base = static_cast<uint16_t>((offset >> 7) + 1);
987 _data = static_cast<unsigned char>((offset & 127) + 1);
// The other encoding keeps the existing 16-bit slot and stores the distance from it, if it fits in 0..253.
991 ptrdiff_t remainder = offset - ((*base - 1) << 7);
993 if (static_cast<uintptr_t>(remainder) <= 253)
995 _data = static_cast<unsigned char>(remainder + 1);
// Targets that cannot be encoded are registered in the hash table.
999 compact_set_value<header_offset>(this, value);
1007 compact_set_value<header_offset>(this, value);
1018 operator char_t*() const
1024 xml_memory_page* page = compact_get_page(this, header_offset);
1026 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
1027 const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
// Inline decode: recombine the two parts (both stored biased by one) and add to the page's string base.
1030 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
1032 return page->compact_string_base + offset;
// Fallback decode through the hash table.
1036 return compact_get_value<header_offset, char_t>(this);
1044 unsigned char _data;
1049 #ifdef PUGIXML_COMPACT
1052 struct xml_attribute_struct
1054 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
1056 PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
1059 impl::compact_header header;
1061 uint16_t namevalue_base;
1063 impl::compact_string<4, 2> name;
1064 impl::compact_string<5, 3> value;
1066 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
1067 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
1070 struct xml_node_struct
1072 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
1074 PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
1077 impl::compact_header header;
1079 uint16_t namevalue_base;
1081 impl::compact_string<4, 2> name;
1082 impl::compact_string<5, 3> value;
1084 impl::compact_pointer_parent<xml_node_struct, 6> parent;
1086 impl::compact_pointer<xml_node_struct, 8, 0> first_child;
1088 impl::compact_pointer<xml_node_struct, 9> prev_sibling_c;
1089 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
1091 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
1097 struct xml_attribute_struct
1099 xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
1101 header = PUGI__GETHEADER_IMPL(this, page, 0);
1109 xml_attribute_struct* prev_attribute_c;
1110 xml_attribute_struct* next_attribute;
1113 struct xml_node_struct
1115 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
1117 header = PUGI__GETHEADER_IMPL(this, page, type);
1125 xml_node_struct* parent;
1127 xml_node_struct* first_child;
1129 xml_node_struct* prev_sibling_c;
1130 xml_node_struct* next_sibling;
1132 xml_attribute_struct* first_attribute;
1138 struct xml_extra_buffer
1141 xml_extra_buffer* next;
1144 struct xml_document_struct: public xml_node_struct, public xml_allocator
1146 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
1150 const char_t* buffer;
1152 xml_extra_buffer* extra_buffers;
1154 #ifdef PUGIXML_COMPACT
1155 compact_hash_table hash;
1159 template <typename Object> inline xml_allocator& get_allocator(const Object* object)
1163 return *PUGI__GETPAGE(object)->allocator;
1166 template <typename Object> inline xml_document_struct& get_document(const Object* object)
1170 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
1174 // Low-level DOM operations
1176 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
1178 xml_memory_page* page;
1179 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1180 if (!memory) return 0;
1182 return new (memory) xml_attribute_struct(page);
1185 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
1187 xml_memory_page* page;
1188 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1189 if (!memory) return 0;
1191 return new (memory) xml_node_struct(page, type);
1194 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
1196 if (a->header & impl::xml_memory_page_name_allocated_mask)
1197 alloc.deallocate_string(a->name);
1199 if (a->header & impl::xml_memory_page_value_allocated_mask)
1200 alloc.deallocate_string(a->value);
1202 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
1205 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
1207 if (n->header & impl::xml_memory_page_name_allocated_mask)
1208 alloc.deallocate_string(n->name);
1210 if (n->header & impl::xml_memory_page_value_allocated_mask)
1211 alloc.deallocate_string(n->value);
1213 for (xml_attribute_struct* attr = n->first_attribute; attr; )
1215 xml_attribute_struct* next = attr->next_attribute;
1217 destroy_attribute(attr, alloc);
1222 for (xml_node_struct* child = n->first_child; child; )
1224 xml_node_struct* next = child->next_sibling;
1226 destroy_node(child, alloc);
1231 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
1234 inline void append_node(xml_node_struct* child, xml_node_struct* node)
1236 child->parent = node;
1238 xml_node_struct* head = node->first_child;
1242 xml_node_struct* tail = head->prev_sibling_c;
1244 tail->next_sibling = child;
1245 child->prev_sibling_c = tail;
1246 head->prev_sibling_c = child;
1250 node->first_child = child;
1251 child->prev_sibling_c = child;
1255 inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
1257 child->parent = node;
1259 xml_node_struct* head = node->first_child;
1263 child->prev_sibling_c = head->prev_sibling_c;
1264 head->prev_sibling_c = child;
1267 child->prev_sibling_c = child;
1269 child->next_sibling = head;
1270 node->first_child = child;
1273 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
1275 xml_node_struct* parent = node->parent;
1277 child->parent = parent;
1279 if (node->next_sibling)
1280 node->next_sibling->prev_sibling_c = child;
1282 parent->first_child->prev_sibling_c = child;
1284 child->next_sibling = node->next_sibling;
1285 child->prev_sibling_c = node;
1287 node->next_sibling = child;
1290 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
1292 xml_node_struct* parent = node->parent;
1294 child->parent = parent;
1296 if (node->prev_sibling_c->next_sibling)
1297 node->prev_sibling_c->next_sibling = child;
1299 parent->first_child = child;
1301 child->prev_sibling_c = node->prev_sibling_c;
1302 child->next_sibling = node;
1304 node->prev_sibling_c = child;
1307 inline void remove_node(xml_node_struct* node)
1309 xml_node_struct* parent = node->parent;
1311 if (node->next_sibling)
1312 node->next_sibling->prev_sibling_c = node->prev_sibling_c;
1314 parent->first_child->prev_sibling_c = node->prev_sibling_c;
1316 if (node->prev_sibling_c->next_sibling)
1317 node->prev_sibling_c->next_sibling = node->next_sibling;
1319 parent->first_child = node->next_sibling;
1322 node->prev_sibling_c = 0;
1323 node->next_sibling = 0;
1326 inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1328 xml_attribute_struct* head = node->first_attribute;
1332 xml_attribute_struct* tail = head->prev_attribute_c;
1334 tail->next_attribute = attr;
1335 attr->prev_attribute_c = tail;
1336 head->prev_attribute_c = attr;
1340 node->first_attribute = attr;
1341 attr->prev_attribute_c = attr;
1345 inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1347 xml_attribute_struct* head = node->first_attribute;
1351 attr->prev_attribute_c = head->prev_attribute_c;
1352 head->prev_attribute_c = attr;
1355 attr->prev_attribute_c = attr;
1357 attr->next_attribute = head;
1358 node->first_attribute = attr;
1361 inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1363 if (place->next_attribute)
1364 place->next_attribute->prev_attribute_c = attr;
1366 node->first_attribute->prev_attribute_c = attr;
1368 attr->next_attribute = place->next_attribute;
1369 attr->prev_attribute_c = place;
1370 place->next_attribute = attr;
1373 inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1375 if (place->prev_attribute_c->next_attribute)
1376 place->prev_attribute_c->next_attribute = attr;
1378 node->first_attribute = attr;
1380 attr->prev_attribute_c = place->prev_attribute_c;
1381 attr->next_attribute = place;
1382 place->prev_attribute_c = attr;
1385 inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1387 if (attr->next_attribute)
1388 attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
1390 node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
1392 if (attr->prev_attribute_c->next_attribute)
1393 attr->prev_attribute_c->next_attribute = attr->next_attribute;
1395 node->first_attribute = attr->next_attribute;
1397 attr->prev_attribute_c = 0;
1398 attr->next_attribute = 0;
// Allocates a node of the given `type` from `alloc` and appends it to `node` via append_node.
// Returns 0 if alloc.reserve() fails or allocate_node yields a null pointer.
// NOTE(review): the success-path return is not visible in this excerpt; presumably it returns child.
1401 PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
1403 if (!alloc.reserve()) return 0;
1405 xml_node_struct* child = allocate_node(alloc, type);
1406 if (!child) return 0;
1408 append_node(child, node);
// Allocates an attribute from `alloc` and appends it to `node` via append_attribute.
// Returns 0 if alloc.reserve() fails or allocate_attribute yields a null pointer.
// NOTE(review): the success-path return is not visible in this excerpt; presumably it returns attr.
1413 PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
1415 if (!alloc.reserve()) return 0;
1417 xml_attribute_struct* attr = allocate_attribute(alloc);
1418 if (!attr) return 0;
1420 append_attribute(attr, node);
1426 // Helper classes for code generation
1439 // Unicode utilities
// Returns `value` with its two bytes exchanged.
1441 inline uint16_t endian_swap(uint16_t value)
// the cast narrows the combined expression back to 16 bits
1443 return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
// Returns `value` with its four bytes in reverse order.
1446 inline uint32_t endian_swap(uint32_t value)
// byte 0 -> byte 3, byte 1 -> byte 2, byte 2 -> byte 1, byte 3 -> byte 0
1448 return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
// Counting sink: `result` is a running size_t total rather than an output pointer.
1453 typedef size_t value_type;
// Adds the number of UTF-8 code units needed for `ch`: 1 below 0x80, 2 below 0x800, otherwise 3.
1455 static value_type low(value_type result, uint32_t ch)
1458 if (ch < 0x80) return result + 1;
1460 else if (ch < 0x800) return result + 2;
1462 else return result + 3;
// NOTE(review): the body of high() is not visible in this excerpt.
1465 static value_type high(value_type result, uint32_t)
1467 // U+10000..U+10FFFF
// Writing sink: `result` is the output cursor into a uint8_t buffer.
1474 typedef uint8_t* value_type;
// Encodes `ch` as a one-, two- or three-byte UTF-8 sequence at `result`.
// NOTE(review): the first condition and the return statements are not visible in this excerpt.
1476 static value_type low(value_type result, uint32_t ch)
// single byte: stored as-is
1481 *result = static_cast<uint8_t>(ch);
// two bytes: 110xxxxx 10xxxxxx
1485 else if (ch < 0x800)
1487 result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
1488 result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
// three bytes: 1110xxxx 10xxxxxx 10xxxxxx
1494 result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
1495 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1496 result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
// Encodes `ch` as a four-byte sequence: 11110xxx followed by three 10xxxxxx bytes.
1501 static value_type high(value_type result, uint32_t ch)
1503 // U+10000..U+10FFFF
1504 result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
1505 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
1506 result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1507 result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
// Dispatches to low() for code points below 0x10000 and to high() otherwise.
1511 static value_type any(value_type result, uint32_t ch)
1513 return (ch < 0x10000) ? low(result, ch) : high(result, ch);
// Counting sink with the same low()/high() interface as the writer structs; `result` is a size_t total.
// NOTE(review): the bodies of low() and high() are not visible in this excerpt.
1517 struct utf16_counter
1519 typedef size_t value_type;
1521 static value_type low(value_type result, uint32_t)
1526 static value_type high(value_type result, uint32_t)
// Writing sink: `result` is the output cursor into a uint16_t buffer.
1534 typedef uint16_t* value_type;
// Stores `ch` as a single 16-bit unit.
1536 static value_type low(value_type result, uint32_t ch)
1538 *result = static_cast<uint16_t>(ch);
// Stores `ch` as two 16-bit units (surrogate pair).
1543 static value_type high(value_type result, uint32_t ch)
// split (ch - 0x10000) into its upper bits (msh) and its lower 10 bits (lsh)
1545 uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
1546 uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
// first unit is offset from 0xD800, second from 0xDC00
1548 result[0] = static_cast<uint16_t>(0xD800 + msh);
1549 result[1] = static_cast<uint16_t>(0xDC00 + lsh);
// Dispatches to low() for code points below 0x10000 and to high() otherwise.
1554 static value_type any(value_type result, uint32_t ch)
1556 return (ch < 0x10000) ? low(result, ch) : high(result, ch);
// Counting sink with the same low()/high() interface as the writer structs; `result` is a size_t total.
// NOTE(review): the bodies of low() and high() are not visible in this excerpt.
1560 struct utf32_counter
1562 typedef size_t value_type;
1564 static value_type low(value_type result, uint32_t)
1569 static value_type high(value_type result, uint32_t)
// Writing sink: `result` is the output cursor into a uint32_t buffer.
// NOTE(review): the bodies of low(), high() and any() are not visible in this excerpt.
1577 typedef uint32_t* value_type;
1579 static value_type low(value_type result, uint32_t ch)
1586 static value_type high(value_type result, uint32_t ch)
1593 static value_type any(value_type result, uint32_t ch)
// Writing sink that emits one byte per code point into a uint8_t buffer.
1601 struct latin1_writer
1603 typedef uint8_t* value_type;
// Stores `ch` as a single byte; code points above 255 are replaced with '?'.
1605 static value_type low(value_type result, uint32_t ch)
1607 *result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
// NOTE(review): the body of high() is not visible in this excerpt.
1612 static value_type high(value_type result, uint32_t ch)
// Input unit type consumed by this decoder.
1624 typedef uint8_t type;
// Decodes `size` bytes of UTF-8 starting at `data`, feeding each code point to Traits::low
// (below U+10000) or Traits::high (U+10000 and above) and threading `result` through the calls.
// NOTE(review): the loop header, pointer/size advances and the final return are not visible in this excerpt.
1626 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
// payload bits of a continuation byte (low six bits)
1628 const uint8_t utf8_byte_mask = 0x3f;
1632 uint8_t lead = *data;
1634 // 0xxxxxxx -> U+0000..U+007F
1637 result = Traits::low(result, lead);
1641 // process aligned single-byte (ascii) blocks
// the 32-bit load below is only attempted when `data` is 4-byte aligned
1642 if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
1644 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
// 0x80808080 tests the high bit of all four bytes at once; zero means four ASCII bytes
1645 while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
1647 result = Traits::low(result, data[0]);
1648 result = Traits::low(result, data[1]);
1649 result = Traits::low(result, data[2]);
1650 result = Traits::low(result, data[3]);
1656 // 110xxxxx -> U+0080..U+07FF
// each multi-byte branch also requires enough remaining input and 10xxxxxx continuation bytes
1657 else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
1659 result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
1663 // 1110xxxx -> U+0800-U+FFFF
1664 else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
1666 result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
1670 // 11110xxx -> U+10000..U+10FFFF
1671 else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
1673 result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
1677 // 10xxxxxx or 11111xxx -> invalid
// UTF-16 decoder; when opt_swap::value is true every input unit is byte-swapped before use.
1689 template <typename opt_swap> struct utf16_decoder
1691 typedef uint16_t type;
// Decodes `size` 16-bit units at `data`, feeding code points to Traits::low / Traits::high.
// NOTE(review): the loop header, the first branch condition, advances and the return are not
// visible in this excerpt.
1693 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
1697 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1702 result = Traits::low(result, lead);
// lead in [0xE000, 0xFFFF]: passed through as a single unit
1707 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1709 result = Traits::low(result, lead);
1713 // surrogate pair lead
// lead in [0xD800, 0xDBFF] with at least one more unit available
1714 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
1716 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
// second unit must be in [0xDC00, 0xDFFF]; combine the 10 payload bits of each unit
1718 if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1720 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
// UTF-32 decoder; when opt_swap::value is true every input unit is byte-swapped before use.
1741 template <typename opt_swap> struct utf32_decoder
1743 typedef uint32_t type;
// Feeds each 32-bit unit to Traits::low or Traits::high.
// NOTE(review): the loop and the condition choosing between low and high are not visible in
// this excerpt; the remaining comment suggests high() handles U+10000..U+10FFFF.
1745 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
1749 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1754 result = Traits::low(result, lead);
1758 // U+10000..U+10FFFF
1761 result = Traits::high(result, lead);
// Decoder for single-byte input: every byte value is forwarded unchanged as a code point.
1771 struct latin1_decoder
1773 typedef uint8_t type;
// NOTE(review): the loop around the call below and the return are not visible in this excerpt.
1775 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1779 result = Traits::low(result, *data);
// Maps sizeof(wchar_t) to the matching unit type, counter, writer and native-endian decoder.
// Only the 2-byte and 4-byte specializations are provided here.
1788 template <size_t size> struct wchar_selector;
// 2-byte wchar_t: handled as UTF-16
1790 template <> struct wchar_selector<2>
1792 typedef uint16_t type;
1793 typedef utf16_counter counter;
1794 typedef utf16_writer writer;
1795 typedef utf16_decoder<opt_false> decoder;
// 4-byte wchar_t: handled as UTF-32
1798 template <> struct wchar_selector<4>
1800 typedef uint32_t type;
1801 typedef utf32_counter counter;
1802 typedef utf32_writer writer;
1803 typedef utf32_decoder<opt_false> decoder;
// convenience aliases for the platform's wchar_t width
1806 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
1807 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
// Decoder for wchar_t input that forwards to the decoder selected by sizeof(wchar_t).
1809 struct wchar_decoder
1811 typedef wchar_t type;
1813 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
1815 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
// reinterpret the wchar_t buffer as the same-sized unsigned unit type expected by the decoder
1817 return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
1821 #ifdef PUGIXML_WCHAR_MODE
// Writes the byte-swapped form of each of the `length` characters in `data` to `result`.
// Element i is read before result[i] is written, so `result` may be the same buffer as `data`
// (convert_buffer_endian_swap calls it that way).
1822 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
1824 for (size_t i = 0; i < length; ++i)
// cast to the same-sized unsigned type so the matching endian_swap overload is selected
1825 result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
// Character class flags; each is a distinct bit so one chartype_table entry can hold several.
// The trailing comments list the characters that carry each flag.
// NOTE(review): the enclosing enum header is not visible in this excerpt.
1833 ct_parse_pcdata = 1, // \0, &, \r, <
1834 ct_parse_attr = 2, // \0, &, \r, ', "
1835 ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
1836 ct_space = 8, // \r, \n, space, tab
1837 ct_parse_cdata = 16, // \0, ], >, \r
1838 ct_parse_comment = 32, // \0, -, >, \r
1839 ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
1840 ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
// Lookup table indexed by byte value; each entry is the bitwise OR of the ct_* flags for that
// character. Examples: '\r' (13) is 63, i.e. all six flags below 64; letters are 192
// (ct_symbol | ct_start_symbol); digits are 64 (ct_symbol only).
1843 static const unsigned char chartype_table[256] =
1845 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
1846 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
1847 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
1848 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
1849 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
1850 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
1851 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
1852 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
// every value from 128 up is treated as a valid name character
1854 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
1855 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1856 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1857 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1858 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1859 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1860 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1861 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
// Second set of character class flags, stored in chartypex_table; one bit per class.
// NOTE(review): the enclosing enum header is not visible in this excerpt.
1866 ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
1867 ctx_special_attr = 2, // Any symbol >= 0 and < 32, &, <, ", '
1868 ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
1869 ctx_digit = 8, // 0-9
1870 ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
// Lookup table indexed by byte value; each entry is the bitwise OR of the ctx_* flags for that
// character. Examples: digits are 24 (ctx_digit | ctx_symbol); letters and '_' are 20
// (ctx_start_symbol | ctx_symbol); most control characters are 3 (both "special" flags).
1873 static const unsigned char chartypex_table[256] =
1875 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, // 0-15
1876 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
1877 0, 0, 2, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
1878 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 1, 0, // 48-63
1880 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
1881 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
1882 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
1883 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
// every value from 128 up is treated as a valid name character
1885 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
1886 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1887 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1888 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1889 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1890 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1891 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1892 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
// Character class test: non-zero when character `c` has any of the flags in `ct` in `table`.
// In wchar mode any value of 128 or above is looked up as table[128], so wide characters never
// index past the table.
1895 #ifdef PUGIXML_WCHAR_MODE
1896 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
// char mode: the cast to unsigned char keeps the index inside 0..255 even if char is signed
// NOTE(review): the #else / #endif lines are not visible in this excerpt.
1898 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
// shorthands bound to the two tables
1901 #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
1902 #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
// Returns true when the lowest-addressed byte of an unsigned int holding 1 is itself 1,
// i.e. the least significant byte is stored first.
1904 PUGI__FN bool is_little_endian()
1906 unsigned int ui = 1;
1908 return *reinterpret_cast<unsigned char*>(&ui) == 1;
// Returns the concrete encoding that corresponds to wchar_t on this platform:
// UTF-16 for 2-byte wchar_t, UTF-32 otherwise, with endianness from is_little_endian().
1911 PUGI__FN xml_encoding get_wchar_encoding()
// other wchar_t sizes are rejected at compile time
1913 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1915 if (sizeof(wchar_t) == 2)
1916 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1918 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
// Scans a byte buffer that begins with an XML declaration for its encoding="..." value.
// On success `out_encoding` points into `data` at the start of the value and `out_length` is
// its length in bytes; mismatches detected by PUGI__SCANCHAR return false.
// NOTE(review): several lines (declaration of `offset`, early returns, final return) are not
// visible in this excerpt.
1921 PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
// SCANCHAR: require exactly `ch` at the current offset (bounds-checked) and step past it, else fail
1923 #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
// SCANCHARTYPE: skip any run of characters of class `ct`, stopping at the end of the buffer
1924 #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }
1926 // check if we have a non-empty XML declaration
// the buffer must start with "<?xml" followed by a whitespace character
1927 if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
1930 // scan XML declaration until the encoding field
// `i + 1 < size` keeps the data[i + 1] read below inside the buffer
1931 for (size_t i = 6; i + 1 < size; ++i)
1933 // declaration can not contain ? in quoted values
1937 if (data[i] == 'e' && data[i + 1] == 'n')
1941 // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
1942 PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
1943 PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');
// optional whitespace is allowed on both sides of '='
1946 PUGI__SCANCHARTYPE(ct_space);
1947 PUGI__SCANCHAR('=');
1948 PUGI__SCANCHARTYPE(ct_space);
1950 // the only two valid delimiters are ' and "
// anything other than '"' is treated as '\''; the SCANCHAR below rejects input that has neither
1951 uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
1953 PUGI__SCANCHAR(delimiter);
1955 size_t start = offset;
1957 out_encoding = data + offset;
// the value is the run of ct_symbol characters up to the closing delimiter
1959 PUGI__SCANCHARTYPE(ct_symbol);
1961 out_length = offset - start;
1963 PUGI__SCANCHAR(delimiter);
1971 #undef PUGI__SCANCHAR
1972 #undef PUGI__SCANCHARTYPE
// Guesses the encoding of a raw document buffer from its first four bytes and, failing that,
// from the encoding name in its XML declaration. Falls back to encoding_utf8.
1975 PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
1977 // skip encoding autodetection if input buffer is too small
1978 if (size < 4) return encoding_utf8;
1980 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1982 // look for BOM in first few bytes
// the UTF-32 LE BOM (ff fe 00 00) is tested before the UTF-16 LE BOM (ff fe), which is its prefix
1983 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1984 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1985 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1986 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1987 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1989 // look for <, <? or <?xm in various encodings
// 0x3c is '<' and 0x3f is '?'; the zero bytes reveal the unit width and byte order
1990 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1991 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1992 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1993 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1995 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1996 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1997 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1999 // no known BOM detected; parse declaration
2000 const uint8_t* enc = 0;
2001 size_t enc_length = 0;
// the buffer starts with the single-byte characters "<?xm": read the declared encoding name
2003 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
2005 // iso-8859-1 (case-insensitive)
// `| ' '` sets bit 0x20, folding ASCII uppercase letters onto lowercase before comparing
2006 if (enc_length == 10
2007 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
2008 && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
2009 && enc[8] == '-' && enc[9] == '1')
2010 return encoding_latin1;
2012 // latin1 (case-insensitive)
// NOTE(review): the first and last lines of this condition are not visible in this excerpt
2014 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
2015 && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
2017 return encoding_latin1;
2020 return encoding_utf8;
// Resolves a requested encoding to a concrete one: generic wchar / utf16 / utf32 values are
// mapped to their endian-specific forms, encoding_auto is resolved by inspecting the buffer,
// and any other value is returned unchanged.
2023 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
2025 // replace wchar encoding with utf implementation
2026 if (encoding == encoding_wchar) return get_wchar_encoding();
2028 // replace utf16 encoding with utf16 with specific endianness
2029 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2031 // replace utf32 encoding with utf32 with specific endianness
2032 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2034 // only do autodetection if no explicit encoding is requested
2035 if (encoding != encoding_auto) return encoding;
2037 // try to guess encoding (based on XML specification, Appendix F.1)
2038 const uint8_t* data = static_cast<const uint8_t*>(contents);
2040 return guess_buffer_encoding(data, size);
// Produces a char_t buffer for `contents` without any encoding conversion.
// One path hands back `contents` itself (const cast away) with out_length = length; the other
// allocates length + 1 characters through xml_memory::allocate, copies the input, and reports
// out_length = length + 1. Returns false when that allocation fails.
// NOTE(review): the conditions selecting each path are not visible in this excerpt; presumably
// `is_mutable` selects the in-place path - confirm.
2043 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
// length in whole characters; any trailing partial character is ignored
2045 size_t length = size / sizeof(char_t);
2049 out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
2050 out_length = length;
// one extra character is allocated beyond the copied data
2054 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2055 if (!buffer) return false;
2058 memcpy(buffer, contents, length * sizeof(char_t));
// NOTE(review): presumably the alternative to the memcpy when `contents` is null - confirm
2060 assert(length == 0);
2064 out_buffer = buffer;
2065 out_length = length + 1;
2071 #ifdef PUGIXML_WCHAR_MODE
// Returns true when `le` and `re` are the same UTF width (16 or 32) with opposite byte order,
// i.e. converting between them only requires swapping bytes.
2072 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
2074 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
2075 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
// Produces a byte-swapped char_t buffer from `contents`.
// One path swaps the input in place (out_length = length); the other allocates length + 1
// characters, writes the swapped data there (out_length = length + 1) and returns false if
// the allocation fails.
// NOTE(review): the condition selecting the path is not visible in this excerpt; presumably
// `is_mutable` - confirm.
2078 PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2080 const char_t* data = static_cast<const char_t*>(contents);
2081 size_t length = size / sizeof(char_t);
// in-place path: source and destination are the same memory
2085 char_t* buffer = const_cast<char_t*>(data);
2087 convert_wchar_endian_swap(buffer, data, length);
2089 out_buffer = buffer;
2090 out_length = length;
// copying path: one extra character is allocated beyond the swapped data
2094 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2095 if (!buffer) return false;
2097 convert_wchar_endian_swap(buffer, data, length);
2100 out_buffer = buffer;
2101 out_length = length + 1;
// Converts `contents` (decoded by decoder type D) into a newly allocated wchar_t buffer.
// Runs the decoder twice: once with a counter to size the buffer, once with a writer to fill it.
// Returns false if the allocation fails; out_length is the converted length plus one.
2107 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2109 const typename D::type* data = static_cast<const typename D::type*>(contents);
// number of whole input units; a trailing partial unit is ignored
2110 size_t data_length = size / sizeof(typename D::type);
2112 // first pass: get length in wchar_t units
2113 size_t length = D::process(data, data_length, 0, wchar_counter());
2115 // allocate buffer of suitable length
2116 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2117 if (!buffer) return false;
2119 // second pass: convert input (whatever encoding D decodes) to wchar_t
2120 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
2121 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
// both passes must agree on the output size
2123 assert(oend == obegin + length);
2126 out_buffer = buffer;
2127 out_length = length + 1;
// wchar_t build: converts `contents` from `encoding` to the native wchar_t encoding.
// Picks the cheapest strategy: no conversion, byte swap only, or a full decode via
// convert_buffer_generic with the decoder matching the source encoding.
2132 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2134 // get native encoding
2135 xml_encoding wchar_encoding = get_wchar_encoding();
2137 // fast path: no conversion required
2138 if (encoding == wchar_encoding)
2139 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2141 // only endian-swapping is required
2142 if (need_endian_swap_utf(encoding, wchar_encoding))
2143 return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
2145 // source encoding is utf8
2146 if (encoding == encoding_utf8)
2147 return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
2149 // source encoding is utf16
2150 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2152 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
// opt_true makes the decoder byte-swap each unit when the source is not in native byte order
2154 return (native_encoding == encoding) ?
2155 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2156 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2159 // source encoding is utf32
2160 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2162 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2164 return (native_encoding == encoding) ?
2165 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2166 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2169 // source encoding is latin1
2170 if (encoding == encoding_latin1)
2171 return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
2173 assert(false && "Invalid encoding"); // unreachable
// Converts `contents` (decoded by decoder type D) into a newly allocated UTF-8 buffer.
// Runs the decoder twice: once with utf8_counter to size the buffer, once with utf8_writer to
// fill it. Returns false if the allocation fails; out_length is the converted length plus one.
2177 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2179 const typename D::type* data = static_cast<const typename D::type*>(contents);
// number of whole input units; a trailing partial unit is ignored
2180 size_t data_length = size / sizeof(typename D::type);
2182 // first pass: get length in utf8 units
2183 size_t length = D::process(data, data_length, 0, utf8_counter());
2185 // allocate buffer of suitable length
2186 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2187 if (!buffer) return false;
2189 // second pass: convert input (whatever encoding D decodes) to utf8
2190 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2191 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
// both passes must agree on the output size
2193 assert(oend == obegin + length);
2196 out_buffer = buffer;
2197 out_length = length + 1;
// Scans `data` from the start and reports the length of a leading prefix.
// NOTE(review): the loop body and return statements are not visible in this excerpt; judging
// by the name and by its use in convert_buffer_latin1, it presumably stops at the first byte
// above 127 - confirm against the full file.
2202 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
2204 for (size_t i = 0; i < size; ++i)
// Converts a latin1 buffer to UTF-8.
// The leading part that needs no conversion is copied verbatim and only the remainder is run
// through latin1_decoder. If there is no remainder, the input is passed to get_mutable_buffer
// unchanged. Returns false if the allocation fails; out_length is the converted length plus one.
2211 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2213 const uint8_t* data = static_cast<const uint8_t*>(contents);
2214 size_t data_length = size;
2216 // get size of prefix that does not need utf8 conversion
2217 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2218 assert(prefix_length <= data_length);
2220 const uint8_t* postfix = data + prefix_length;
2221 size_t postfix_length = data_length - prefix_length;
2223 // if no conversion is needed, just return the original buffer
2224 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2226 // first pass: get length in utf8 units
// prefix bytes count one-for-one; only the remainder needs to be measured by the decoder
2227 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
2229 // allocate buffer of suitable length
2230 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2231 if (!buffer) return false;
2233 // second pass: convert latin1 input to utf8
2234 memcpy(buffer, data, prefix_length);
2236 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
// converted output starts right after the copied prefix
2237 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
2239 assert(oend == obegin + length);
2242 out_buffer = buffer;
2243 out_length = length + 1;
// char build: converts `contents` from `encoding` to UTF-8.
// UTF-8 input is passed through get_mutable_buffer; UTF-16/UTF-32 input is decoded via
// convert_buffer_generic; latin1 input goes through convert_buffer_latin1.
2248 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2250 // fast path: no conversion required
2251 if (encoding == encoding_utf8)
2252 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2254 // source encoding is utf16
2255 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2257 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
// opt_true makes the decoder byte-swap each unit when the source is not in native byte order
2259 return (native_encoding == encoding) ?
2260 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2261 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2264 // source encoding is utf32
2265 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2267 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2269 return (native_encoding == encoding) ?
2270 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2271 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2274 // source encoding is latin1
2275 if (encoding == encoding_latin1)
2276 return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
2278 assert(false && "Invalid encoding"); // unreachable
// First half of a two-pass wide-to-UTF-8 conversion: returns the number of UTF-8 code units
// needed to encode the `length` wide characters at `str`.
2283 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
2285 // get length in utf8 characters
2286 return wchar_decoder::process(str, length, 0, utf8_counter());
// Second half of the two-pass conversion: writes the UTF-8 encoding of `str` into `buffer`.
// `size` must be the value as_utf8_begin returned for the same input; the assert checks that
// exactly `size` bytes were produced.
2289 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
2292 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2293 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2295 assert(begin + size == end);
2300 #ifndef PUGIXML_NO_STL
// Converts `length` wide characters at `str` to a UTF-8 std::string.
// NOTE(review): the declaration of `result` and the return statement are not visible in this excerpt.
2301 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
2303 // first pass: get length in utf8 characters
2304 size_t size = as_utf8_begin(str, length);
2306 // allocate resulting string
2308 result.resize(size);
2310 // second pass: convert to utf8
// &result[0] is only taken when the string is non-empty
2311 if (size > 0) as_utf8_end(&result[0], size, str, length);
// Converts `size` bytes of UTF-8 at `str` to a wide string.
// NOTE(review): the guard around the second pass and the return statement are not visible in
// this excerpt.
2316 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
2318 const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2320 // first pass: get length in wchar_t units
2321 size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2323 // allocate resulting string
2324 std::basic_string<wchar_t> result;
2325 result.resize(length);
2327 // second pass: convert to wchar_t
2330 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
2331 wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
// both passes must agree on the output size
2333 assert(begin + length == end);
// Decides whether a new string of `length` characters may overwrite the existing string at
// `target` in place instead of allocating new storage.
2341 template <typename Header>
2342 inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
2344 // never reuse shared memory
2345 if (header & xml_memory_page_contents_shared_mask) return false;
2347 size_t target_length = strlength(target);
2349 // always reuse document buffer memory if possible
// header_mask bit clear: reuse whenever the new string fits
2350 if ((header & header_mask) == 0) return target_length >= length;
2352 // reuse heap memory if waste is not too great
2353 const size_t reuse_threshold = 32;
// the new string must fit, and either the old string is shorter than the threshold or fewer
// than half of its characters would go unused
2355 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
// Assigns `source` (`source_length` characters, not necessarily zero-terminated) to `dest`.
// Three cases: an empty source releases the old string; a source that passes
// strcpy_insitu_allow is copied over the old string; otherwise a new string is allocated.
// `header_mask` is the bit in `header` recording that `dest` was allocated by the allocator.
// Returns false when reserve() or allocate_string() fails.
// NOTE(review): the lines that assign `dest` and the `return true` statements are not visible
// in this excerpt.
2358 template <typename String, typename Header>
2359 PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
2361 if (source_length == 0)
2363 // empty string and null pointer are equivalent, so just deallocate old memory
2364 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2366 if (header & header_mask) alloc->deallocate_string(dest);
2368 // mark the string as not allocated
2370 header &= ~header_mask;
2374 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
2376 // we can reuse old buffer, so just copy the new data (including zero terminator)
2377 memcpy(dest, source, source_length * sizeof(char_t));
2378 dest[source_length] = 0;
2384 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2386 if (!alloc->reserve()) return false;
2388 // allocate new buffer
2389 char_t* buf = alloc->allocate_string(source_length + 1);
2390 if (!buf) return false;
2392 // copy the string (including zero terminator)
2393 memcpy(buf, source, source_length * sizeof(char_t));
2394 buf[source_length] = 0;
2396 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
2397 if (header & header_mask) alloc->deallocate_string(dest);
2399 // the string is now allocated, so set the flag
2401 header |= header_mask;
// Helper that tracks a removed region ("gap") while text is shrunk in place.
// `end` points just past the current gap and `size` is used as the gap width in characters.
// NOTE(review): the struct header, member declarations and the lines that update `end` and
// `size` are not visible in this excerpt.
2412 gap(): end(0), size(0)
2416 // Push new gap, move s count characters further (skipping the gap).
2417 // Collapse previous gap.
2418 void push(char_t*& s, size_t count)
2420 if (end) // there was a gap already; collapse it
2422 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
// the length argument is in bytes, hence the casts to char*; memmove is used for the shift down
2424 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2427 s += count; // end of current gap
2434 // Collapse all gaps, return past-the-end pointer
2435 char_t* flush(char_t* s)
2439 // Move [old_gap_end, current_pos) to [old_gap_start, ...)
2441 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
// Decodes the character or entity reference starting at `s` in place and records the removed
// characters in gap `g`. Handles numeric references (&#NNN; and &#xHHH;) and the named
// references matched below.
// NOTE(review): the switch/case lines, loop headers, the character writes for named entities
// and the return statements are not visible in this excerpt.
2449 PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
// stre scans ahead of s, starting at the character after `s`
2451 char_t* stre = s + 1;
// accumulated code point of a numeric reference
2457 unsigned int ucsc = 0;
2459 if (stre[1] == 'x') // &#x... (hex code)
// a ';' here returns without decoding anything
2465 if (ch == ';') return stre;
// decimal digit; the unsigned compare rejects characters below '0' as well
2469 if (static_cast<unsigned int>(ch - '0') <= 9)
2470 ucsc = 16 * ucsc + (ch - '0');
// `| ' '` folds ASCII uppercase onto lowercase, so both a-f and A-F are accepted
2471 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
2472 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
2483 else // &#... (dec code)
2485 char_t ch = *++stre;
// a ';' immediately after "&#" returns without decoding anything
2487 if (ch == ';') return stre;
2491 if (static_cast<unsigned int>(ch - '0') <= 9)
2492 ucsc = 10 * ucsc + (ch - '0');
// write the decoded code point at s in the build's native encoding; s advances past it
2504 #ifdef PUGIXML_WCHAR_MODE
2505 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
2507 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
// the rest of the reference text (from s up to stre) becomes the gap
2510 g.push(s, stre - s);
2518 if (*stre == 'm') // &am
2520 if (*++stre == 'p' && *++stre == ';') // &amp;
2525 g.push(s, stre - s);
2529 else if (*stre == 'p') // &ap
2531 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
2536 g.push(s, stre - s);
2545 if (*++stre == 't' && *++stre == ';') // &gt;
2550 g.push(s, stre - s);
2558 if (*++stre == 't' && *++stre == ';') // &lt;
2563 g.push(s, stre - s);
2571 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
2576 g.push(s, stre - s);
// Parser helper macros. They rely on names from the scope where they are expanded: `s`
// (cursor), `endch`, `optmsk`, `cursor`, `alloc`, `ch`, `error_offset`, `error_status`.

// true if c equals e, or c is the terminating zero and the saved end character `endch` equals e
2590 #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
// advance s past any whitespace
2591 #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
// non-zero if any bit of OPT is set in the option mask
2592 #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
// append a new child of TYPE under the cursor and descend into it; fails with status_out_of_memory
2593 #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
// ascend to the parent node
2594 #define PUGI__POPNODE() { cursor = cursor->parent; }
// advance s until X holds or the terminating zero is reached
2595 #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
// advance s while X holds (no terminator check; X must become false by itself)
2596 #define PUGI__SCANWHILE(X) { while (X) ++s; }
// same as SCANWHILE but tests four characters per iteration; X must be written in terms of `ss`
2597 #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
// save the current character in ch, zero-terminate the segment there, and step past it
2598 #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
// record the error position and status, then return a null char_t* from the enclosing function
2599 #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
// raise err at position m if the terminating zero has been reached
2600 #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
// Processes comment text in place starting at `s`: line endings are normalized to '\n' and the
// scan stops at the closing "-->". `endch` is the character that the buffer's terminating zero
// replaced (see PUGI__ENDSWITH).
// NOTE(review): the gap declaration, the loop header, the terminator write and the failure
// return are not visible in this excerpt.
2602 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
// skip quickly over characters that need no special handling inside a comment
2608 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
2610 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2612 *s++ = '\n'; // replace first one with 0x0a
// in a "\r\n" pair the '\n' is dropped by turning it into a one-character gap
2614 if (*s == '\n') g.push(s, 1);
2616 else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
// skip "-->" when '>' is present; skip only "--" when '>' was replaced by the terminator
2620 return s + (s[2] == '>' ? 3 : 2);
// Processes CDATA text in place starting at `s`: line endings are normalized to '\n' and the
// scan stops at the closing "]]>". `endch` is the character that the buffer's terminating zero
// replaced (see PUGI__ENDSWITH).
// NOTE(review): the gap declaration, the loop header, the terminator write and the return
// statements are not visible in this excerpt.
2630 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
// skip quickly over characters that need no special handling inside CDATA
2636 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
2638 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2640 *s++ = '\n'; // replace first one with 0x0a
// in a "\r\n" pair the '\n' is dropped by turning it into a one-character gap
2642 if (*s == '\n') g.push(s, 1);
2644 else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
// Signature shared by all PCDATA converters: takes the text start and returns a char_t*.
2658 typedef char_t* (*strconv_pcdata_t)(char_t*);
// PCDATA converter specialized at compile time on three options:
// opt_trim (strip trailing whitespace), opt_eol (normalize line endings), opt_escape (decode
// '&' references).
// NOTE(review): the gap/begin declarations, the loop header and the return statements are not
// visible in this excerpt.
2660 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
2662 static char_t* parse(char_t* s)
// skip quickly over characters that need no special handling inside PCDATA
2670 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
2672 if (*s == '<') // PCDATA ends here
// close any pending gap so `end` is the true end of the converted text
2674 char_t* end = g.flush(s);
2676 if (opt_trim::value)
2677 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2684 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2686 *s++ = '\n'; // replace first one with 0x0a
// in a "\r\n" pair the '\n' is dropped by turning it into a one-character gap
2688 if (*s == '\n') g.push(s, 1);
2690 else if (opt_escape::value && *s == '&')
2692 s = strconv_escape(s, g);
// NOTE(review): presumably the end-of-buffer case mirroring the '<' case above - confirm
2696 char_t* end = g.flush(s);
2698 if (opt_trim::value)
2699 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2711 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
2713 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
2715 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above
2717 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
2718 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
2719 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
2720 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
2721 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
2722 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
2723 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
2724 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
2725 default: assert(false); return 0; // unreachable
// Signature of an attribute-value conversion routine: start of the value plus the quote
// character that terminates it; returns a char_t*.
2729 typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
// Attribute value converters. opt_escape::value selects at compile time whether '&' sequences are
// passed to strconv_escape. Four variants are provided and chosen by get_strconv_attribute():
//   parse_wnorm  - scans with ct_parse_attr_ws | ct_space and handles leading/trailing/inner whitespace
//   parse_wconv  - scans with ct_parse_attr_ws and handles ct_space characters individually
//   parse_eol    - scans with ct_parse_attr and handles '\r'
//   parse_simple - scans with ct_parse_attr; only the end quote and escapes are handled
// NOTE(review): 'g', the loops and the return statements of each variant are on lines not
// visible in this chunk.
2731 template <typename opt_escape> struct strconv_attribute_impl
2733 static char_t* parse_wnorm(char_t* s, char_t end_quote)
2737 // trim leading whitespaces
2738 if (PUGI__IS_CHARTYPE(*s, ct_space))
2743 while (PUGI__IS_CHARTYPE(*str, ct_space));
// skip ahead to the next character that is either attribute-special or whitespace
2750 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
2752 if (*s == end_quote)
// end of value: flush pending gaps, then examine whitespace relative to the flushed end
2754 char_t* str = g.flush(s);
2757 while (PUGI__IS_CHARTYPE(*str, ct_space));
2761 else if (PUGI__IS_CHARTYPE(*s, ct_space))
2765 if (PUGI__IS_CHARTYPE(*s, ct_space))
// find the end of the whitespace run that starts at s
2767 char_t* str = s + 1;
2768 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
2773 else if (opt_escape::value && *s == '&')
2775 s = strconv_escape(s, g);
2785 static char_t* parse_wconv(char_t* s, char_t end_quote)
2791 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
2793 if (*s == end_quote)
2799 else if (PUGI__IS_CHARTYPE(*s, ct_space))
// a '\n' at this position is handed to g via push(s, 1)
2805 if (*s == '\n') g.push(s, 1);
2809 else if (opt_escape::value && *s == '&')
2811 s = strconv_escape(s, g);
2821 static char_t* parse_eol(char_t* s, char_t end_quote)
2827 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2829 if (*s == end_quote)
2835 else if (*s == '\r')
// a '\n' at this position is handed to g via push(s, 1)
2839 if (*s == '\n') g.push(s, 1);
2841 else if (opt_escape::value && *s == '&')
2843 s = strconv_escape(s, g);
2853 static char_t* parse_simple(char_t* s, char_t end_quote)
2859 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2861 if (*s == end_quote)
2867 else if (opt_escape::value && *s == '&')
2869 s = strconv_escape(s, g);
2880 PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
2882 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
2884 switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above
2886 case 0: return strconv_attribute_impl<opt_false>::parse_simple;
2887 case 1: return strconv_attribute_impl<opt_true>::parse_simple;
2888 case 2: return strconv_attribute_impl<opt_false>::parse_eol;
2889 case 3: return strconv_attribute_impl<opt_true>::parse_eol;
2890 case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
2891 case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
2892 case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
2893 case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
2894 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
2895 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
2896 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
2897 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
2898 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
2899 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
2900 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
2901 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
2902 default: assert(false); return 0; // unreachable
2906 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2908 xml_parse_result result;
2909 result.status = status;
2910 result.offset = offset;
// Parser state. NOTE(review): the enclosing type's header is on a line not visible in this chunk.
// allocator used through PUGI__PUSHNODE / append_new_attribute while building the tree
2917 xml_allocator* alloc;
// position recorded by PUGI__THROW_ERROR; stays null when no error was raised
2918 char_t* error_offset;
// status recorded by PUGI__THROW_ERROR; starts out as status_ok
2919 xml_parse_status error_status;
// Stores the allocator and starts in the "no error" state.
2921 xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
2925 // DOCTYPE consists of nested sections of the following possible types:
2926 // <!-- ... -->, <? ... ?>, "...", '...'
// (The second and third groups referred to below are the <![ ... ]]> sections handled by
// parse_doctype_ignore and the <! ... > groups handled by parse_doctype_group.)
2929 // First group can not contain nested groups
2930 // Second group can contain nested groups of the same type
2931 // Third group can contain all other groups
// Skips one first-group section (quoted string, <? ... ?> or <!-- ... -->) starting at s.
// Anything else, or a section that runs into the null terminator, is reported as
// status_bad_doctype. NOTE(review): the lines that advance s past each delimiter and the
// final return are not visible in this chunk.
2932 char_t* parse_doctype_primitive(char_t* s)
2934 if (*s == '"' || *s == '\'')
// scan to the matching quote; 'ch' is set on a line not shown
2938 PUGI__SCANFOR(*s == ch);
2939 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2943 else if (s[0] == '<' && s[1] == '?')
2947 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2948 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2952 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2955 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2956 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2960 else PUGI__THROW_ERROR(status_bad_doctype, s);
// Skips a "<![ ... ]]>" section of a DOCTYPE, including nested sections of the same kind.
// s must point at the opening "<![" (asserted). Falling out of the scan without finding the
// end is reported as status_bad_doctype.
// NOTE(review): the loop, the nesting bookkeeping and the success return are on lines not
// visible in this chunk.
2965 char_t* parse_doctype_ignore(char_t* s)
2969 assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2974 if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2976 // nested ignore section
2980 else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2982 // ignore section end
2993 PUGI__THROW_ERROR(status_bad_doctype, s);
// Skips a "<! ... >" DOCTYPE group starting at s, dispatching nested sections to
// parse_doctype_ignore and parse_doctype_primitive. endch is the character the caller removed
// from the end of the buffer when it was null-terminated.
// NOTE(review): the loop, the 'depth' bookkeeping and the success returns are on lines not
// visible in this chunk.
2996 char_t* parse_doctype_group(char_t* s, char_t endch)
// s[0] may already have been overwritten with a terminator by the caller, hence the "== 0" alternative
3000 assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
// "<!" not followed by '-' : an ignore section or another control group (comments start with "<!-")
3005 if (s[0] == '<' && s[1] == '!' && s[2] != '-')
3010 s = parse_doctype_ignore(s);
3015 // some control group
3020 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
3022 // unknown tag (forbidden), or some primitive group
3023 s = parse_doctype_primitive(s);
// reaching this point is only acceptable when all groups are closed and the removed last character was '>'
3037 if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
// Parses a construct that starts with "<!" : a comment, a CDATA section or a DOCTYPE.
// Nodes are created under 'cursor' only when the corresponding parse_* option is set in optmsk;
// otherwise the construct is scanned and skipped. endch is the character the caller removed
// from the end of the buffer when it was null-terminated.
// Errors are reported through PUGI__THROW_ERROR (null return plus error_status/error_offset).
// NOTE(review): a number of short lines (pointer increments, else branches, the final return)
// are not visible in this chunk.
3042 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
3044 // parse node contents, starting with exclamation mark
3047 if (*s == '-') // '<!-...'
3051 if (*s == '-') // '<!--...'
3055 if (PUGI__OPTSET(parse_comments))
3057 PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
3058 cursor->value = s; // Save the offset.
// line-ending normalization is only needed when the comment text is actually kept
3061 if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
3063 s = strconv_comment(s, endch);
// a null result is reported as status_bad_comment at the start of the comment text
3065 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
3069 // Scan for terminating '-->'.
3070 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
3071 PUGI__CHECK_ERROR(status_bad_comment, s);
3073 if (PUGI__OPTSET(parse_comments))
3074 *s = 0; // Zero-terminate this segment at the first terminating '-'.
3076 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
3079 else PUGI__THROW_ERROR(status_bad_comment, s);
// "<![CDATA[" : each comparison pre-increments s, so s ends up on the final '[' when all match
3084 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
3088 if (PUGI__OPTSET(parse_cdata))
3090 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
3091 cursor->value = s; // Save the offset.
3093 if (PUGI__OPTSET(parse_eol))
3095 s = strconv_cdata(s, endch);
// a null result is reported as status_bad_cdata at the start of the CDATA text
3097 if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
3101 // Scan for terminating ']]>'.
3102 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3103 PUGI__CHECK_ERROR(status_bad_cdata, s);
3105 *s++ = 0; // Zero-terminate this segment.
3108 else // Flagged for discard, but we still have to scan for the terminator.
3110 // Scan for terminating ']]>'.
3111 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3112 PUGI__CHECK_ERROR(status_bad_cdata, s);
3117 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
3119 else PUGI__THROW_ERROR(status_bad_cdata, s);
// "<!DOCTYPE" : the final 'E' may have been the character replaced by the terminator
3121 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
// a DOCTYPE is rejected when the current node has a parent
3125 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
// 'mark' is where the doctype value is taken from if a node is created below
3127 char_t* mark = s + 9;
3129 s = parse_doctype_group(s, endch);
3132 assert((*s == 0 && endch == '>') || *s == '>');
3135 if (PUGI__OPTSET(parse_doctype))
// skip whitespace at the start of the doctype value
3137 while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
3139 PUGI__PUSHNODE(node_doctype);
3141 cursor->value = mark;
// input ended right after "<!" : use the removed last character to pick the most specific error
3144 else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
3145 else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
3146 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
// Parses a construct that starts with "<?" : a processing instruction or an XML declaration.
// ref_cursor is read on entry and written back on exit, so the caller sees any node this
// function leaves the cursor on. endch is the character the caller removed from the end of
// the buffer when it was null-terminated.
// Errors are reported as status_bad_pi through PUGI__THROW_ERROR.
// NOTE(review): several short lines (declaration of 'target' and 'value', else branches,
// terminator writes, the final return) are not visible in this chunk.
3151 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
3153 // load into registers
3154 xml_node_struct* cursor = ref_cursor;
3157 // parse node contents, starting with question mark
// the target name must begin with a start-symbol character
3163 if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
3165 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
3166 PUGI__CHECK_ERROR(status_bad_pi, s);
3168 // determine node type; stricmp / strcasecmp is not portable
// OR-ing with ' ' folds ASCII letters to lower case; the target must be exactly three characters long
3169 bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
// create a node only if the matching option (parse_declaration or parse_pi) is enabled
3171 if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
3175 // disallow non top-level declarations
3176 if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
3178 PUGI__PUSHNODE(node_declaration);
3182 PUGI__PUSHNODE(node_pi);
3185 cursor->name = target;
3189 // parse value/attributes
3193 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
3198 else if (PUGI__IS_CHARTYPE(ch, ct_space))
3205 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3206 PUGI__CHECK_ERROR(status_bad_pi, s);
3210 // replace ending ? with / so that 'element' terminates properly
3213 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
3218 // store value and step over >
3219 cursor->value = value;
3228 else PUGI__THROW_ERROR(status_bad_pi, s);
// node creation disabled: just scan to the closing "?>" and step over it
3233 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3234 PUGI__CHECK_ERROR(status_bad_pi, s);
3236 s += (s[1] == '>' ? 2 : 1);
3239 // store from registers
3240 ref_cursor = cursor;
// Main parsing loop: walks the null-terminated buffer starting at s and builds nodes and
// attributes under 'root', using 'cursor' as the current insertion point.
// optmsk holds the parse_* option bits (tested through PUGI__OPTSET); endch is the character
// the caller removed from the end of the buffer when it was null-terminated.
// Errors are reported through PUGI__THROW_ERROR (null return plus error_status/error_offset).
// NOTE(review): many short lines of this function (loop headers, labels, gotos, pointer
// increments, else branches, the final return) are not visible in this chunk; the comments
// below describe only the visible statements.
3245 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
// resolve the conversion routines once, based on the option mask
3247 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
3248 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
3251 xml_node_struct* cursor = root;
3261 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
3263 PUGI__PUSHNODE(node_element); // Append a new node to the tree.
3267 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3268 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3274 else if (PUGI__IS_CHARTYPE(ch, ct_space))
3279 PUGI__SKIPWS(); // Eat any whitespace.
3281 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
3283 xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
3284 if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
3286 a->name = s; // Save the offset.
3288 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3289 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3291 if (PUGI__IS_CHARTYPE(ch, ct_space))
3293 PUGI__SKIPWS(); // Eat any whitespace.
3299 if (ch == '=') // '<... #=...'
3301 PUGI__SKIPWS(); // Eat any whitespace.
3303 if (*s == '"' || *s == '\'') // '<... #="...'
3305 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
3306 ++s; // Step over the quote.
3307 a->value = s; // Save the offset.
// convert the value in place; the saved quote character marks where it ends
3309 s = strconv_attribute(s, ch);
3311 if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
3313 // After this line the loop continues from the start;
3314 // Whitespaces, / and > are ok, symbols and EOF are wrong,
3315 // everything else will be detected
3316 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
3318 else PUGI__THROW_ERROR(status_bad_attribute, s);
3320 else PUGI__THROW_ERROR(status_bad_attribute, s);
// input ended here; acceptable only if the removed last character was the closing '>'
3332 else if (*s == 0 && endch == '>')
3337 else PUGI__THROW_ERROR(status_bad_start_element, s);
3345 else if (*s == 0 && endch == '>')
3349 else PUGI__THROW_ERROR(status_bad_start_element, s);
3354 else if (ch == '/') // '<#.../'
3356 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
3358 PUGI__POPNODE(); // Pop.
3364 // we stepped over null terminator, backtrack & handle closing tag
3367 if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
3369 else PUGI__THROW_ERROR(status_bad_start_element, s);
// closing tag: its name is compared character by character against the current node's name
3377 char_t* name = cursor->name;
3378 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3380 while (PUGI__IS_CHARTYPE(*s, ct_symbol))
3382 if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
// the closing name stopped short: if only the removed last character is missing, the tag is
// truncated (bad end element), otherwise the names differ (mismatch)
3387 if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
3388 else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3391 PUGI__POPNODE(); // Pop.
3397 if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3401 if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3405 else if (*s == '?') // '<?...'
3407 s = parse_question(s, cursor, optmsk, endch);
// parse_question leaves the cursor on a declaration node when its attributes still need parsing
3411 if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
3413 else if (*s == '!') // '<!...'
3415 s = parse_exclamation(s, cursor, optmsk, endch);
3418 else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
3419 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
3423 mark = s; // Save this offset while searching for a terminator.
3425 PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
3427 if (*s == '<' || !*s)
3429 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
// whitespace-only text is governed by parse_ws_pcdata, parse_ws_pcdata_single and parse_trim_pcdata
3432 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
3436 else if (PUGI__OPTSET(parse_ws_pcdata_single))
// in single mode, skip the whitespace unless it is directly followed by "</" and the node has no children yet
3438 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
3442 if (!PUGI__OPTSET(parse_trim_pcdata))
// text is stored only below a parent node, or anywhere when parsing a fragment
3445 if (cursor->parent || PUGI__OPTSET(parse_fragment))
// parse_embed_pcdata: the first text of a childless element is stored on the element itself
3447 if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
3449 cursor->value = s; // Save the offset.
3453 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
3455 cursor->value = s; // Save the offset.
3457 PUGI__POPNODE(); // Pop since this is a standalone.
3460 s = strconv_pcdata(s);
3466 PUGI__SCANFOR(*s == '<'); // '...<'
3477 // check that last tag is closed
3478 if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
3483 #ifdef PUGIXML_WCHAR_MODE
3484 static char_t* parse_skip_bom(char_t* s)
3486 unsigned int bom = 0xfeff;
3487 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
3490 static char_t* parse_skip_bom(char_t* s)
3492 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
3496 static bool has_element_node_siblings(xml_node_struct* node)
3500 if (PUGI__NODETYPE(node) == node_element) return true;
3502 node = node->next_sibling;
// Entry point of the parser: parses 'length' characters of the mutable 'buffer' into nodes
// under 'root', allocating through 'xmldoc', and returns status plus error offset.
// The last character of the buffer is saved in 'endch' and overwritten with a terminator so
// the scanning loops can stop on 0; the parsing routines receive 'endch' to account for it.
// NOTE(review): several short lines (the conditions guarding the early returns, the final
// return) are not visible in this chunk.
3508 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
3510 // early-out for empty documents
3512 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
3514 // get last child of the root before parsing
// first_child->prev_sibling_c is read as the last existing child here; "+ 0" is applied to
// the member before it is stored in a plain pointer
3515 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
3517 // create parser on stack
3518 xml_parser parser(static_cast<xml_allocator*>(xmldoc));
3520 // save last character and make buffer zero-terminated (speeds up parsing)
3521 char_t endch = buffer[length - 1];
3522 buffer[length - 1] = 0;
3524 // skip BOM to make sure it does not end up as part of parse output
3525 char_t* buffer_data = parse_skip_bom(buffer);
3527 // perform actual parsing
3528 parser.parse_tree(buffer_data, root, optmsk, endch);
// the return value of parse_tree is not used; the outcome is read from the parser's error fields
3530 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
3531 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
3535 // since we removed last character, we have to handle the only possible false positive (stray <)
3537 return make_parse_result(status_unrecognized_tag, length - 1);
3539 // check if there are any element nodes parsed
// nodes parsed by this call are those after last_root_child (or all children if there was none)
3540 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
3542 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
3543 return make_parse_result(status_no_document_element, length - 1);
3547 // roll back offset if it occurs on a null terminator in the source buffer
3548 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
3556 // Output facilities
3557 PUGI__FN xml_encoding get_write_native_encoding()
3559 #ifdef PUGIXML_WCHAR_MODE
3560 return get_wchar_encoding();
3562 return encoding_utf8;
3566 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
3568 // replace wchar encoding with utf implementation
3569 if (encoding == encoding_wchar) return get_wchar_encoding();
3571 // replace utf16 encoding with utf16 with specific endianness
3572 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3574 // replace utf32 encoding with utf32 with specific endianness
3575 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3577 // only do autodetection if no explicit encoding is requested
3578 if (encoding != encoding_auto) return encoding;
3580 // assume utf8 encoding
3581 return encoding_utf8;
3584 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
3586 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3588 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3590 return static_cast<size_t>(end - dest) * sizeof(*dest);
3593 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
3595 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3597 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3601 for (typename T::value_type i = dest; i != end; ++i)
3602 *i = endian_swap(*i);
3605 return static_cast<size_t>(end - dest) * sizeof(*dest);
3608 #ifdef PUGIXML_WCHAR_MODE
3609 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3611 if (length < 1) return 0;
3613 // discard last character if it's the lead of a surrogate pair
3614 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
3617 PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3619 // only endian-swapping is required
3620 if (need_endian_swap_utf(encoding, get_wchar_encoding()))
3622 convert_wchar_endian_swap(r_char, data, length);
3624 return length * sizeof(char_t);
3628 if (encoding == encoding_utf8)
3629 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
3632 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3634 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3636 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
3640 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3642 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3644 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
3647 // convert to latin1
3648 if (encoding == encoding_latin1)
3649 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
3651 assert(false && "Invalid encoding"); // unreachable
3655 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3657 if (length < 5) return 0;
3659 for (size_t i = 1; i <= 4; ++i)
3661 uint8_t ch = static_cast<uint8_t>(data[length - i]);
3663 // either a standalone character or a leading one
3664 if ((ch & 0xc0) != 0x80) return length - i;
3667 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
3671 PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3673 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3675 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3677 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
3680 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3682 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3684 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
3687 if (encoding == encoding_latin1)
3688 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
3690 assert(false && "Invalid encoding"); // unreachable
// Buffers char_t output in a fixed-size array and forwards it to an xml_writer, converting to
// the requested encoding on the way out when it differs from the native one.
// NOTE(review): a number of short lines of this class (access specifiers, the no-argument
// flush() overload, enum/union headers, some member declarations, returns) are not visible in
// this chunk; the comments below describe only the visible statements.
3695 class xml_buffered_writer
// copy construction and assignment are declared here; no definitions are visible in this
// chunk - presumably this makes the class non-copyable (confirm)
3697 xml_buffered_writer(const xml_buffered_writer&);
3698 xml_buffered_writer& operator=(const xml_buffered_writer&);
// Binds the writer, starts with an empty buffer and resolves the user encoding to a concrete one.
3701 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
// the write(d0..d5) overloads below need room for up to 6 characters after a flush
3703 PUGI__STATIC_ASSERT(bufcapacity >= 8);
// flushes the currently buffered characters (enclosing function header is on a hidden line)
3708 flush(buffer, bufsize);
// Writes 'size' characters starting at 'data' to the underlying writer, converting if needed.
3713 void flush(const char_t* data, size_t size)
3715 if (size == 0) return;
3717 // fast path, just write data
3718 if (encoding == get_write_native_encoding())
3719 writer.write(data, size * sizeof(char_t));
// slow path: convert into the scratch storage and write the resulting bytes
3723 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3724 assert(result <= sizeof(scratch));
3727 writer.write(scratch.data_u8, result);
// Writes 'length' characters, bypassing the buffer for chunks larger than bufcapacity.
3731 void write_direct(const char_t* data, size_t length)
3733 // flush the remaining buffer contents
3736 // handle large chunks
3737 if (length > bufcapacity)
3739 if (encoding == get_write_native_encoding())
3741 // fast path, can just write data chunk
3742 writer.write(data, length * sizeof(char_t));
3746 // need to convert in suitable chunks
3747 while (length > bufcapacity)
3749 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3750 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3751 size_t chunk_size = get_valid_length(data, bufcapacity);
3754 // convert chunk and write
3755 flush(data, chunk_size);
3759 length -= chunk_size;
3762 // small tail is copied below
3766 memcpy(buffer + bufsize, data, length * sizeof(char_t));
// Appends 'length' characters to the buffer when they fit, otherwise defers to write_direct.
3770 void write_buffer(const char_t* data, size_t length)
3772 size_t offset = bufsize;
3774 if (offset + length <= bufcapacity)
3776 memcpy(buffer + offset, data, length * sizeof(char_t));
3777 bufsize = offset + length;
3781 write_direct(data, length);
// Appends a null-terminated string.
3785 void write_string(const char_t* data)
3787 // write the part of the string that fits in the buffer
3788 size_t offset = bufsize;
3790 while (*data && offset < bufcapacity)
3791 buffer[offset++] = *data++;
// offset < bufcapacity here means the copy loop stopped on the string's terminator
3794 if (offset < bufcapacity)
3800 // backtrack a bit if we have split the codepoint
3801 size_t length = offset - bufsize;
3802 size_t extra = length - get_valid_length(data - length, length);
3804 bufsize = offset - extra;
// the backed-out characters are written again together with the rest of the string
3806 write_direct(data - extra, strlength(data) + extra);
// The write(d0, ...) overloads append 1 to 6 characters. Each one calls the no-argument
// flush() (not visible here) when fewer than N slots remain and continues from the offset
// that call returns.
3810 void write(char_t d0)
3812 size_t offset = bufsize;
3813 if (offset > bufcapacity - 1) offset = flush();
3815 buffer[offset + 0] = d0;
3816 bufsize = offset + 1;
3819 void write(char_t d0, char_t d1)
3821 size_t offset = bufsize;
3822 if (offset > bufcapacity - 2) offset = flush();
3824 buffer[offset + 0] = d0;
3825 buffer[offset + 1] = d1;
3826 bufsize = offset + 2;
3829 void write(char_t d0, char_t d1, char_t d2)
3831 size_t offset = bufsize;
3832 if (offset > bufcapacity - 3) offset = flush();
3834 buffer[offset + 0] = d0;
3835 buffer[offset + 1] = d1;
3836 buffer[offset + 2] = d2;
3837 bufsize = offset + 3;
3840 void write(char_t d0, char_t d1, char_t d2, char_t d3)
3842 size_t offset = bufsize;
3843 if (offset > bufcapacity - 4) offset = flush();
3845 buffer[offset + 0] = d0;
3846 buffer[offset + 1] = d1;
3847 buffer[offset + 2] = d2;
3848 buffer[offset + 3] = d3;
3849 bufsize = offset + 4;
3852 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3854 size_t offset = bufsize;
3855 if (offset > bufcapacity - 5) offset = flush();
3857 buffer[offset + 0] = d0;
3858 buffer[offset + 1] = d1;
3859 buffer[offset + 2] = d2;
3860 buffer[offset + 3] = d3;
3861 buffer[offset + 4] = d4;
3862 bufsize = offset + 5;
3865 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3867 size_t offset = bufsize;
3868 if (offset > bufcapacity - 6) offset = flush();
3870 buffer[offset + 0] = d0;
3871 buffer[offset + 1] = d1;
3872 buffer[offset + 2] = d2;
3873 buffer[offset + 3] = d3;
3874 buffer[offset + 4] = d4;
3875 buffer[offset + 5] = d5;
3876 bufsize = offset + 6;
3879 // utf8 maximum expansion: x4 (-> utf32)
3880 // utf16 maximum expansion: x2 (-> utf32)
3881 // utf32 maximum expansion: x1
// PUGIXML_MEMORY_OUTPUT_STACK, when defined, supplies a value here (surrounding lines not shown)
3885 #ifdef PUGIXML_MEMORY_OUTPUT_STACK
3886 PUGIXML_MEMORY_OUTPUT_STACK
// the byte budget is split so that each buffered char_t has 4 bytes of scratch space alongside it
3891 bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
// pending output, in native encoding
3894 char_t buffer[bufcapacity];
// typed views of the conversion scratch storage; every view is at least 4 * bufcapacity bytes
// except data_char, which is bufcapacity * sizeof(char_t)
3898 uint8_t data_u8[4 * bufcapacity];
3899 uint16_t data_u16[2 * bufcapacity];
3900 uint32_t data_u32[bufcapacity];
3901 char_t data_char[bufcapacity];
// concrete output encoding, resolved in the constructor
3906 xml_encoding encoding;
// Writes the null-terminated string s, replacing characters flagged by 'type' in the extended
// character table with XML entities or numeric character references.
// 'flags' holds format_* bits; format_attribute_single_quote and format_skip_control_chars are
// consulted here.
// NOTE(review): the loop header, the switch/case labels, some else branches and the returns
// are on lines not visible in this chunk.
3909 PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3913 const char_t* prev = s;
3915 // While *s is a usual symbol
3916 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
// emit the run of ordinary characters collected since 'prev' in one call
3918 writer.write_buffer(prev, static_cast<size_t>(s - prev));
3924 writer.write('&', 'a', 'm', 'p', ';');
3928 writer.write('&', 'l', 't', ';');
3932 writer.write('&', 'g', 't', ';');
// quote handling depends on which quote character delimits attribute values
3936 if (flags & format_attribute_single_quote)
3939 writer.write('&', 'q', 'u', 'o', 't', ';');
3943 if (flags & format_attribute_single_quote)
3944 writer.write('&', 'a', 'p', 'o', 's', ';');
3949 default: // s is not a usual symbol
3951 unsigned int ch = static_cast<unsigned int>(*s++);
// unless control characters are skipped, emit "&#NN;" with ch formatted as exactly two decimal digits
3954 if (!(flags & format_skip_control_chars))
3955 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
3961 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3963 if (flags & format_no_escapes)
3964 writer.write_string(s);
3966 text_output_escaped(writer, s, type, flags);
// Writes s wrapped in "<![CDATA[" ... "]]>". Because "]]>" inside the text would end the
// section early, the text is split at every such occurrence (see the comments below).
// NOTE(review): the loop around these statements and the statement that skips "]]" are on
// lines not visible in this chunk.
3969 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
// "<![CDATA[" is 9 characters, emitted as 5 + 4
3973 writer.write('<', '!', '[', 'C', 'D');
3974 writer.write('A', 'T', 'A', '[');
3976 const char_t* prev = s;
3978 // look for ]]> sequence - we can't output it as is since it terminates CDATA
3979 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3981 // skip ]] if we stopped at ]]>, > will go to the next CDATA section
3984 writer.write_buffer(prev, static_cast<size_t>(s - prev));
3986 writer.write(']', ']', '>');
// Writes the indent string 'depth' times. Indent strings of 1 to 4 characters use the
// fixed-arity writer.write overloads; other lengths go through write_buffer.
// NOTE(review): the case labels and break statements of the switch are on lines not visible
// in this chunk.
3991 PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
3993 switch (indent_length)
3997 for (unsigned int i = 0; i < depth; ++i)
3998 writer.write(indent[0]);
4004 for (unsigned int i = 0; i < depth; ++i)
4005 writer.write(indent[0], indent[1]);
4011 for (unsigned int i = 0; i < depth; ++i)
4012 writer.write(indent[0], indent[1], indent[2]);
4018 for (unsigned int i = 0; i < depth; ++i)
4019 writer.write(indent[0], indent[1], indent[2], indent[3]);
// general case: copy the whole indent string each time
4025 for (unsigned int i = 0; i < depth; ++i)
4026 writer.write_buffer(indent, indent_length);
// Writes s as an XML comment ("<!--" ... "-->"). A '-' that is followed by another '-' or by
// the end of the text is written as "- " so the body never contains "--" and never ends in '-'.
// NOTE(review): the loop around these statements and the condition guarding the "- " write
// are on lines not visible in this chunk.
4031 PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
4033 writer.write('<', '!', '-', '-');
4037 const char_t* prev = s;
4039 // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
4040 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
// emit the safe run collected since 'prev'
4042 writer.write_buffer(prev, static_cast<size_t>(s - prev));
4048 writer.write('-', ' ');
4053 writer.write('-', '-', '>');
// Writes the value of a processing instruction. Any "?>" inside the value is written as "? >"
// so that it cannot terminate the instruction early.
// NOTE(review): the loop around these statements and the pointer updates are on lines not
// visible in this chunk.
4056 PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
4060 const char_t* prev = s;
4062 // look for ?> sequence - we can't output it since ?> terminates PI
4063 while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
// emit the safe run collected since 'prev'
4065 writer.write_buffer(prev, static_cast<size_t>(s - prev));
4069 assert(s[0] == '?' && s[1] == '>');
4071 writer.write('?', ' ', '>');
// Writes all attributes of 'node' as name=quote value quote, in list order.
// The quote character is '\'' when format_attribute_single_quote is set, otherwise '"'.
// When format_indent_attributes is set and format_raw is not, attributes are indented one
// level deeper than the node (depth + 1).
// NOTE(review): the else branch, the newline/space separators and the guard around the value
// output are on lines not visible in this chunk.
4077 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
// name written for attributes whose name pointer is null
4079 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4080 const char_t enquotation_char = (flags & format_attribute_single_quote) ? '\'' : '"';
4082 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
// format_raw overrides format_indent_attributes
4084 if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
4088 text_output_indent(writer, indent, indent_length, depth + 1);
4095 writer.write_string(a->name ? a->name + 0 : default_name);
4096 writer.write('=', enquotation_char);
// attribute values are escaped using the ctx_special_attr character class
4099 text_output(writer, a->value, ctx_special_attr, flags);
4101 writer.write(enquotation_char);
// Writes the opening part of an element: tag name, attributes, and - depending on whether the
// node has a value and/or children - either an empty-element form, an embedded value, or an
// open start tag. The bool result is used by node_output to decide whether to descend into
// the node's children.
// NOTE(review): the branch structure on node->value, several single-character writes and
// all return statements are on lines not visible in this chunk.
4105 PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
// name written for elements whose name pointer is null
4107 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4108 const char_t* name = node->name ? node->name + 0 : default_name;
4111 writer.write_string(name);
4113 if (node->first_attribute)
4114 node_output_attributes(writer, node, indent, indent_length, flags, depth);
4116 // element nodes can have value if parse_embed_pcdata was used
4119 if (!node->first_child)
// format_no_empty_element_tags: write an explicit closing tag instead of the "/>" form
4121 if (flags & format_no_empty_element_tags)
4123 writer.write('>', '<', '/');
4124 writer.write_string(name);
4131 if ((flags & format_raw) == 0)
4134 writer.write('/', '>');
// embedded value: escaped with the ctx_special_pcdata character class
4150 text_output(writer, node->value, ctx_special_pcdata, flags);
// value but no children: the element is closed right after its value
4152 if (!node->first_child)
4154 writer.write('<', '/');
4155 writer.write_string(name);
4167 PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
4169 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4170 const char_t* name = node->name ? node->name + 0 : default_name;
4172 writer.write('<', '/');
4173 writer.write_string(name);
// Writes a node that has no children to traverse: PCDATA, CDATA, comment, processing
// instruction, declaration or DOCTYPE, dispatching on the node type.
// Null names are written as ":anonymous"; null values are written as empty text where shown.
// NOTE(review): most case labels, break statements and some guards/separators of the switch
// are on lines not visible in this chunk.
4177 PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
4179 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4181 switch (PUGI__NODETYPE(node))
// plain text: escaped with the ctx_special_pcdata character class
4184 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
4188 text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4192 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
// processing instruction: "<?" name [value] "?>"
4196 writer.write('<', '?');
4197 writer.write_string(node->name ? node->name + 0 : default_name);
4202 node_output_pi_value(writer, node->value);
4205 writer.write('?', '>');
4208 case node_declaration:
4209 writer.write('<', '?');
4210 writer.write_string(node->name ? node->name + 0 : default_name);
// declaration attributes are always written in raw form, with an empty indent string
4211 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
4212 writer.write('?', '>');
// "<!DOCTYPE" is 9 characters, emitted as 5 + 4
4216 writer.write('<', '!', 'D', 'O', 'C');
4217 writer.write('T', 'Y', 'P', 'E');
// the doctype value is written verbatim, without escaping
4222 writer.write_string(node->value);
4229 assert(false && "Invalid node type"); // unreachable
// Serializes the subtree rooted at `root` iteratively: the loop descends through first_child,
// advances through next_sibling and climbs back through parent, writing element closing tags on
// the way up, until it is back at root.
// indent_length is the length of `indent` only when indentation was requested (format_indent or
// format_indent_attributes) and format_raw is off; otherwise it is 0, which disables indentation.
// indent_flags records whether a newline and/or an indent is due before the next piece of output;
// text nodes (PCDATA/CDATA) are written without the newline/indent handling applied to other nodes.
// NOTE(review): depth updates and the newline writes are on lines not visible in this excerpt.
4239 PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
4241 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
4242 unsigned int indent_flags = indent_indent;
4244 xml_node_struct* node = root;
4250 // begin writing current node
4251 if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
4253 node_output_simple(writer, node, flags);
4259 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4262 if ((indent_flags & indent_indent) && indent_length)
4263 text_output_indent(writer, indent, indent_length, depth);
4265 if (PUGI__NODETYPE(node) == node_element)
4267 indent_flags = indent_newline | indent_indent;
4269 if (node_output_start(writer, node, indent, indent_length, flags, depth))
4271 // element nodes can have value if parse_embed_pcdata was used
4275 node = node->first_child;
4280 else if (PUGI__NODETYPE(node) == node_document)
4282 indent_flags = indent_indent;
4284 if (node->first_child)
4286 node = node->first_child;
4292 node_output_simple(writer, node, flags);
4294 indent_flags = indent_newline | indent_indent;
4298 // continue to the next node
4299 while (node != root)
4301 if (node->next_sibling)
4303 node = node->next_sibling;
4307 node = node->parent;
4309 // write closing node
4310 if (PUGI__NODETYPE(node) == node_element)
4314 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4317 if ((indent_flags & indent_indent) && indent_length)
4318 text_output_indent(writer, indent, indent_length, depth);
4320 node_output_end(writer, node);
4322 indent_flags = indent_newline | indent_indent;
4326 while (node != root);
4328 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
// Scans the direct children of `node` in order: returns true at the first declaration node and
// false as soon as an element node is reached. (The result when neither is found is on a line
// not visible in this excerpt.)
4332 PUGI__FN bool has_declaration(xml_node_struct* node)
4334 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
4336 xml_node_type type = PUGI__NODETYPE(child);
4338 if (type == node_declaration) return true;
4339 if (type == node_element) return false;
// Walks the attribute list of `node`; presumably reports whether `attr` is one of its entries
// (the comparison and return statements are not visible in this excerpt — TODO confirm).
4345 PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
4347 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4354 PUGI__FN bool allow_insert_attribute(xml_node_type parent)
4356 return parent == node_element || parent == node_declaration;
// Decides whether a node of type `child` may be placed under a node of type `parent`:
// only documents and elements accept children; document and null nodes can never be children;
// declarations and doctypes are rejected unless the parent is a document.
// (The final return for the accepted case is not visible in this excerpt.)
4359 PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
4361 if (parent != node_document && parent != node_element) return false;
4362 if (child == node_document || child == node_null) return false;
4363 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
// Validates moving `child` under `parent`. Three conditions are checked, as the comments below
// state: the node types must be compatible (allow_insert_child), both nodes must share the same
// root, and `parent` must not lie inside the subtree of `child`.
// (The return statements and the ancestor walk starting at `cur` are not visible in this excerpt.)
4368 PUGI__FN bool allow_move(xml_node parent, xml_node child)
4370 // check that child can be a child of parent
4371 if (!allow_insert_child(parent.type(), child.type()))
4374 // check that node is not moved between documents
4375 if (parent.root() != child.root())
4378 // check that new parent is not in the child subtree
4379 xml_node cur = parent;
// Copies the string `source` into `dest`, whose allocation state is tracked by `header` under
// `header_mask`. The destination must be empty and not marked as allocated (asserted).
// When an allocator is supplied and the source's header_mask bit is clear, both headers are
// flagged with xml_memory_page_contents_shared_mask; the other visible path copies the text
// with strcpy_insitu.
// NOTE(review): the statement that aliases dest to source on the shared path, and the guard for
// a null source, are not visible in this excerpt — confirm.
4392 template <typename String, typename Header>
4393 PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
4395 assert(!dest && (header & header_mask) == 0);
4399 if (alloc && (source_header & header_mask) == 0)
4403 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
4404 header |= xml_memory_page_contents_shared_mask;
4405 source_header |= xml_memory_page_contents_shared_mask;
4408 strcpy_insitu(dest, header, header_mask, source, strlength(source));
// Copies the name, the value and every attribute (name and value) of source node `sn` into
// destination node `dn`. Attributes are appended to `dn` in source order using dn's allocator.
// `shared_alloc` is forwarded to node_copy_string unchanged.
// (Handling of a failed append_new_attribute is on lines not visible in this excerpt.)
4412 PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
4414 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
4415 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
4417 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
4419 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4423 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4424 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
// Deep-copies the subtree of `sn` into `dn` without recursion. The contents of `sn` itself are
// copied first; then `sit` walks the source descendants while `dit` tracks the matching
// destination parent, appending a new node of the same type for each source node visited.
// shared_alloc is non-null only when source and destination use the same allocator.
// NOTE(review): the lines that move `dit` up and down, and the check that skips the freshly
// created copy when copying into a descendant, are not visible in this excerpt.
4429 PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
4431 xml_allocator& alloc = get_allocator(dn);
4432 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
4434 node_copy_contents(dn, sn, shared_alloc);
4436 xml_node_struct* dit = dn;
4437 xml_node_struct* sit = sn->first_child;
4439 while (sit && sit != sn)
4441 // loop invariant: dit is inside the subtree rooted at dn
4444 // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop
4447 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
4451 node_copy_contents(copy, sit, shared_alloc);
4453 if (sit->first_child)
4456 sit = sit->first_child;
4462 // continue to the next node
4465 if (sit->next_sibling)
4467 sit = sit->next_sibling;
4474 // loop invariant: dit is inside the subtree rooted at dn while sit is inside sn
4475 assert(sit == sn || dit);
4480 assert(!sit || dit == dn->parent);
// Copies the name and value of attribute `sa` into attribute `da`. The allocator is passed on
// as shared only when both attributes resolve to the same allocator; otherwise 0 is passed.
4483 PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
4485 xml_allocator& alloc = get_allocator(da);
4486 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4488 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4489 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4492 inline bool is_text_node(xml_node_struct* node)
4494 xml_node_type type = PUGI__NODETYPE(node);
4496 return type == node_pcdata || type == node_cdata;
4499 // get value with conversion functions
// Parses an integer from `value` into the unsigned type U, saturating instead of wrapping.
// Leading characters classified as ct_space are skipped, then one optional '+' or '-'.
// A "0x"/"0X" prefix selects hexadecimal, otherwise digits are read as decimal.
// Overflow is detected from the number of significant digits (leading zeros are skipped first):
//   hex     - more than sizeof(U) * 2 digits;
//   decimal - more digits than U can hold, or exactly the maximum count with a leading digit
//             above max_lead, or equal to max_lead while the accumulated result has wrapped
//             (its high bit is no longer set).
// Results: a negative input yields the two's-complement negation of the magnitude, clamped to
// `minv`; a non-negative input is clamped to `maxv`.
// NOTE(review): the declaration of `result`, the digit loops and the sign branch are on lines
// not visible in this excerpt. The two negative-path returns below are alternatives, one being
// the crayc++ workaround.
4500 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv)
4503 const char_t* s = value;
4505 while (PUGI__IS_CHARTYPE(*s, ct_space))
4508 bool negative = (*s == '-');
4510 s += (*s == '+' || *s == '-');
4512 bool overflow = false;
4514 if (s[0] == '0' && (s[1] | ' ') == 'x')
4518 // since overflow detection relies on length of the sequence skip leading zeros
4522 const char_t* start = s;
4526 if (static_cast<unsigned>(*s - '0') < 10)
4527 result = result * 16 + (*s - '0');
4528 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
4529 result = result * 16 + ((*s | ' ') - 'a' + 10);
4536 size_t digits = static_cast<size_t>(s - start);
4538 overflow = digits > sizeof(U) * 2;
4542 // since overflow detection relies on length of the sequence skip leading zeros
4546 const char_t* start = s;
4550 if (static_cast<unsigned>(*s - '0') < 10)
4551 result = result * 10 + (*s - '0');
4558 size_t digits = static_cast<size_t>(s - start);
4560 PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
4562 const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
4563 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
4564 const size_t high_bit = sizeof(U) * 8 - 1;
4566 overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
4571 // Workaround for crayc++ CC-3059: Expected no overflow in routine.
4573 return (overflow || result > ~minv + 1) ? minv : ~result + 1;
4575 return (overflow || result > 0 - minv) ? minv : 0 - result;
4579 return (overflow || result > maxv) ? maxv : result;
// Parses `value` as an int via string_to_integer on unsigned int, with INT_MIN (as unsigned)
// and INT_MAX as the clamping bounds; the unsigned result is converted back to int on return.
4582 PUGI__FN int get_value_int(const char_t* value)
4584 return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX);
// Parses `value` as an unsigned int, clamped to the bounds 0 and UINT_MAX.
4587 PUGI__FN unsigned int get_value_uint(const char_t* value)
4589 return string_to_integer<unsigned int>(value, 0, UINT_MAX);
// Converts `value` to double with the C library: wcstod when PUGIXML_WCHAR_MODE is defined,
// strtod otherwise. No end pointer is requested, so trailing characters are not reported.
4592 PUGI__FN double get_value_double(const char_t* value)
4594 #ifdef PUGIXML_WCHAR_MODE
4595 return wcstod(value, 0);
4597 return strtod(value, 0);
// Converts `value` to float by parsing it as a double (wcstod in PUGIXML_WCHAR_MODE, strtod
// otherwise) and narrowing the result.
4601 PUGI__FN float get_value_float(const char_t* value)
4603 #ifdef PUGIXML_WCHAR_MODE
4604 return static_cast<float>(wcstod(value, 0));
4606 return static_cast<float>(strtod(value, 0));
4610 PUGI__FN bool get_value_bool(const char_t* value)
4612 // only look at first char
4613 char_t first = *value;
4615 // 1*, t* (true), T* (True), y* (yes), Y* (YES)
4616 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
4619 #ifdef PUGIXML_HAS_LONG_LONG
// Parses `value` as a long long via string_to_integer on unsigned long long, with LLONG_MIN
// (as unsigned) and LLONG_MAX as the clamping bounds.
4620 PUGI__FN long long get_value_llong(const char_t* value)
4622 return string_to_integer<unsigned long long>(value, static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
// Parses `value` as an unsigned long long, clamped to the bounds 0 and ULLONG_MAX.
4625 PUGI__FN unsigned long long get_value_ullong(const char_t* value)
4627 return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
// Formats `value` in decimal into the tail of the buffer [begin, end), writing digits backwards
// starting at end - 1. When `negative` is set the magnitude is obtained as 0 - value, relying on
// unsigned wraparound. Returns a pointer to the first character of the text, which extends to
// `end`: `result` for negative values, `result + 1` otherwise.
// NOTE(review): the digit loop and, presumably, the store of '-' at `result` are on lines not
// visible in this excerpt; no terminator is written on the visible lines.
4631 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
4633 char_t* result = end - 1;
4634 U rest = negative ? 0 - value : value;
4638 *result-- = static_cast<char_t>('0' + (rest % 10));
4643 assert(result >= begin);
4648 return result + !negative;
4651 // set value with conversion functions
// Stores the narrow string `buf` into dest/header through strcpy_insitu.
// In PUGIXML_WCHAR_MODE each char is first widened by plain assignment into a local `wbuf`
// (its declaration is not visible in this excerpt), which the assert requires to be large enough;
// otherwise `buf` is passed through directly. Returns the strcpy_insitu result.
4652 template <typename String, typename Header>
4653 PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
4655 #ifdef PUGIXML_WCHAR_MODE
4657 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4660 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4662 return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4664 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
// Formats an integer into a local buffer with integer_to_string and stores the resulting
// [begin, end) range into dest/header through strcpy_insitu. `value` carries the bits of the
// number as the unsigned type U and `negative` tells whether it should be printed with a sign.
// (The declaration of `buf` is not visible in this excerpt.)
4668 template <typename U, typename String, typename Header>
4669 PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
4672 char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4673 char_t* begin = integer_to_string(buf, end, value, negative);
4675 return strcpy_insitu(dest, header, header_mask, begin, end - begin);
// Stores a float as text: the value is promoted to double, formatted with "%.*g" using
// `precision` significant digits into a local buffer, and handed to set_value_ascii.
// (The declaration of `buf` is not visible in this excerpt.)
4678 template <typename String, typename Header>
4679 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value, int precision)
4682 PUGI__SNPRINTF(buf, "%.*g", precision, double(value));
4684 return set_value_ascii(dest, header, header_mask, buf);
// Stores a double as text: formatted with "%.*g" using `precision` significant digits into a
// local buffer, then handed to set_value_ascii.
// (The declaration of `buf` is not visible in this excerpt.)
4687 template <typename String, typename Header>
4688 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value, int precision)
4691 PUGI__SNPRINTF(buf, "%.*g", precision, value);
4693 return set_value_ascii(dest, header, header_mask, buf);
// Stores the literal "true" (4 characters) or "false" (5 characters) into dest/header,
// depending on `value`; the explicit lengths must match the literals.
4696 template <typename String, typename Header>
4697 PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
4699 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
// Parses `size` bytes at `contents` into the tree under `root` of document `doc`.
// Steps visible below: reject a null buffer with a non-zero size (status_io_error); resolve the
// actual encoding; obtain a buffer in the internal encoding through convert_buffer
// (status_out_of_memory on failure); if the caller passed ownership (`own`) and a conversion
// produced a different buffer, free the original; report the buffer through *out_buffer when
// the document is now responsible for it; remember it in doc->buffer; run the parser and record
// the detected encoding in the result.
// (Declarations of `buffer` / `length` and the final return are not visible in this excerpt.)
4702 PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4704 // check input buffer
4705 if (!contents && size) return make_parse_result(status_io_error);
4707 // get actual encoding
4708 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4710 // get private buffer
4714 // coverity[var_deref_model]
4715 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4717 // delete original buffer if we performed a conversion
4718 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4720 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4721 if (own || buffer != contents) *out_buffer = buffer;
4723 // store buffer for offset_debug
4724 doc->buffer = buffer;
4727 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4729 // remember encoding
4730 res.encoding = buffer_encoding;
4735 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
// Determines the size of `file` by seeking to the end, reading the position, and seeking back
// to the start. Three platform variants pick the widest available offset type (_fseeki64 /
// _ftelli64, fseeko64 / ftello64, or plain fseek / ftell).
// Returns status_io_error when the reported length is negative and status_out_of_memory when
// the length does not survive a round trip through size_t; otherwise stores it in out_result.
// Note that the return values of the seek calls themselves are not checked.
// (The success return is on a line not visible in this excerpt.)
4736 PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
4738 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
4739 // there are 64-bit versions of fseek/ftell, let's use them
4740 typedef __int64 length_type;
4742 _fseeki64(file, 0, SEEK_END);
4743 length_type length = _ftelli64(file);
4744 _fseeki64(file, 0, SEEK_SET);
4745 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
4746 // there are 64-bit versions of fseek/ftell, let's use them
4747 typedef off64_t length_type;
4749 fseeko64(file, 0, SEEK_END);
4750 length_type length = ftello64(file);
4751 fseeko64(file, 0, SEEK_SET);
4753 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
4754 typedef long length_type;
4756 fseek(file, 0, SEEK_END);
4757 length_type length = ftell(file);
4758 fseek(file, 0, SEEK_SET);
4761 // check for I/O errors
4762 if (length < 0) return status_io_error;
4764 // check for overflow
4765 size_t result = static_cast<size_t>(length);
4767 if (static_cast<length_type>(result) != length) return status_out_of_memory;
4770 out_result = result;
4775 // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
// Appends a zero terminator to `buffer` for encodings that are not re-encoded on load.
// In PUGIXML_WCHAR_MODE this applies when `encoding` is the native wchar encoding or differs
// from it only by byte order: a char_t zero is written at index size / sizeof(char_t) and the
// returned size covers it. In the other visible variant a single zero byte is written after
// `size` bytes of UTF-8 data.
// The caller must provide sizeof(char_t) spare writable bytes past `size`.
// (The returns for the remaining cases are on lines not visible in this excerpt.)
4776 PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
4778 // We only need to zero-terminate if encoding conversion does not do it for us
4779 #ifdef PUGIXML_WCHAR_MODE
4780 xml_encoding wchar_encoding = get_wchar_encoding();
4782 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
4784 size_t length = size / sizeof(char_t);
4786 static_cast<char_t*>(buffer)[length] = 0;
4787 return (length + 1) * sizeof(char_t);
4790 if (encoding == encoding_utf8)
4792 static_cast<char*>(buffer)[size] = 0;
// Reads the whole of `file` into a freshly allocated buffer and parses it into `doc`.
// Failure results: status_file_not_found for a null FILE*, the status from get_file_size,
// status_out_of_memory when allocation fails, and status_io_error (after freeing the buffer)
// when fewer bytes than expected are read.
// The buffer is allocated with sizeof(char_t) extra bytes so zero_terminate_buffer can append a
// terminator, and is passed to load_buffer_impl as mutable and owned.
// (The declaration of `size` is on a line not visible in this excerpt.)
4800 PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4802 if (!file) return make_parse_result(status_file_not_found);
4804 // get file size (can result in I/O errors)
4806 xml_parse_status size_status = get_file_size(file, size);
4807 if (size_status != status_ok) return make_parse_result(size_status);
4809 size_t max_suffix_size = sizeof(char_t);
4811 // allocate buffer for the whole file
4812 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4813 if (!contents) return make_parse_result(status_out_of_memory);
4815 // read file in memory
4816 size_t read_size = fread(contents, 1, size, file);
4818 if (read_size != size)
4820 xml_memory::deallocate(contents);
4821 return make_parse_result(status_io_error);
4824 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4826 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
4829 PUGI__FN void close_file(FILE* file)
4834 #ifndef PUGIXML_NO_STL
// One link of a singly linked list of fixed-size buffers used while reading a stream.
// Chunks are created in memory obtained from xml_memory::allocate (placement new; create()
// returns 0 when allocation fails) and released with xml_memory::deallocate. destroy() reads
// each chunk's `next` link before freeing it. `data` holds xml_memory_page_size bytes worth of T.
// (The `size` member and the loop inside destroy() are on lines not visible in this excerpt.)
4835 template <typename T> struct xml_stream_chunk
4837 static xml_stream_chunk* create()
4839 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4840 if (!memory) return 0;
4842 return new (memory) xml_stream_chunk();
4845 static void destroy(xml_stream_chunk* chunk)
4850 xml_stream_chunk* next_ = chunk->next;
4852 xml_memory::deallocate(chunk);
4858 xml_stream_chunk(): next(0), size(0)
4862 xml_stream_chunk* next;
4865 T data[xml_memory_page_size / sizeof(T)];
// Reads a stream to its end without relying on seeking. Data is read chunk by chunk into a
// linked list of xml_stream_chunk (owned by the auto_deleter `chunks`), the total byte count is
// accumulated with an overflow guard, and the chunks are then copied into one contiguous buffer
// allocated with sizeof(char_t) extra bytes for a terminator.
// Returns status_out_of_memory on allocation failure or size overflow and status_io_error on a
// stream error other than reaching end of file; on success the buffer is returned via *out_buffer.
// (The declaration of `total`, the *out_size store and the success return are on lines not
// visible in this excerpt.)
4868 template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4870 auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
4872 // read file to a chunk list
4874 xml_stream_chunk<T>* last = 0;
4876 while (!stream.eof())
4878 // allocate new chunk
4879 xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
4880 if (!chunk) return status_out_of_memory;
4882 // append chunk to list
4883 if (last) last = last->next = chunk;
4884 else chunks.data = last = chunk;
4886 // read data to chunk
4887 stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
4888 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
4890 // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
4891 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4893 // guard against huge files (chunk size is small enough to make this overflow check work)
4894 if (total + chunk->size < total) return status_out_of_memory;
4895 total += chunk->size;
4898 size_t max_suffix_size = sizeof(char_t);
4900 // copy chunk list to a contiguous buffer
4901 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
4902 if (!buffer) return status_out_of_memory;
4904 char* write = buffer;
4906 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
4908 assert(write + chunk->size <= buffer + total);
4909 memcpy(write, chunk->data, chunk->size);
4910 write += chunk->size;
4913 assert(write == buffer + total);
4916 *out_buffer = buffer;
// Reads the remainder of a seekable stream in one go. The remaining length is measured as the
// distance from the current position to the end of the stream, checked to fit in size_t and be
// non-negative, and a buffer of read_length * sizeof(T) bytes plus sizeof(char_t) spare bytes
// is allocated under an auto_deleter so it is released if an error path is taken.
// The number of elements actually read (gcount) may be smaller than the measured length; the
// reported size is based on it. Returns status_io_error on stream failure and
// status_out_of_memory on size or allocation problems.
// NOTE(review): the seek back to `pos` before reading and the success return are on lines not
// visible in this excerpt. The allocation size expression is not checked for overflow here.
4922 template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4924 // get length of remaining data in stream
4925 typename std::basic_istream<T>::pos_type pos = stream.tellg();
4926 stream.seekg(0, std::ios::end);
4927 std::streamoff length = stream.tellg() - pos;
4930 if (stream.fail() || pos < 0) return status_io_error;
4932 // guard against huge files
4933 size_t read_length = static_cast<size_t>(length);
4935 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
4937 size_t max_suffix_size = sizeof(char_t);
4939 // read stream data into memory (guard against stream exceptions with buffer holder)
4940 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
4941 if (!buffer.data) return status_out_of_memory;
4943 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
4945 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
4946 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4949 size_t actual_length = static_cast<size_t>(stream.gcount());
4950 assert(actual_length <= read_length);
4952 *out_buffer = buffer.release();
4953 *out_size = actual_length * sizeof(T);
// Loads a document from a std::basic_istream. A stream that is already in a failed state is
// rejected with status_io_error. tellg() is used as a probe: a negative result selects the
// chunked no-seek reader (after clearing the error flags the probe may have set), otherwise the
// seek-based reader is used. The loaded data is zero-terminated as needed and parsed by
// load_buffer_impl as a mutable, owned buffer.
// (Declarations of `buffer` and `size` are on lines not visible in this excerpt.)
4958 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4962 xml_parse_status status = status_ok;
4964 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
4965 if (stream.fail()) return make_parse_result(status_io_error);
4967 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
4968 if (stream.tellg() < 0)
4970 stream.clear(); // clear error flags that could be set by a failing tellg
4971 status = load_stream_data_noseek(stream, &buffer, &size);
4974 status = load_stream_data_seek(stream, &buffer, &size);
4976 if (status != status_ok) return make_parse_result(status);
4978 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
4980 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
4984 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
// Opens a file by wide-character path using the C runtime's native wide API:
// _wfopen_s when PUGI__MSVC_CRT_VERSION is at least 1400 (returning 0 on a non-zero error code),
// _wfopen otherwise. (The declaration of `file` is on a line not visible in this excerpt.)
4985 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4987 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
4989 return _wfopen_s(&file, path, mode) == 0 ? file : 0;
4991 return _wfopen(path, mode);
// Converts a wide-character path to UTF-8 in memory obtained from xml_memory::allocate.
// Two passes: as_utf8_begin computes the encoded size, then as_utf8_end writes the bytes into a
// buffer of size + 1. Returns 0 when allocation fails.
// NOTE(review): the success return is on a line not visible in this excerpt; the extra byte is
// presumably for a terminator written by as_utf8_end — confirm. The caller frees the result.
4995 PUGI__FN char* convert_path_heap(const wchar_t* str)
4999 // first pass: get length in utf8 characters
5000 size_t length = strlength_wide(str);
5001 size_t size = as_utf8_begin(str, length);
5003 // allocate resulting string
5004 char* result = static_cast<char*>(xml_memory::allocate(size + 1));
5005 if (!result) return 0;
5007 // second pass: convert to utf8
5008 as_utf8_end(result, size, str, length);
5016 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
5018 // there is no standard function to open wide paths, so our best bet is to try utf8 path
5019 char* path_utf8 = convert_path_heap(path);
5020 if (!path_utf8) return 0;
5022 // convert mode to ASCII (we mirror _wfopen interface)
5023 char mode_ascii[4] = {0};
5024 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
5026 // try to open the utf8 path
5027 FILE* result = fopen(path_utf8, mode_ascii);
5029 // free dummy buffer
5030 xml_memory::deallocate(path_utf8);
// Opens a file by narrow path: fopen_s when PUGI__MSVC_CRT_VERSION is at least 1400 (returning 0
// on a non-zero error code), fopen otherwise.
// (The declaration of `file` is on a line not visible in this excerpt.)
5036 PUGI__FN FILE* open_file(const char* path, const char* mode)
5038 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
5040 return fopen_s(&file, path, mode) == 0 ? file : 0;
5042 return fopen(path, mode);
5046 PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
5048 if (!file) return false;
5050 xml_writer_file writer(file);
5051 doc.save(writer, indent, flags, encoding);
5053 return ferror(file) == 0;
// Helper object that captures a node pointer together with that node's current name pointer at
// construction time.
// NOTE(review): the rest of the struct (remaining members, constructor body, destructor) is not
// visible in this excerpt; judging by the type name it presumably clears the node's name for its
// lifetime and restores it afterwards — confirm.
5056 struct name_null_sentry
5058 xml_node_struct* node;
5061 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
5075 PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
// Writes `size` bytes from `data` to the wrapped FILE* with fwrite. The number of bytes written
// is deliberately discarded, so short writes are not reported from here.
5079 PUGI__FN void xml_writer_file::write(const void* data, size_t size)
5081 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
5082 (void)!result; // unfortunately we can't do proper error handling here
5085 #ifndef PUGIXML_NO_STL
5086 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
5090 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
// Forwards the data to whichever stream the writer was constructed with. For the narrow stream
// the bytes are written as chars; for the wide stream `size` must be a multiple of
// sizeof(wchar_t) (asserted) and is converted to a character count.
// (The condition choosing between the two paths is on a line not visible in this excerpt.)
5094 PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
5098 assert(!wide_stream);
5099 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
5103 assert(wide_stream);
5104 assert(size % sizeof(wchar_t) == 0);
5106 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
5111 PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
5115 PUGI__FN xml_tree_walker::~xml_tree_walker()
5119 PUGI__FN int xml_tree_walker::depth() const
5124 PUGI__FN bool xml_tree_walker::begin(xml_node&)
5129 PUGI__FN bool xml_tree_walker::end(xml_node&)
5134 PUGI__FN xml_attribute::xml_attribute(): _attr(0)
5138 PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
5142 PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
// Boolean-context conversion: yields a non-null function pointer when the handle refers to an
// attribute and 0 when it is empty.
5146 PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
5148 return _attr ? unspecified_bool_xml_attribute : 0;
5151 PUGI__FN bool xml_attribute::operator!() const
// Two handles are equal when they refer to the same underlying attribute struct.
5156 PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
5158 return (_attr == r._attr);
// Handles differ when they refer to different underlying attribute structs.
5161 PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
5163 return (_attr != r._attr);
// Orders handles by the address of the underlying attribute struct.
5166 PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
5168 return (_attr < r._attr);
// Orders handles by the address of the underlying attribute struct.
5171 PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
5173 return (_attr > r._attr);
// Orders handles by the address of the underlying attribute struct.
5176 PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
5178 return (_attr <= r._attr);
// Orders handles by the address of the underlying attribute struct.
5181 PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
5183 return (_attr >= r._attr);
// Returns a handle to the following attribute, or an empty handle when this handle is empty.
// If there is no following attribute the returned handle wraps a null pointer.
5186 PUGI__FN xml_attribute xml_attribute::next_attribute() const
5188 return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
// Returns a handle to the preceding attribute, or an empty handle when this handle is empty or
// the node reached through prev_attribute_c has no next_attribute.
// NOTE(review): this test suggests prev_attribute_c is a cyclic link (the first attribute's
// previous pointer leading to the last one) — confirm against the list maintenance code.
5191 PUGI__FN xml_attribute xml_attribute::previous_attribute() const
5193 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
// Returns the attribute value, or `def` when the handle is empty or the value pointer is null.
5196 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
5198 return (_attr && _attr->value) ? _attr->value + 0 : def;
// Returns the value parsed by impl::get_value_int, or `def` when the handle is empty or the
// value pointer is null.
5201 PUGI__FN int xml_attribute::as_int(int def) const
5203 return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
// Returns the value parsed by impl::get_value_uint, or `def` when the handle is empty or the
// value pointer is null.
5206 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
5208 return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
// Returns the value parsed by impl::get_value_double, or `def` when the handle is empty or the
// value pointer is null.
5211 PUGI__FN double xml_attribute::as_double(double def) const
5213 return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
// Returns the value parsed by impl::get_value_float, or `def` when the handle is empty or the
// value pointer is null.
5216 PUGI__FN float xml_attribute::as_float(float def) const
5218 return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
// Returns the value interpreted by impl::get_value_bool, or `def` when the handle is empty or
// the value pointer is null.
5221 PUGI__FN bool xml_attribute::as_bool(bool def) const
5223 return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
5226 #ifdef PUGIXML_HAS_LONG_LONG
// Returns the value parsed by impl::get_value_llong, or `def` when the handle is empty or the
// value pointer is null.
5227 PUGI__FN long long xml_attribute::as_llong(long long def) const
5229 return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def;
// Returns the value parsed by impl::get_value_ullong, or `def` when the handle is empty or the
// value pointer is null.
5232 PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
5234 return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def;
5238 PUGI__FN bool xml_attribute::empty() const
// Returns the attribute name, or an empty string when the handle is empty or the name is null.
5243 PUGI__FN const char_t* xml_attribute::name() const
5245 return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
// Returns the attribute value, or an empty string when the handle is empty or the value is null.
5248 PUGI__FN const char_t* xml_attribute::value() const
5250 return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
// Hash derived from the address of the underlying struct, divided by the struct size.
5253 PUGI__FN size_t xml_attribute::hash_value() const
5255 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
5258 PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
5263 PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
5269 PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
5275 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
5281 PUGI__FN xml_attribute& xml_attribute::operator=(long rhs)
5287 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
5293 PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
5299 PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
5305 PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
5311 #ifdef PUGIXML_HAS_LONG_LONG
5312 PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
5318 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
// Replaces the attribute name with a copy of `rhs`. Returns false for an empty handle,
// otherwise the result of impl::strcpy_insitu. `rhs` is passed to impl::strlength as is.
5325 PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
5327 if (!_attr) return false;
5329 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
// Replaces the attribute value with a copy of `rhs`. Returns false for an empty handle,
// otherwise the result of impl::strcpy_insitu. `rhs` is passed to impl::strlength as is.
5332 PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
5334 if (!_attr) return false;
5336 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
// Stores an int as decimal text. The value is passed on as unsigned int together with a flag
// telling whether it was negative. Returns false for an empty handle.
5339 PUGI__FN bool xml_attribute::set_value(int rhs)
5341 if (!_attr) return false;
5343 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
// Stores an unsigned int as decimal text. Returns false for an empty handle.
5346 PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
5348 if (!_attr) return false;
5350 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
// Stores a long as decimal text. The value is passed on as unsigned long together with a flag
// telling whether it was negative. Returns false for an empty handle.
5353 PUGI__FN bool xml_attribute::set_value(long rhs)
5355 if (!_attr) return false;
5357 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
// Stores an unsigned long as decimal text. Returns false for an empty handle.
5360 PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
5362 if (!_attr) return false;
5364 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
// Stores a double as text using default_double_precision. Returns false for an empty handle.
5367 PUGI__FN bool xml_attribute::set_value(double rhs)
5369 if (!_attr) return false;
5371 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision);
// Stores a double as text using the caller-supplied `precision`. Returns false for an empty handle.
5374 PUGI__FN bool xml_attribute::set_value(double rhs, int precision)
5376 if (!_attr) return false;
5378 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
// Stores a float as text using default_float_precision. Returns false for an empty handle.
5381 PUGI__FN bool xml_attribute::set_value(float rhs)
5383 if (!_attr) return false;
5385 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision);
// Stores a float as text using the caller-supplied `precision`. Returns false for an empty handle.
5388 PUGI__FN bool xml_attribute::set_value(float rhs, int precision)
5390 if (!_attr) return false;
5392 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
// Stores a bool through impl::set_value_bool. Returns false for an empty handle.
5395 PUGI__FN bool xml_attribute::set_value(bool rhs)
5397 if (!_attr) return false;
5399 return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5402 #ifdef PUGIXML_HAS_LONG_LONG
// Stores a long long as decimal text. The value is passed on as unsigned long long together
// with a flag telling whether it was negative. Returns false for an empty handle.
5403 PUGI__FN bool xml_attribute::set_value(long long rhs)
5405 if (!_attr) return false;
5407 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
// Stores an unsigned long long as decimal text. Returns false for an empty handle.
5410 PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
5412 if (!_attr) return false;
5414 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
// Logical AND of an attribute handle (converted to bool) with a bool.
5419 PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
5421 return (bool)lhs && rhs;
// Logical OR of an attribute handle (converted to bool) with a bool.
5424 PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
5426 return (bool)lhs || rhs;
5430 PUGI__FN xml_node::xml_node(): _root(0)
5434 PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
5438 PUGI__FN static void unspecified_bool_xml_node(xml_node***)
// Boolean-context conversion: yields a non-null function pointer when the handle refers to a
// node and 0 when it is empty.
5442 PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
5444 return _root ? unspecified_bool_xml_node : 0;
5447 PUGI__FN bool xml_node::operator!() const
// Iterator positioned at the first child, with this node as parent; for an empty handle the
// iterator wraps a null child.
5452 PUGI__FN xml_node::iterator xml_node::begin() const
5454 return iterator(_root ? _root->first_child + 0 : 0, _root);
// Past-the-end child iterator: a null child paired with this node as parent.
5457 PUGI__FN xml_node::iterator xml_node::end() const
5459 return iterator(0, _root);
// Iterator positioned at the first attribute, with this node as parent; for an empty handle
// the iterator wraps a null attribute.
5462 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
5464 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
// Past-the-end attribute iterator: a null attribute paired with this node as parent.
5467 PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
5469 return attribute_iterator(0, _root);
// Range over all child nodes, built from begin() and end().
5472 PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
5474 return xml_object_range<xml_node_iterator>(begin(), end());
5477 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
5479 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
5482 PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
5484 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
5487 PUGI__FN bool xml_node::operator==(const xml_node& r) const
5489 return (_root == r._root);
5492 PUGI__FN bool xml_node::operator!=(const xml_node& r) const
5494 return (_root != r._root);
5497 PUGI__FN bool xml_node::operator<(const xml_node& r) const
5499 return (_root < r._root);
5502 PUGI__FN bool xml_node::operator>(const xml_node& r) const
5504 return (_root > r._root);
5507 PUGI__FN bool xml_node::operator<=(const xml_node& r) const
5509 return (_root <= r._root);
5512 PUGI__FN bool xml_node::operator>=(const xml_node& r) const
5514 return (_root >= r._root);
5517 PUGI__FN bool xml_node::empty() const
5522 PUGI__FN const char_t* xml_node::name() const
5524 return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
5527 PUGI__FN xml_node_type xml_node::type() const
5529 return _root ? PUGI__NODETYPE(_root) : node_null;
5532 PUGI__FN const char_t* xml_node::value() const
5534 return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
5537 PUGI__FN xml_node xml_node::child(const char_t* name_) const
5539 if (!_root) return xml_node();
5541 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5542 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5547 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
5549 if (!_root) return xml_attribute();
5551 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
5552 if (i->name && impl::strequal(name_, i->name))
5553 return xml_attribute(i);
5555 return xml_attribute();
5558 PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
5560 if (!_root) return xml_node();
5562 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
5563 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5568 PUGI__FN xml_node xml_node::next_sibling() const
5570 return _root ? xml_node(_root->next_sibling) : xml_node();
5573 PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
5575 if (!_root) return xml_node();
5577 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
5578 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5583 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
5585 xml_attribute_struct* hint = hint_._attr;
5587 // if hint is not an attribute of node, behavior is not defined
5588 assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
5590 if (!_root) return xml_attribute();
5592 // optimistically search from hint up until the end
5593 for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
5594 if (i->name && impl::strequal(name_, i->name))
5596 // update hint to maximize efficiency of searching for consecutive attributes
5597 hint_._attr = i->next_attribute;
5599 return xml_attribute(i);
5602 // wrap around and search from the first attribute until the hint
5603 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
5604 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
5605 if (j->name && impl::strequal(name_, j->name))
5607 // update hint to maximize efficiency of searching for consecutive attributes
5608 hint_._attr = j->next_attribute;
5610 return xml_attribute(j);
5613 return xml_attribute();
5616 PUGI__FN xml_node xml_node::previous_sibling() const
5618 if (!_root) return xml_node();
5620 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
5621 else return xml_node();
5624 PUGI__FN xml_node xml_node::parent() const
5626 return _root ? xml_node(_root->parent) : xml_node();
5629 PUGI__FN xml_node xml_node::root() const
5631 return _root ? xml_node(&impl::get_document(_root)) : xml_node();
5634 PUGI__FN xml_text xml_node::text() const
5636 return xml_text(_root);
5639 PUGI__FN const char_t* xml_node::child_value() const
5641 if (!_root) return PUGIXML_TEXT("");
5643 // element nodes can have value if parse_embed_pcdata was used
5644 if (PUGI__NODETYPE(_root) == node_element && _root->value)
5645 return _root->value;
5647 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5648 if (impl::is_text_node(i) && i->value)
5651 return PUGIXML_TEXT("");
5654 PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
5656 return child(name_).child_value();
5659 PUGI__FN xml_attribute xml_node::first_attribute() const
5661 return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
5664 PUGI__FN xml_attribute xml_node::last_attribute() const
5666 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
5669 PUGI__FN xml_node xml_node::first_child() const
5671 return _root ? xml_node(_root->first_child) : xml_node();
5674 PUGI__FN xml_node xml_node::last_child() const
5676 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
5679 PUGI__FN bool xml_node::set_name(const char_t* rhs)
5681 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5683 if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
5686 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5689 PUGI__FN bool xml_node::set_value(const char_t* rhs)
5691 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5693 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
5696 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5699 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
5701 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5703 impl::xml_allocator& alloc = impl::get_allocator(_root);
5704 if (!alloc.reserve()) return xml_attribute();
5706 xml_attribute a(impl::allocate_attribute(alloc));
5707 if (!a) return xml_attribute();
5709 impl::append_attribute(a._attr, _root);
5716 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
5718 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5720 impl::xml_allocator& alloc = impl::get_allocator(_root);
5721 if (!alloc.reserve()) return xml_attribute();
5723 xml_attribute a(impl::allocate_attribute(alloc));
5724 if (!a) return xml_attribute();
5726 impl::prepend_attribute(a._attr, _root);
5733 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
5735 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5736 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5738 impl::xml_allocator& alloc = impl::get_allocator(_root);
5739 if (!alloc.reserve()) return xml_attribute();
5741 xml_attribute a(impl::allocate_attribute(alloc));
5742 if (!a) return xml_attribute();
5744 impl::insert_attribute_after(a._attr, attr._attr, _root);
5751 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
5753 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5754 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5756 impl::xml_allocator& alloc = impl::get_allocator(_root);
5757 if (!alloc.reserve()) return xml_attribute();
5759 xml_attribute a(impl::allocate_attribute(alloc));
5760 if (!a) return xml_attribute();
5762 impl::insert_attribute_before(a._attr, attr._attr, _root);
5769 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
5771 if (!proto) return xml_attribute();
5772 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5774 impl::xml_allocator& alloc = impl::get_allocator(_root);
5775 if (!alloc.reserve()) return xml_attribute();
5777 xml_attribute a(impl::allocate_attribute(alloc));
5778 if (!a) return xml_attribute();
5780 impl::append_attribute(a._attr, _root);
5781 impl::node_copy_attribute(a._attr, proto._attr);
5786 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
5788 if (!proto) return xml_attribute();
5789 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5791 impl::xml_allocator& alloc = impl::get_allocator(_root);
5792 if (!alloc.reserve()) return xml_attribute();
5794 xml_attribute a(impl::allocate_attribute(alloc));
5795 if (!a) return xml_attribute();
5797 impl::prepend_attribute(a._attr, _root);
5798 impl::node_copy_attribute(a._attr, proto._attr);
5803 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
5805 if (!proto) return xml_attribute();
5806 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5807 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5809 impl::xml_allocator& alloc = impl::get_allocator(_root);
5810 if (!alloc.reserve()) return xml_attribute();
5812 xml_attribute a(impl::allocate_attribute(alloc));
5813 if (!a) return xml_attribute();
5815 impl::insert_attribute_after(a._attr, attr._attr, _root);
5816 impl::node_copy_attribute(a._attr, proto._attr);
5821 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
5823 if (!proto) return xml_attribute();
5824 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5825 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5827 impl::xml_allocator& alloc = impl::get_allocator(_root);
5828 if (!alloc.reserve()) return xml_attribute();
5830 xml_attribute a(impl::allocate_attribute(alloc));
5831 if (!a) return xml_attribute();
5833 impl::insert_attribute_before(a._attr, attr._attr, _root);
5834 impl::node_copy_attribute(a._attr, proto._attr);
5839 PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
5841 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5843 impl::xml_allocator& alloc = impl::get_allocator(_root);
5844 if (!alloc.reserve()) return xml_node();
5846 xml_node n(impl::allocate_node(alloc, type_));
5847 if (!n) return xml_node();
5849 impl::append_node(n._root, _root);
5851 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5856 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
5858 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5860 impl::xml_allocator& alloc = impl::get_allocator(_root);
5861 if (!alloc.reserve()) return xml_node();
5863 xml_node n(impl::allocate_node(alloc, type_));
5864 if (!n) return xml_node();
5866 impl::prepend_node(n._root, _root);
5868 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5873 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
5875 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5876 if (!node._root || node._root->parent != _root) return xml_node();
5878 impl::xml_allocator& alloc = impl::get_allocator(_root);
5879 if (!alloc.reserve()) return xml_node();
5881 xml_node n(impl::allocate_node(alloc, type_));
5882 if (!n) return xml_node();
5884 impl::insert_node_before(n._root, node._root);
5886 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5891 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
5893 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5894 if (!node._root || node._root->parent != _root) return xml_node();
5896 impl::xml_allocator& alloc = impl::get_allocator(_root);
5897 if (!alloc.reserve()) return xml_node();
5899 xml_node n(impl::allocate_node(alloc, type_));
5900 if (!n) return xml_node();
5902 impl::insert_node_after(n._root, node._root);
5904 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5909 PUGI__FN xml_node xml_node::append_child(const char_t* name_)
5911 xml_node result = append_child(node_element);
5913 result.set_name(name_);
5918 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
5920 xml_node result = prepend_child(node_element);
5922 result.set_name(name_);
5927 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
5929 xml_node result = insert_child_after(node_element, node);
5931 result.set_name(name_);
5936 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
5938 xml_node result = insert_child_before(node_element, node);
5940 result.set_name(name_);
5945 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
5947 xml_node_type type_ = proto.type();
5948 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5950 impl::xml_allocator& alloc = impl::get_allocator(_root);
5951 if (!alloc.reserve()) return xml_node();
5953 xml_node n(impl::allocate_node(alloc, type_));
5954 if (!n) return xml_node();
5956 impl::append_node(n._root, _root);
5957 impl::node_copy_tree(n._root, proto._root);
5962 PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
5964 xml_node_type type_ = proto.type();
5965 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5967 impl::xml_allocator& alloc = impl::get_allocator(_root);
5968 if (!alloc.reserve()) return xml_node();
5970 xml_node n(impl::allocate_node(alloc, type_));
5971 if (!n) return xml_node();
5973 impl::prepend_node(n._root, _root);
5974 impl::node_copy_tree(n._root, proto._root);
5979 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
5981 xml_node_type type_ = proto.type();
5982 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5983 if (!node._root || node._root->parent != _root) return xml_node();
5985 impl::xml_allocator& alloc = impl::get_allocator(_root);
5986 if (!alloc.reserve()) return xml_node();
5988 xml_node n(impl::allocate_node(alloc, type_));
5989 if (!n) return xml_node();
5991 impl::insert_node_after(n._root, node._root);
5992 impl::node_copy_tree(n._root, proto._root);
5997 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
5999 xml_node_type type_ = proto.type();
6000 if (!impl::allow_insert_child(type(), type_)) return xml_node();
6001 if (!node._root || node._root->parent != _root) return xml_node();
6003 impl::xml_allocator& alloc = impl::get_allocator(_root);
6004 if (!alloc.reserve()) return xml_node();
6006 xml_node n(impl::allocate_node(alloc, type_));
6007 if (!n) return xml_node();
6009 impl::insert_node_before(n._root, node._root);
6010 impl::node_copy_tree(n._root, proto._root);
6015 PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
6017 if (!impl::allow_move(*this, moved)) return xml_node();
6019 impl::xml_allocator& alloc = impl::get_allocator(_root);
6020 if (!alloc.reserve()) return xml_node();
6022 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6023 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6025 impl::remove_node(moved._root);
6026 impl::append_node(moved._root, _root);
6031 PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
6033 if (!impl::allow_move(*this, moved)) return xml_node();
6035 impl::xml_allocator& alloc = impl::get_allocator(_root);
6036 if (!alloc.reserve()) return xml_node();
6038 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6039 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6041 impl::remove_node(moved._root);
6042 impl::prepend_node(moved._root, _root);
6047 PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
6049 if (!impl::allow_move(*this, moved)) return xml_node();
6050 if (!node._root || node._root->parent != _root) return xml_node();
6051 if (moved._root == node._root) return xml_node();
6053 impl::xml_allocator& alloc = impl::get_allocator(_root);
6054 if (!alloc.reserve()) return xml_node();
6056 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6057 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6059 impl::remove_node(moved._root);
6060 impl::insert_node_after(moved._root, node._root);
6065 PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
6067 if (!impl::allow_move(*this, moved)) return xml_node();
6068 if (!node._root || node._root->parent != _root) return xml_node();
6069 if (moved._root == node._root) return xml_node();
6071 impl::xml_allocator& alloc = impl::get_allocator(_root);
6072 if (!alloc.reserve()) return xml_node();
6074 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6075 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6077 impl::remove_node(moved._root);
6078 impl::insert_node_before(moved._root, node._root);
6083 PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
6085 return remove_attribute(attribute(name_));
6088 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
6090 if (!_root || !a._attr) return false;
6091 if (!impl::is_attribute_of(a._attr, _root)) return false;
6093 impl::xml_allocator& alloc = impl::get_allocator(_root);
6094 if (!alloc.reserve()) return false;
6096 impl::remove_attribute(a._attr, _root);
6097 impl::destroy_attribute(a._attr, alloc);
6102 PUGI__FN bool xml_node::remove_attributes()
6104 if (!_root) return false;
6106 impl::xml_allocator& alloc = impl::get_allocator(_root);
6107 if (!alloc.reserve()) return false;
6109 for (xml_attribute_struct* attr = _root->first_attribute; attr; )
6111 xml_attribute_struct* next = attr->next_attribute;
6113 impl::destroy_attribute(attr, alloc);
6118 _root->first_attribute = 0;
6123 PUGI__FN bool xml_node::remove_child(const char_t* name_)
6125 return remove_child(child(name_));
6128 PUGI__FN bool xml_node::remove_child(const xml_node& n)
6130 if (!_root || !n._root || n._root->parent != _root) return false;
6132 impl::xml_allocator& alloc = impl::get_allocator(_root);
6133 if (!alloc.reserve()) return false;
6135 impl::remove_node(n._root);
6136 impl::destroy_node(n._root, alloc);
6141 PUGI__FN bool xml_node::remove_children()
6143 if (!_root) return false;
6145 impl::xml_allocator& alloc = impl::get_allocator(_root);
6146 if (!alloc.reserve()) return false;
6148 for (xml_node_struct* cur = _root->first_child; cur; )
6150 xml_node_struct* next = cur->next_sibling;
6152 impl::destroy_node(cur, alloc);
6157 _root->first_child = 0;
6162 PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
6164 // append_buffer is only valid for elements/documents
6165 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
6167 // get document node
6168 impl::xml_document_struct* doc = &impl::get_document(_root);
6170 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
6171 doc->header |= impl::xml_memory_page_contents_shared_mask;
6173 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
6174 impl::xml_memory_page* page = 0;
6175 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page));
6178 if (!extra) return impl::make_parse_result(status_out_of_memory);
6180 #ifdef PUGIXML_COMPACT
6181 // align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned
6182 // note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account
6183 extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1));
6186 // add extra buffer to the list
6188 extra->next = doc->extra_buffers;
6189 doc->extra_buffers = extra;
6191 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
6192 impl::name_null_sentry sentry(_root);
6194 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
6197 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
6199 if (!_root) return xml_node();
6201 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6202 if (i->name && impl::strequal(name_, i->name))
6204 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6205 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6212 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
6214 if (!_root) return xml_node();
6216 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6217 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6218 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6224 #ifndef PUGIXML_NO_STL
6225 PUGI__FN string_t xml_node::path(char_t delimiter) const
6227 if (!_root) return string_t();
6231 for (xml_node_struct* i = _root; i; i = i->parent)
6233 offset += (i != _root);
6234 offset += i->name ? impl::strlength(i->name) : 0;
6238 result.resize(offset);
6240 for (xml_node_struct* j = _root; j; j = j->parent)
6243 result[--offset] = delimiter;
6247 size_t length = impl::strlength(j->name);
6250 memcpy(&result[offset], j->name, length * sizeof(char_t));
6254 assert(offset == 0);
6260 PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
6262 xml_node context = path_[0] == delimiter ? root() : *this;
6264 if (!context._root) return xml_node();
6266 const char_t* path_segment = path_;
6268 while (*path_segment == delimiter) ++path_segment;
6270 const char_t* path_segment_end = path_segment;
6272 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
6274 if (path_segment == path_segment_end) return context;
6276 const char_t* next_segment = path_segment_end;
6278 while (*next_segment == delimiter) ++next_segment;
6280 if (*path_segment == '.' && path_segment + 1 == path_segment_end)
6281 return context.first_element_by_path(next_segment, delimiter);
6282 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
6283 return context.parent().first_element_by_path(next_segment, delimiter);
6286 for (xml_node_struct* j = context._root->first_child; j; j = j->next_sibling)
6288 if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
6290 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
6292 if (subsearch) return subsearch;
6300 PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
6304 xml_node arg_begin(_root);
6305 if (!walker.begin(arg_begin)) return false;
6307 xml_node_struct* cur = _root ? _root->first_child + 0 : 0;
6315 xml_node arg_for_each(cur);
6316 if (!walker.for_each(arg_for_each))
6319 if (cur->first_child)
6322 cur = cur->first_child;
6324 else if (cur->next_sibling)
6325 cur = cur->next_sibling;
6328 while (!cur->next_sibling && cur != _root && cur->parent)
6335 cur = cur->next_sibling;
6338 while (cur && cur != _root);
6341 assert(walker._depth == -1);
6343 xml_node arg_end(_root);
6344 return walker.end(arg_end);
6347 PUGI__FN size_t xml_node::hash_value() const
6349 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
6352 PUGI__FN xml_node_struct* xml_node::internal_object() const
6357 PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6361 impl::xml_buffered_writer buffered_writer(writer, encoding);
6363 impl::node_output(buffered_writer, _root, indent, flags, depth);
6365 buffered_writer.flush();
6368 #ifndef PUGIXML_NO_STL
6369 PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6371 xml_writer_stream writer(stream);
6373 print(writer, indent, flags, encoding, depth);
6376 PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
6378 xml_writer_stream writer(stream);
6380 print(writer, indent, flags, encoding_wchar, depth);
6384 PUGI__FN ptrdiff_t xml_node::offset_debug() const
6386 if (!_root) return -1;
6388 impl::xml_document_struct& doc = impl::get_document(_root);
6390 // we can determine the offset reliably only if there is exactly once parse buffer
6391 if (!doc.buffer || doc.extra_buffers) return -1;
6399 case node_declaration:
6401 return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
6407 return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
6410 assert(false && "Invalid node type"); // unreachable
#ifdef __BORLANDC__
// Logical AND between a node handle (converted to bool) and a bool.
PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
{
	const bool lhs_valid = (bool)lhs;

	return lhs_valid && rhs;
}

// Logical OR between a node handle (converted to bool) and a bool.
PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
{
	const bool lhs_valid = (bool)lhs;

	return lhs_valid || rhs;
}
#endif
6427 PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
6431 PUGI__FN xml_node_struct* xml_text::_data() const
6433 if (!_root || impl::is_text_node(_root)) return _root;
6435 // element nodes can have value if parse_embed_pcdata was used
6436 if (PUGI__NODETYPE(_root) == node_element && _root->value)
6439 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
6440 if (impl::is_text_node(node))
6446 PUGI__FN xml_node_struct* xml_text::_data_new()
6448 xml_node_struct* d = _data();
6451 return xml_node(_root).append_child(node_pcdata).internal_object();
6454 PUGI__FN xml_text::xml_text(): _root(0)
6458 PUGI__FN static void unspecified_bool_xml_text(xml_text***)
6462 PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
6464 return _data() ? unspecified_bool_xml_text : 0;
6467 PUGI__FN bool xml_text::operator!() const
6472 PUGI__FN bool xml_text::empty() const
6474 return _data() == 0;
6477 PUGI__FN const char_t* xml_text::get() const
6479 xml_node_struct* d = _data();
6481 return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
6484 PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
6486 xml_node_struct* d = _data();
6488 return (d && d->value) ? d->value + 0 : def;
6491 PUGI__FN int xml_text::as_int(int def) const
6493 xml_node_struct* d = _data();
6495 return (d && d->value) ? impl::get_value_int(d->value) : def;
6498 PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
6500 xml_node_struct* d = _data();
6502 return (d && d->value) ? impl::get_value_uint(d->value) : def;
6505 PUGI__FN double xml_text::as_double(double def) const
6507 xml_node_struct* d = _data();
6509 return (d && d->value) ? impl::get_value_double(d->value) : def;
6512 PUGI__FN float xml_text::as_float(float def) const
6514 xml_node_struct* d = _data();
6516 return (d && d->value) ? impl::get_value_float(d->value) : def;
6519 PUGI__FN bool xml_text::as_bool(bool def) const
6521 xml_node_struct* d = _data();
6523 return (d && d->value) ? impl::get_value_bool(d->value) : def;
6526 #ifdef PUGIXML_HAS_LONG_LONG
6527 PUGI__FN long long xml_text::as_llong(long long def) const
6529 xml_node_struct* d = _data();
6531 return (d && d->value) ? impl::get_value_llong(d->value) : def;
6534 PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
6536 xml_node_struct* d = _data();
6538 return (d && d->value) ? impl::get_value_ullong(d->value) : def;
6542 PUGI__FN bool xml_text::set(const char_t* rhs)
6544 xml_node_struct* dn = _data_new();
6546 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
6549 PUGI__FN bool xml_text::set(int rhs)
6551 xml_node_struct* dn = _data_new();
6553 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6556 PUGI__FN bool xml_text::set(unsigned int rhs)
6558 xml_node_struct* dn = _data_new();
6560 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6563 PUGI__FN bool xml_text::set(long rhs)
6565 xml_node_struct* dn = _data_new();
6567 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6570 PUGI__FN bool xml_text::set(unsigned long rhs)
6572 xml_node_struct* dn = _data_new();
6574 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6577 PUGI__FN bool xml_text::set(float rhs)
6579 xml_node_struct* dn = _data_new();
6581 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision) : false;
6584 PUGI__FN bool xml_text::set(float rhs, int precision)
6586 xml_node_struct* dn = _data_new();
6588 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
6591 PUGI__FN bool xml_text::set(double rhs)
6593 xml_node_struct* dn = _data_new();
6595 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision) : false;
6598 PUGI__FN bool xml_text::set(double rhs, int precision)
6600 xml_node_struct* dn = _data_new();
6602 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
6605 PUGI__FN bool xml_text::set(bool rhs)
6607 xml_node_struct* dn = _data_new();
6609 return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6612 #ifdef PUGIXML_HAS_LONG_LONG
6613 PUGI__FN bool xml_text::set(long long rhs)
6615 xml_node_struct* dn = _data_new();
6617 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6620 PUGI__FN bool xml_text::set(unsigned long long rhs)
6622 xml_node_struct* dn = _data_new();
6624 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6628 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
6634 PUGI__FN xml_text& xml_text::operator=(int rhs)
6640 PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
6646 PUGI__FN xml_text& xml_text::operator=(long rhs)
6652 PUGI__FN xml_text& xml_text::operator=(unsigned long rhs)
6658 PUGI__FN xml_text& xml_text::operator=(double rhs)
6664 PUGI__FN xml_text& xml_text::operator=(float rhs)
6670 PUGI__FN xml_text& xml_text::operator=(bool rhs)
6676 #ifdef PUGIXML_HAS_LONG_LONG
6677 PUGI__FN xml_text& xml_text::operator=(long long rhs)
6683 PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
// Returns an xml_node handle constructed from the result of _data().
6690 PUGI__FN xml_node xml_text::data() const
6692 return xml_node(_data());
// Logical AND between an xml_text (converted to bool) and a plain bool.
6696 PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
6698 return (bool)lhs && rhs;
// Logical OR between an xml_text (converted to bool) and a plain bool.
6701 PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
6703 return (bool)lhs || rhs;
// Constructors of xml_node_iterator (bodies are not visible in this excerpt).
// Default constructor: no member initializers are shown here.
6707 PUGI__FN xml_node_iterator::xml_node_iterator()
// From a node: wraps the node and records node.parent() as the iterator's parent.
6711 PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
// From raw structure pointers: wraps ref and records parent as given.
6715 PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
// Two iterators are equal when both the wrapped node pointer and the parent pointer match.
6719 PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
6721 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
// Two iterators differ when either the wrapped node pointer or the parent pointer differs.
6724 PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
6726 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
// Dereference: asserts that the iterator wraps a non-null node.
// NOTE(review): the return statement is not visible in this excerpt.
6729 PUGI__FN xml_node& xml_node_iterator::operator*() const
6731 assert(_wrap._root);
// Member access: asserts that the iterator wraps a non-null node and returns the
// address of the wrapped xml_node with constness removed.
6735 PUGI__FN xml_node* xml_node_iterator::operator->() const
6737 assert(_wrap._root);
6738 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
// Pre-increment: requires a non-null wrapped node (asserted) and advances it to next_sibling.
// NOTE(review): the return statement is not visible in this excerpt.
6741 PUGI__FN xml_node_iterator& xml_node_iterator::operator++()
6743 assert(_wrap._root);
6744 _wrap._root = _wrap._root->next_sibling;
// Post-increment: takes a copy of the current iterator first.
// NOTE(review): the advance and return statements are not visible in this excerpt.
6748 PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
6750 xml_node_iterator temp = *this;
// Pre-decrement: with a non-null wrapped node, moves to its previous sibling;
// with a null wrapped node, moves to the parent's last child.
// NOTE(review): the return statement is not visible in this excerpt.
6755 PUGI__FN xml_node_iterator& xml_node_iterator::operator--()
6757 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
// Post-decrement: takes a copy of the current iterator first.
// NOTE(review): the step-back and return statements are not visible in this excerpt.
6761 PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
6763 xml_node_iterator temp = *this;
// Constructors of xml_attribute_iterator (bodies are not visible in this excerpt).
// Default constructor: no member initializers are shown here.
6768 PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
// From an attribute handle and the node that owns it.
6772 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
// From raw structure pointers.
6776 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
// Two iterators are equal when both the wrapped attribute pointer and the parent node pointer match.
6780 PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
6782 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
// Two iterators differ when either the wrapped attribute pointer or the parent node pointer differs.
6785 PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
6787 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
// Dereference: asserts that the iterator wraps a non-null attribute.
// NOTE(review): the return statement is not visible in this excerpt.
6790 PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
6792 assert(_wrap._attr);
// Member access: asserts that the iterator wraps a non-null attribute and returns the
// address of the wrapped xml_attribute with constness removed.
6796 PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
6798 assert(_wrap._attr);
6799 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
// Pre-increment: requires a non-null wrapped attribute (asserted) and advances it to next_attribute.
// NOTE(review): the return statement is not visible in this excerpt.
6802 PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator++()
6804 assert(_wrap._attr);
6805 _wrap._attr = _wrap._attr->next_attribute;
// Post-increment: takes a copy of the current iterator first.
// NOTE(review): the advance and return statements are not visible in this excerpt.
6809 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
6811 xml_attribute_iterator temp = *this;
// Pre-decrement: with a non-null wrapped attribute, moves to the previous attribute;
// with a null wrapped attribute, moves to the parent's last attribute.
// NOTE(review): the return statement is not visible in this excerpt.
6816 PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator--()
6818 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
// Post-decrement: takes a copy of the current iterator first.
// NOTE(review): the step-back and return statements are not visible in this excerpt.
6822 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
6824 xml_attribute_iterator temp = *this;
// Constructors of xml_named_node_iterator (bodies are not visible in this excerpt).
// Default constructor: the name pointer is initialized to null.
6829 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
// From a node and a name: wraps the node, records node.parent() and stores the name pointer as given.
6833 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
// From raw structure pointers and a name.
6837 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
// Equality compares the wrapped node pointer and the parent pointer; _name does not take part.
6841 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
6843 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
// Inequality compares the wrapped node pointer and the parent pointer; _name does not take part.
6846 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
6848 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
// Dereference: asserts that the iterator wraps a non-null node.
// NOTE(review): the return statement is not visible in this excerpt.
6851 PUGI__FN xml_node& xml_named_node_iterator::operator*() const
6853 assert(_wrap._root);
// Member access: asserts that the iterator wraps a non-null node and returns the
// address of the wrapped xml_node with constness removed.
6857 PUGI__FN xml_node* xml_named_node_iterator::operator->() const
6859 assert(_wrap._root);
6860 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
// Pre-increment: requires a non-null wrapped node (asserted) and moves to the result of
// next_sibling(_name), i.e. the sibling lookup is filtered by the stored name.
// NOTE(review): the return statement is not visible in this excerpt.
6863 PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator++()
6865 assert(_wrap._root);
6866 _wrap = _wrap.next_sibling(_name);
// Post-increment: takes a copy of the current iterator first.
// NOTE(review): the advance and return statements are not visible in this excerpt.
6870 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
6872 xml_named_node_iterator temp = *this;
// Pre-decrement: moves to an earlier sibling selected by the stored name.
// NOTE(review): the conditionals that choose between the two paths below, and the return
// statement, are not visible in this excerpt.
6877 PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator--()
// path 1: step to the previous sibling looked up by _name
6880 _wrap = _wrap.previous_sibling(_name);
// path 2: restart from the parent's last child
6883 _wrap = _parent.last_child();
// the last child's name differs from _name: continue backwards with a name-filtered lookup
6885 if (!impl::strequal(_wrap.name(), _name))
6886 _wrap = _wrap.previous_sibling(_name);
// Post-decrement: takes a copy of the current iterator first.
// NOTE(review): the step-back and return statements are not visible in this excerpt.
6892 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
6894 xml_named_node_iterator temp = *this;
// Default state of a parse result: status_internal_error, offset 0, encoding_auto.
6899 PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
// A parse result converts to true only when its status equals status_ok.
6903 PUGI__FN xml_parse_result::operator bool() const
6905 return status == status_ok;
// Maps the stored status code to a constant English description.
// Every returned pointer refers to a string literal; unlisted codes yield "Unknown error".
// NOTE(review): the switch header is not visible in this excerpt.
6908 PUGI__FN const char* xml_parse_result::description() const
6912 case status_ok: return "No error";
// file, stream, memory and internal failures
6914 case status_file_not_found: return "File was not found";
6915 case status_io_error: return "Error reading from file/stream";
6916 case status_out_of_memory: return "Could not allocate memory";
6917 case status_internal_error: return "Internal error occurred";
// parsing failures
6919 case status_unrecognized_tag: return "Could not determine tag type";
6921 case status_bad_pi: return "Error parsing document declaration/processing instruction";
6922 case status_bad_comment: return "Error parsing comment";
6923 case status_bad_cdata: return "Error parsing CDATA section";
6924 case status_bad_doctype: return "Error parsing document type declaration";
6925 case status_bad_pcdata: return "Error parsing PCDATA section";
6926 case status_bad_start_element: return "Error parsing start element tag";
6927 case status_bad_attribute: return "Error parsing element attribute";
6928 case status_bad_end_element: return "Error parsing end element tag";
6929 case status_end_element_mismatch: return "Start-end tags mismatch";
6931 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
6933 case status_no_document_element: return "No document element found";
// fallback for any status value without a dedicated message
6935 default: return "Unknown error";
// Default constructor: starts with a null _buffer.
// NOTE(review): the constructor body is not visible in this excerpt.
6939 PUGI__FN xml_document::xml_document(): _buffer(0)
// Destructor. NOTE(review): the body is not visible in this excerpt.
6944 PUGI__FN xml_document::~xml_document()
6949 #ifdef PUGIXML_HAS_MOVE
// Move constructor (built only when PUGIXML_HAS_MOVE is defined): starts with a null _buffer.
// NOTE(review): the body is not visible in this excerpt.
6950 PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0)
// Move assignment. Self-assignment returns immediately without touching the document.
// NOTE(review): the statements that perform the actual move are not visible in this excerpt.
6956 PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
6958 if (this == &rhs) return *this;
// Parameterless reset. NOTE(review): the body is not visible in this excerpt.
6968 PUGI__FN void xml_document::reset()
// Reset from a prototype document: copies the prototype's tree into this document's root
// through impl::node_copy_tree.
// NOTE(review): any statement preceding the copy is not visible in this excerpt.
6974 PUGI__FN void xml_document::reset(const xml_document& proto)
6978 impl::node_copy_tree(_root, proto._root);
// Initializes the storage embedded in the document object (_memory):
// constructs the first memory page there, placement-constructs the xml_document_struct
// root right behind the page header (plus page_offset), wires the page's allocator to
// that root and finally asserts that the root fits inside _memory.
6981 PUGI__FN void xml_document::_create()
6985 #ifdef PUGIXML_COMPACT
6986 // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit
6987 const size_t page_offset = sizeof(void*);
// non-compact builds need no marker, so the root follows the page header directly
6989 const size_t page_offset = 0;
6992 // initialize sentinel page
// compile-time check that page header + document struct + marker space fit into _memory
6993 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));
6995 // prepare page structure
6996 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
// busy_size is set to the full page size for this first page
6999 page->busy_size = impl::xml_memory_page_size;
7001 // setup first page marker
7002 #ifdef PUGIXML_COMPACT
7003 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
7004 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
// the marker stores sizeof(xml_memory_page)
7005 *page->compact_page_marker = sizeof(impl::xml_memory_page);
7008 // allocate new root
7009 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
// the root's prev_sibling_c refers back to the root itself
7010 _root->prev_sibling_c = _root;
7012 // setup sentinel page
7013 page->allocator = static_cast<impl::xml_document_struct*>(_root);
7015 // setup hash table pointer in allocator
7016 #ifdef PUGIXML_COMPACT
7017 page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash;
7020 // verify the document allocation
7021 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
// Releases everything the document owns: the parse buffer (_buffer), the buffers recorded
// in the extra_buffers list, every memory page after the root page and, in compact mode,
// the hash table. The root page itself lives inside _memory and is not deallocated.
// NOTE(review): some guards and trailing statements are not visible in this excerpt.
7024 PUGI__FN void xml_document::_destroy()
7028 // destroy static storage
7031 impl::xml_memory::deallocate(_buffer);
7035 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
7036 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
7038 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
7041 // destroy dynamic storage, leave sentinel page (it's in static memory)
7042 impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
// the root page must be the first page of the list and must lie inside _memory
7043 assert(root_page && !root_page->prev);
7044 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
// the successor is read before the page is deallocated
7046 for (impl::xml_memory_page* page = root_page->next; page; )
7048 impl::xml_memory_page* next = page->next;
7050 impl::xml_allocator::deallocate_page(page);
7055 #ifdef PUGIXML_COMPACT
7056 // destroy hash table
7057 static_cast<impl::xml_document_struct*>(_root)->hash.clear();
7063 #ifdef PUGIXML_HAS_MOVE
// Transfers the contents of rhs into this document (built only when PUGIXML_HAS_MOVE is defined).
// Visible steps: optional hash reservation (compact mode), allocator state, buffers, hash,
// page list relinking, per-page allocator fix-up, tree hand-over, then re-initialization
// of the source document structure in place.
// The asserts require this document to have no extra pages and no children beforehand.
// NOTE(review): several short statements (loop bodies, returns) are not visible in this excerpt.
7064 PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
7066 impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
7067 impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root);
7069 // save first child pointer for later; this needs hash access
7070 xml_node_struct* other_first_child = other->first_child;
7072 #ifdef PUGIXML_COMPACT
7073 // reserve space for the hash table up front; this is the only operation that can fail
7074 // if it does, we have no choice but to throw (if we have exceptions)
7075 if (other_first_child)
// walk the top-level children of the source document
7077 size_t other_children = 0;
7078 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
7081 // in compact mode, each pointer assignment could result in a hash table request
7082 // during move, we have to relocate document first_child and parents of all children
7083 // normally there's just one child and its parent has a pointerless encoding but
7084 // we assume the worst here
7085 if (!other->_hash->reserve(other_children + 1))
7087 #ifdef PUGIXML_NO_EXCEPTIONS
7090 throw std::bad_alloc();
7096 // move allocation state
7097 // note that other->_root may point to the embedded document page, in which case we should keep original (empty) state
7098 if (other->_root != PUGI__GETPAGE(other))
7100 doc->_root = other->_root;
7101 doc->_busy_size = other->_busy_size;
7104 // move buffer state
7105 doc->buffer = other->buffer;
7106 doc->extra_buffers = other->extra_buffers;
7107 _buffer = rhs._buffer;
7109 #ifdef PUGIXML_COMPACT
7110 // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child
7111 doc->hash = other->hash;
7112 doc->_hash = &doc->hash;
7114 // make sure we don't access other hash up until the end when we reinitialize other document
7118 // move page structure
7119 impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc);
7120 assert(doc_page && !doc_page->prev && !doc_page->next);
7122 impl::xml_memory_page* other_page = PUGI__GETPAGE(other);
7123 assert(other_page && !other_page->prev);
7125 // relink pages since root page is embedded into xml_document
7126 if (impl::xml_memory_page* page = other_page->next)
7128 assert(page->prev == other_page);
// splice the source's page chain behind this document's root page
7130 page->prev = doc_page;
7132 doc_page->next = page;
7133 other_page->next = 0;
7136 // make sure pages point to the correct document state
7137 for (impl::xml_memory_page* page = doc_page->next; page; page = page->next)
7139 assert(page->allocator == other);
7141 page->allocator = doc;
7143 #ifdef PUGIXML_COMPACT
7144 // this automatically migrates most children between documents and prevents ->parent assignment from allocating
7145 if (page->compact_shared_parent == other)
7146 page->compact_shared_parent = doc;
7150 // move tree structure
7151 assert(!doc->first_child);
7153 doc->first_child = other_first_child;
// visit every moved top-level child; the re-parenting statement itself is not visible here
7155 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
7157 #ifdef PUGIXML_COMPACT
7158 // most children will have migrated when we reassigned compact_shared_parent
7159 assert(node->parent == other || node->parent == doc);
7163 assert(node->parent == other);
7168 // reset other document
7169 new (other) impl::xml_document_struct(PUGI__GETPAGE(other));
7174 #ifndef PUGIXML_NO_STL
// Loads the document from a narrow-character input stream by delegating to
// impl::load_stream_impl, which also receives the address of _buffer.
// NOTE(review): any statement preceding the delegation is not visible in this excerpt.
7175 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
7179 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
// Loads the document from a wide-character input stream; the encoding passed on is always encoding_wchar.
// NOTE(review): any statement preceding the delegation is not visible in this excerpt.
7182 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
7186 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
// Loads the document from a zero-terminated string of char_t.
// The encoding is fixed to the native one (encoding_wchar in PUGIXML_WCHAR_MODE, encoding_utf8
// otherwise) and the byte size handed to load_buffer is strlength(contents) * sizeof(char_t).
7190 PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
7192 // Force native encoding (skip autodetection)
7193 #ifdef PUGIXML_WCHAR_MODE
7194 xml_encoding encoding = encoding_wchar;
7196 xml_encoding encoding = encoding_utf8;
7199 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
// String overload of load: forwards unchanged to load_string.
7202 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
7204 return load_string(contents, options);
// Loads the document from a file named by a narrow path.
// The file is opened in "rb" mode and held by an auto_deleter constructed with impl::close_file;
// the handle (possibly null, handling is up to load_file_impl) goes to impl::load_file_impl.
// NOTE(review): any statement preceding the open is not visible in this excerpt.
7207 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
7211 using impl::auto_deleter; // MSVC7 workaround
7212 auto_deleter<FILE> file(impl::open_file(path_, "rb"), impl::close_file);
7214 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
// Loads the document from a file named by a wide-character path; same flow as the narrow
// overload but the file is opened through impl::open_file_wide with mode L"rb".
// NOTE(review): any statement preceding the open is not visible in this excerpt.
7217 PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
7221 using impl::auto_deleter; // MSVC7 workaround
7222 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
7224 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
// Loads the document from a read-only memory buffer.
// Both boolean arguments passed to impl::load_buffer_impl are false (presumably "in place"
// and "take ownership"; confirm against load_buffer_impl); constness is cast away only to
// match the callee's signature.
// NOTE(review): any statement preceding the delegation is not visible in this excerpt.
7227 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
7231 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
// Loads the document from a mutable memory buffer; passes (true, false) as the two boolean
// arguments of impl::load_buffer_impl (presumably: parse in place, do not take ownership; confirm).
// NOTE(review): any statement preceding the delegation is not visible in this excerpt.
7234 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7238 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
// Loads the document from a mutable memory buffer; passes (true, true) as the two boolean
// arguments of impl::load_buffer_impl (presumably: parse in place and take ownership; confirm).
// NOTE(review): any statement preceding the delegation is not visible in this excerpt.
7241 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7245 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
// Serializes the document to an xml_writer through a buffered writer.
// Order of output: optional BOM, optional XML declaration, the node tree, then a flush.
7248 PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7250 impl::xml_buffered_writer buffered_writer(writer, encoding);
// a BOM is written only on request and never for latin1 output
7252 if ((flags & format_write_bom) && encoding != encoding_latin1)
7254 // BOM always represents the codepoint U+FEFF, so just write it in native encoding
7255 #ifdef PUGIXML_WCHAR_MODE
7256 unsigned int bom = 0xfeff;
7257 buffered_writer.write(static_cast<wchar_t>(bom));
// non-wide builds emit the three-byte sequence EF BB BF
7259 buffered_writer.write('\xef', '\xbb', '\xbf');
// emit a default declaration unless suppressed by flag or impl::has_declaration(_root) is true
7263 if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
7265 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
// latin1 output names its encoding explicitly in the declaration
7266 if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
7267 buffered_writer.write('?', '>');
// raw formatting omits the newline after the declaration
7268 if (!(flags & format_raw)) buffered_writer.write('\n');
7271 impl::node_output(buffered_writer, _root, indent, flags, 0);
7273 buffered_writer.flush();
7276 #ifndef PUGIXML_NO_STL
// Serializes the document to a narrow output stream by wrapping it in xml_writer_stream
// and delegating to the xml_writer overload of save.
7277 PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7279 xml_writer_stream writer(stream);
7281 save(writer, indent, flags, encoding);
// Serializes the document to a wide output stream; the encoding is always encoding_wchar.
7284 PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
7286 xml_writer_stream writer(stream);
7288 save(writer, indent, flags, encoding_wchar);
// Saves the document to a file named by a narrow path.
// The file is opened in text mode ("w") when format_save_file_text is set and in binary
// mode ("wb") otherwise; the result of impl::save_file_impl is returned.
7292 PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7294 using impl::auto_deleter; // MSVC7 workaround
7295 auto_deleter<FILE> file(impl::open_file(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
7297 return impl::save_file_impl(*this, file.data, indent, flags, encoding);
// Saves the document to a file named by a wide-character path; same flow as the narrow
// overload but the file is opened through impl::open_file_wide with a wide mode string.
7300 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7302 using impl::auto_deleter; // MSVC7 workaround
7303 auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file);
7305 return impl::save_file_impl(*this, file.data, indent, flags, encoding);
// Scans the direct children of the document root in sibling order, looking for a child
// whose node type is node_element.
// NOTE(review): the return statements (match and no-match) are not visible in this excerpt.
7308 PUGI__FN xml_node xml_document::document_element() const
7312 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
7313 if (PUGI__NODETYPE(i) == node_element)
7319 #ifndef PUGIXML_NO_STL
// Converts a zero-terminated wide string to a std::string via impl::as_utf8_impl;
// the length is measured with impl::strlength_wide.
// NOTE(review): any statement preceding the conversion is not visible in this excerpt.
7320 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
7324 return impl::as_utf8_impl(str, impl::strlength_wide(str));
// Converts a wide std::basic_string to a std::string via impl::as_utf8_impl, using str.size() as the length.
7327 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
7329 return impl::as_utf8_impl(str.c_str(), str.size());
// Converts a zero-terminated narrow string to a wide string via impl::as_wide_impl;
// the length is measured with strlen.
// NOTE(review): any statement preceding the conversion is not visible in this excerpt.
7332 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
7336 return impl::as_wide_impl(str, strlen(str));
// Converts a std::string to a wide string via impl::as_wide_impl, using str.size() as the length.
7339 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
7341 return impl::as_wide_impl(str.c_str(), str.size());
// Installs the allocation and deallocation callbacks by storing them in
// impl::xml_memory::allocate and impl::xml_memory::deallocate. No validation is performed.
7345 PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
7347 impl::xml_memory::allocate = allocate;
7348 impl::xml_memory::deallocate = deallocate;
// Returns the allocation callback currently stored in impl::xml_memory::allocate.
7351 PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
7353 return impl::xml_memory::allocate;
// Returns the deallocation callback currently stored in impl::xml_memory::deallocate.
7356 PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
7358 return impl::xml_memory::deallocate;
7362 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
7365 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
// Each overload reports std::bidirectional_iterator_tag for one of the three pugixml iterator types.
7366 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
7368 return std::bidirectional_iterator_tag();
7371 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
7373 return std::bidirectional_iterator_tag();
7376 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
7378 return std::bidirectional_iterator_tag();
7383 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
7386 // Workarounds for (non-standard) iterator category detection
// Each overload reports std::bidirectional_iterator_tag for one of the three pugixml iterator types.
7387 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
7389 return std::bidirectional_iterator_tag();
7392 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
7394 return std::bidirectional_iterator_tag();
7397 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
7399 return std::bidirectional_iterator_tag();
7404 #ifndef PUGIXML_NO_XPATH
// Four generic binary predicate call operators, each taking two values of the same type
// by const reference and returning bool.
// NOTE(review): the enclosing functor types and the comparison each one performs are not
// visible in this excerpt.
7409 template <typename T> bool operator()(const T& lhs, const T& rhs) const
7417 template <typename T> bool operator()(const T& lhs, const T& rhs) const
7425 template <typename T> bool operator()(const T& lhs, const T& rhs) const
7433 template <typename T> bool operator()(const T& lhs, const T& rhs) const
// Generic helper taking two objects of the same type by mutable reference.
// NOTE(review): the body is not visible in this excerpt; by its name it exchanges the two values.
7439 template <typename T> inline void swap(T& lhs, T& rhs)
// Searches [begin, end) for the element that pred orders first.
// Starting at begin + 1, each element is compared against the current result with pred(*it, *result).
// The `begin + 1` start requires an iterator type that supports addition.
// NOTE(review): the result initialization, update and return are not visible in this excerpt.
7446 template <typename I, typename Pred> PUGI__FN I min_element(I begin, I end, const Pred& pred)
7450 for (I it = begin + 1; it != end; ++it)
7451 if (pred(*it, *result))
// Reverses [begin, end) in place: swaps the outermost pair and moves both ends inward
// until fewer than two elements remain between them.
7457 template <typename I> PUGI__FN void reverse(I begin, I end)
7459 while (end - begin > 1)
7460 swap(*begin++, *--end);
// Removes consecutive duplicates from [begin, end) in place.
// NOTE(review): several statements (advancing begin, initializing `write`, the else branch
// and the return) are not visible in this excerpt.
7463 template <typename I> PUGI__FN I unique(I begin, I end)
// fast path: walk the prefix in which neighbouring elements already differ
7466 while (end - begin > 1 && *begin != *(begin + 1))
7472 // last written element
7475 // merge unique elements
7476 while (begin != end)
// an element that differs from the last written one is appended after it
7478 if (*begin != *write)
7479 *++write = *begin++;
7484 // past-the-end (write points to live element)
// Sorts the array [begin, end) in place by insertion, ordering elements with pred.
// For each element from begin + 1 onward a "hole" is moved backwards while the saved value
// orders before the element preceding the hole; the value is then stored into the hole.
// NOTE(review): the empty-range guard (if any), the val/hole setup and the final store are
// not visible in this excerpt.
7488 template <typename T, typename Pred> PUGI__FN void insertion_sort(T* begin, T* end, const Pred& pred)
7493 for (T* it = begin + 1; it != end; ++it)
7498 // move hole backwards
7499 while (hole > begin && pred(val, *(hole - 1)))
// shift the preceding element one slot to the right
7501 *hole = *(hole - 1);
7505 // fill hole with element
// Picks the median of three positions by ordering the iterators themselves.
// swap exchanges the local iterator copies, not the pointed-to elements, so the underlying
// sequence is left untouched.
// NOTE(review): the statement under the second `if` and the return are not visible in this excerpt.
7510 template <typename I, typename Pred> inline I median3(I first, I middle, I last, const Pred& pred)
7512 if (pred(*middle, *first))
7513 swap(middle, first);
7514 if (pred(*last, *middle))
7516 if (pred(*middle, *first))
7517 swap(middle, first);
// Three-way partition of [begin, end) around `pivot`; elements are classified with pred
// (ordered before pivot) and operator== (equal to pivot). The boundaries of the equal range
// are reported through out_eqbeg / out_eqend.
// NOTE(review): most of the loop body, the boundary variables and the writes to the output
// pointers are not visible in this excerpt.
7522 template <typename T, typename Pred> PUGI__FN void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend)
7524 // invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups)
7531 if (pred(*lt, pivot))
7533 else if (*lt == pivot)
7539 // we now have just 4 groups: = < >; move equal elements to the middle
7542 for (T* it = begin; it != eq; ++it)
7543 swap(*it, *--eqbeg);
// Sorts [begin, end) with pred.
// While more than 16 elements remain: pick a pivot with median3, split the range with
// partition3, recurse into one side and keep looping on the other. The remaining small
// range is finished with insertion_sort.
// NOTE(review): the eqbeg/eqend declarations, the range updates and the else keyword are
// not visible in this excerpt.
7549 template <typename I, typename Pred> PUGI__FN void sort(I begin, I end, const Pred& pred)
7551 // sort large chunks
7552 while (end - begin > 16)
7554 // find median element
7555 I middle = begin + (end - begin) / 2;
7556 I median = median3(begin, middle, end - 1, pred);
7558 // partition in three chunks (< = >)
7560 partition3(begin, end, *median, pred, &eqbeg, &eqend);
7562 // loop on larger half
7563 if (eqbeg - begin > end - eqend)
// left part is larger: recurse into the right part
7565 sort(eqend, end, pred);
// otherwise recurse into the left part
7570 sort(begin, eqbeg, pred);
7575 // insertion sort small chunk
7576 insertion_sort(begin, end, pred);
// Inserts a pointer key into an open-addressing table of `size` slots; empty slots hold 0.
// The slot index is taken as `hash & (size - 1)`, so size is presumably a power of two
// (confirm with callers). At most `size` probes are made; probe k advances the index by k + 1.
// NOTE(review): the hash mixing statements, the empty-table guard (if any) and the return
// values of each branch are not visible in this excerpt.
7579 PUGI__FN bool hash_insert(const void** table, size_t size, const void* key)
// the pointer value, truncated to unsigned int, seeds the hash
7583 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
7585 // MurmurHash3 32-bit finalizer
7592 size_t hashmod = size - 1;
7593 size_t bucket = h & hashmod;
7595 for (size_t probe = 0; probe <= hashmod; ++probe)
// free slot: store the key here
7597 if (table[bucket] == 0)
7599 table[bucket] = key;
// the key is already present
7603 if (table[bucket] == key)
7606 // hash collision, quadratic probing
7607 bucket = (bucket + probe + 1) & hashmod;
7610 assert(false && "Hash table is full"); // unreachable
7615 // Allocator used for AST and evaluation stacks
// Size of the data area of one XPath memory block; PUGIXML_MEMORY_XPATH_PAGE_SIZE overrides it
// when defined (the fallback value is not visible in this excerpt).
7617 static const size_t xpath_memory_page_size =
7618 #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
7619 PUGIXML_MEMORY_XPATH_PAGE_SIZE
// Allocation granularity: the larger of sizeof(double) and sizeof(void*).
7625 static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*);
// One block in the singly linked list used by xpath_allocator.
// NOTE(review): further members (e.g. the capacity field referenced by the allocator) are
// not visible in this excerpt.
7627 struct xpath_memory_block
// next block in the list
7629 xpath_memory_block* next;
// payload area; xpath_memory_page_size bytes in this declaration
7634 char data[xpath_memory_page_size];
// Bump allocator over a linked list of xpath_memory_block.
// _root is the block currently allocated from and _root_size the number of bytes used in it.
// Allocation failures are reported by returning null and, when an error flag pointer was
// supplied, by setting that flag to true.
// NOTE(review): several short member declarations and statements are not visible in this excerpt.
7639 struct xpath_allocator
7641 xpath_memory_block* _root;
// Starts allocating from the given root block with zero bytes used; `error` is optional.
7645 xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error)
// Returns `size` bytes (rounded up to the block alignment) from the current block, or from
// a newly allocated block when the current one has no room.
7649 void* allocate(size_t size)
7651 // round size up to block alignment boundary
7652 size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
// fast path: the request fits into the current block
7654 if (_root_size + size <= _root->capacity)
7656 void* buf = &_root->data[0] + _root_size;
7662 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
7663 size_t block_capacity_base = sizeof(_root->data);
7664 size_t block_capacity_req = size + block_capacity_base / 4;
7665 size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
// total size = block header up to `data` + payload capacity
7667 size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
7669 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
// failure path: raise the error flag if one was provided
7672 if (_error) *_error = true;
// link the new block in front of the current one
7676 block->next = _root;
7677 block->capacity = block_capacity;
// Grows the most recent allocation from old_size to new_size bytes, in place when the
// current block has room, otherwise by allocating anew and copying old_size bytes.
// ptr may be null; returns null when the new allocation fails.
7686 void* reallocate(void* ptr, size_t old_size, size_t new_size)
7688 // round size up to block alignment boundary
7689 old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7690 new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7692 // we can only reallocate the last object
7693 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
7695 // try to reallocate the object inplace
7696 if (ptr && _root_size - old_size + new_size <= _root->capacity)
7698 _root_size = _root_size - old_size + new_size;
7702 // allocate a new block
7703 void* result = allocate(new_size);
7704 if (!result) return 0;
7706 // we have a new block
7709 // copy old data (we only support growing)
7710 assert(new_size >= old_size);
7711 memcpy(result, ptr, old_size);
7713 // free the previous page if it had no other objects
7714 assert(_root->data == result);
7715 assert(_root->next);
// the old object started at the very beginning of the previous block
7717 if (_root->next->data == ptr)
7719 // deallocate the whole page, unless it was the first one
7720 xpath_memory_block* next = _root->next->next;
7724 xml_memory::deallocate(_root->next);
// Rolls the allocator back to a previously captured state: deallocates every block that
// precedes state._root in the list, then restores _root and _root_size from the state.
7733 void revert(const xpath_allocator& state)
7735 // free all new pages
7736 xpath_memory_block* cur = _root;
7738 while (cur != state._root)
7740 xpath_memory_block* next = cur->next;
7742 xml_memory::deallocate(cur);
7748 _root = state._root;
7749 _root_size = state._root_size;
// NOTE(review): the header of the member owning the next statements is not visible in this
// excerpt; the visible code walks the block list from _root and deallocates blocks.
7754 xpath_memory_block* cur = _root;
7759 xpath_memory_block* next = cur->next;
7761 xml_memory::deallocate(cur);
// Scope guard for an xpath_allocator: the constructor stores the allocator pointer and a
// copy of its state, and the destructor reverts the allocator to that saved state.
7768 struct xpath_allocator_capture
7770 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
7774 ~xpath_allocator_capture()
7776 _target->revert(_state);
// allocator to roll back
7779 xpath_allocator* _target;
// snapshot taken at construction
7780 xpath_allocator _state;
// Pair of allocator pointers named `result` and `temp`.
// NOTE(review): the declaration of the enclosing type is not visible in this excerpt.
7785 xpath_allocator* result;
7786 xpath_allocator* temp;
// Bundles two embedded memory blocks with the two allocators that start out on them.
// Both allocators report failures through the shared `oom` flag.
// NOTE(review): some member declarations and statements are not visible in this excerpt.
7789 struct xpath_stack_data
7791 xpath_memory_block blocks[2];
7792 xpath_allocator result;
7793 xpath_allocator temp;
// result starts on blocks[0], temp on blocks[1]; oom starts out false
7797 xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false)
// the embedded blocks terminate their lists and expose their full data area as capacity
7799 blocks[0].next = blocks[1].next = 0;
7800 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
7802 stack.result = &result;
// String value used by the XPath code. It either refers to constant character data or owns
// a copy obtained from an xpath_allocator; _uses_heap tells the two cases apart and
// _length_heap caches the length for the allocator-backed case.
// NOTE(review): the class header, some member declarations and several short statements
// are not visible in this excerpt.
7818 const char_t* _buffer;
7820 size_t _length_heap;
// Copies `length` characters into memory taken from alloc (room for length + 1 characters);
// returns null when the allocation fails.
7822 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
7824 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
7825 if (!result) return 0;
7827 memcpy(result, string, length * sizeof(char_t));
// Member-wise constructor used by the factory functions below.
7833 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
// Wraps constant character data without copying.
7838 static xpath_string from_const(const char_t* str)
7840 return xpath_string(str, false, 0);
// Wraps an already allocated, zero-terminated range [begin, end].
7843 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
7845 assert(begin <= end && *end == 0);
7847 return xpath_string(begin, true, static_cast<size_t>(end - begin));
// Copies [begin, end) into allocator memory; yields an empty string when the copy fails.
7850 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
7852 assert(begin <= end);
7855 return xpath_string();
7857 size_t length = static_cast<size_t>(end - begin);
7858 const char_t* data = duplicate_string(begin, length, alloc);
7860 return data ? xpath_string(data, true, length) : xpath_string();
// Default state: constant empty string.
7863 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
// Appends o to this string; allocation failure leaves the string unchanged.
7867 void append(const xpath_string& o, xpath_allocator* alloc)
7869 // skip empty sources
7870 if (!*o._buffer) return;
7872 // fast append for constant empty target and constant source
7873 if (!*_buffer && !_uses_heap && !o._uses_heap)
7875 _buffer = o._buffer;
7879 // need to make heap copy
7880 size_t target_length = length();
7881 size_t source_length = o.length();
7882 size_t result_length = target_length + source_length;
7884 // allocate new buffer
7885 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
7886 if (!result) return;
7888 // append first string to the new buffer in case there was no reallocation
7889 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
7891 // append second string to the new buffer
7892 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
7893 result[result_length] = 0;
7898 _length_heap = result_length;
// Read-only access to the characters (body not visible in this excerpt).
7902 const char_t* c_str() const
// Length in characters: cached for allocator-backed strings, measured otherwise.
7907 size_t length() const
7909 return _uses_heap ? _length_heap : strlength(_buffer);
// Mutable access; the visible code duplicates the buffer through alloc and returns null
// when that fails (the condition guarding the copy is not visible in this excerpt).
7912 char_t* data(xpath_allocator* alloc)
7914 // make private heap copy
7917 size_t length_ = strlength(_buffer);
7918 const char_t* data_ = duplicate_string(_buffer, length_, alloc);
7920 if (!data_) return 0;
7924 _length_heap = length_;
7927 return const_cast<char_t*>(_buffer);
// True when the first character is the terminator (member header not visible in this excerpt).
7932 return *_buffer == 0;
// Content comparison through strequal.
7935 bool operator==(const xpath_string& o) const
7937 return strequal(_buffer, o._buffer);
7940 bool operator!=(const xpath_string& o) const
7942 return !strequal(_buffer, o._buffer);
// Accessor for the heap flag (body not visible in this excerpt).
7945 bool uses_heap() const
// Tests whether `string` begins with `pattern`: characters are matched while the pattern
// has not ended and both agree; the test succeeds when the whole pattern was consumed.
// NOTE(review): the pointer increments inside the loop are not visible in this excerpt.
7953 PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
7955 while (*pattern && *string == *pattern)
7961 return *pattern == 0;
// Finds the first occurrence of character c in s: wcschr in PUGIXML_WCHAR_MODE, strchr otherwise.
7964 PUGI__FN const char_t* find_char(const char_t* s, char_t c)
7966 #ifdef PUGIXML_WCHAR_MODE
7967 return wcschr(s, c);
7969 return strchr(s, c);
// Finds the first occurrence of p in s: wcsstr in PUGIXML_WCHAR_MODE, strstr otherwise.
// In wide mode an empty pattern short-circuits to s itself.
7973 PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
7975 #ifdef PUGIXML_WCHAR_MODE
7976 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
7977 return (*p == 0) ? s : wcsstr(s, p);
7979 return strstr(s, p);
7983 // Converts symbol to lower case, if it is an ASCII one
// The unsigned subtraction maps 'A'..'Z' to 0..25 and everything else to 26 or above, so a
// single comparison covers both range bounds; OR-ing with ' ' yields the lower-case letter.
7984 PUGI__FN char_t tolower_ascii(char_t ch)
7986 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
// Computes the string value of an XPath node.
// Visible cases: an attribute's value and a node's own value are wrapped without copying;
// for a node with children, the values of all pcdata/cdata descendants are concatenated in
// traversal order using alloc; the fallback is an empty string.
// NOTE(review): the conditions and switch labels that select each case are not visible in
// this excerpt.
7989 PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
7992 return xpath_string::from_const(na.attribute().value());
7995 xml_node n = na.node();
8003 return xpath_string::from_const(n.value());
8008 xpath_string result;
8010 // element nodes can have value if parse_embed_pcdata was used
8012 result.append(xpath_string::from_const(n.value()), alloc);
// depth-first walk over the subtree rooted at n
8014 xml_node cur = n.first_child();
8016 while (cur && cur != n)
8018 if (cur.type() == node_pcdata || cur.type() == node_cdata)
8019 result.append(xpath_string::from_const(cur.value()), alloc);
// descend first, then move sideways
8021 if (cur.first_child())
8022 cur = cur.first_child();
8023 else if (cur.next_sibling())
8024 cur = cur.next_sibling();
// no child and no sibling: look for an ancestor below n that still has a next sibling
8027 while (!cur.next_sibling() && cur != n)
8030 if (cur != n) cur = cur.next_sibling();
8038 return xpath_string();
// Decides whether ln precedes rn, given that both have the same parent (asserted).
// With a null shared parent the raw pointer values are compared. Otherwise both sibling
// chains are walked forward in lockstep: reaching rn from ln means ln comes first, and
// reaching ln from rn means it does not.
// NOTE(review): the loop header and the final return are not visible in this excerpt.
8043 PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
8045 assert(ln->parent == rn->parent);
8047 // there is no common ancestor (the shared parent is null), nodes are from different documents
8048 if (!ln->parent) return ln < rn;
8050 // determine sibling order
8051 xml_node_struct* ls = ln;
8052 xml_node_struct* rs = rn;
8056 if (ls == rn) return true;
8057 if (rs == ln) return false;
8059 ls = ls->next_sibling;
8060 rs = rs->next_sibling;
8063 // if rn sibling chain ended ln must be before rn
// Decides whether ln precedes rn when the two nodes may sit at different depths.
// First both nodes are lifted in lockstep; if they reach a common parent, the sibling
// comparison decides. Otherwise the depths are equalized, the ancestor/descendant case is
// handled, and both nodes are lifted again until they share a parent.
// NOTE(review): the loop bodies and the height normalization statements are not visible in
// this excerpt.
8067 PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
8069 // find common ancestor at the same depth, if any
8070 xml_node_struct* lp = ln;
8071 xml_node_struct* rp = rn;
8073 while (lp && rp && lp->parent != rp->parent)
8079 // parents are the same!
8080 if (lp && rp) return node_is_before_sibling(lp, rp);
8082 // nodes are at different depths, need to normalize heights
// lp ran out first, so the left node is the one closer to the root
8083 bool left_higher = !lp;
8097 // one node is the ancestor of the other
8098 if (ln == rn) return left_higher;
8100 // find common ancestor... again
8101 while (ln->parent != rn->parent)
8107 return node_is_before_sibling(ln, rn);
8110 PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
8112 while (node && node != parent) node = node->parent;
8114 return parent && node == parent;
8117 PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
8119 xml_node_struct* node = xnode.node().internal_object();
8123 if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
8125 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
8126 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
8132 xml_attribute_struct* attr = xnode.attribute().internal_object();
8136 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
8138 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
8139 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
8148 struct document_order_comparator
8150 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
8152 // optimized document order based check
8153 const void* lo = document_buffer_order(lhs);
8154 const void* ro = document_buffer_order(rhs);
8156 if (lo && ro) return lo < ro;
8159 xml_node ln = lhs.node(), rn = rhs.node();
8161 // compare attributes
8162 if (lhs.attribute() && rhs.attribute())
8165 if (lhs.parent() == rhs.parent())
8167 // determine sibling order
8168 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
8169 if (a == rhs.attribute())
8175 // compare attribute parents
8179 else if (lhs.attribute())
8181 // attributes go after the parent element
8182 if (lhs.parent() == rhs.node()) return false;
8186 else if (rhs.attribute())
8188 // attributes go after the parent element
8189 if (rhs.parent() == lhs.node()) return true;
8194 if (ln == rn) return false;
8196 if (!ln || !rn) return ln < rn;
8198 return node_is_before(ln.internal_object(), rn.internal_object());
8202 PUGI__FN double gen_nan()
8204 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
8205 PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
8206 typedef uint32_t UI; // BCC5 workaround
8207 union { float f; UI i; } u;
8212 const volatile double zero = 0.0;
8217 PUGI__FN bool is_nan(double value)
8219 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8220 return !!_isnan(value);
8221 #elif defined(fpclassify) && defined(FP_NAN)
8222 return fpclassify(value) == FP_NAN;
8225 const volatile double v = value;
8230 PUGI__FN const char_t* convert_number_to_string_special(double value)
8232 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8233 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
8234 if (_isnan(value)) return PUGIXML_TEXT("NaN");
8235 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8236 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
8237 switch (fpclassify(value))
8240 return PUGIXML_TEXT("NaN");
8243 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8246 return PUGIXML_TEXT("0");
8253 const volatile double v = value;
8255 if (v == 0) return PUGIXML_TEXT("0");
8256 if (v != v) return PUGIXML_TEXT("NaN");
8257 if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8262 PUGI__FN bool convert_number_to_boolean(double value)
8264 return (value != 0 && !is_nan(value));
8267 PUGI__FN void truncate_zeros(char* begin, char* end)
8269 while (begin != end && end[-1] == '0') end--;
8274 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
8275 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
8276 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
8280 _ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign);
8282 // truncate redundant zeros
8283 truncate_zeros(buffer, buffer + strlen(buffer));
8286 *out_mantissa = buffer;
8287 *out_exponent = exponent;
8290 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
8292 // get a scientific notation value with IEEE DBL_DIG decimals
8293 PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value);
8295 // get the exponent (possibly negative)
8296 char* exponent_string = strchr(buffer, 'e');
8297 assert(exponent_string);
8299 int exponent = atoi(exponent_string + 1);
8301 // extract mantissa string: skip sign
8302 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
8303 assert(mantissa[0] != '0' && mantissa[1] == '.');
8305 // divide mantissa by 10 to eliminate integer part
8306 mantissa[1] = mantissa[0];
8310 // remove extra mantissa digits and zero-terminate mantissa
8311 truncate_zeros(mantissa, exponent_string);
8314 *out_mantissa = mantissa;
8315 *out_exponent = exponent;
8319 PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
8321 // try special number conversion
8322 const char_t* special = convert_number_to_string_special(value);
8323 if (special) return xpath_string::from_const(special);
8325 // get mantissa + exponent form
8326 char mantissa_buffer[32];
8330 convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent);
8332 // allocate a buffer of suitable length for the number
8333 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
8334 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
8335 if (!result) return xpath_string();
8341 if (value < 0) *s++ = '-';
8350 while (exponent > 0)
8352 assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
8353 *s++ = *mantissa ? *mantissa++ : '0';
8364 // extra zeroes from negative exponent
8365 while (exponent < 0)
8371 // extra mantissa digits
8374 assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
8380 assert(s < result + result_size);
8383 return xpath_string::from_heap_preallocated(result, s);
8386 PUGI__FN bool check_string_to_number_format(const char_t* string)
8388 // parse leading whitespace
8389 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8392 if (*string == '-') ++string;
8394 if (!*string) return false;
8396 // if there is no integer part, there should be a decimal part with at least one digit
8397 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
8399 // parse integer part
8400 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8402 // parse decimal part
8407 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8410 // parse trailing whitespace
8411 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8413 return *string == 0;
8416 PUGI__FN double convert_string_to_number(const char_t* string)
8418 // check string format
8419 if (!check_string_to_number_format(string)) return gen_nan();
8422 #ifdef PUGIXML_WCHAR_MODE
8423 return wcstod(string, 0);
8425 return strtod(string, 0);
8429 PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
8431 size_t length = static_cast<size_t>(end - begin);
8432 char_t* scratch = buffer;
8434 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8436 // need to make dummy on-heap copy
8437 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8438 if (!scratch) return false;
8441 // copy string to zero-terminated buffer and perform conversion
8442 memcpy(scratch, begin, length * sizeof(char_t));
8443 scratch[length] = 0;
8445 *out_result = convert_string_to_number(scratch);
8447 // free dummy buffer
8448 if (scratch != buffer) xml_memory::deallocate(scratch);
8453 PUGI__FN double round_nearest(double value)
8455 return floor(value + 0.5);
8458 PUGI__FN double round_nearest_nzero(double value)
8460 // same as round_nearest, but returns -0 for [-0.5, -0]
8461 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
8462 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
8465 PUGI__FN const char_t* qualified_name(const xpath_node& node)
8467 return node.attribute() ? node.attribute().name() : node.node().name();
8470 PUGI__FN const char_t* local_name(const xpath_node& node)
8472 const char_t* name = qualified_name(node);
8473 const char_t* p = find_char(name, ':');
8475 return p ? p + 1 : name;
8478 struct namespace_uri_predicate
8480 const char_t* prefix;
8481 size_t prefix_length;
8483 namespace_uri_predicate(const char_t* name)
8485 const char_t* pos = find_char(name, ':');
8487 prefix = pos ? name : 0;
8488 prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
8491 bool operator()(xml_attribute a) const
8493 const char_t* name = a.name();
8495 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
8497 return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
8501 PUGI__FN const char_t* namespace_uri(xml_node node)
8503 namespace_uri_predicate pred = node.name();
8509 xml_attribute a = p.find_attribute(pred);
8511 if (a) return a.value();
8516 return PUGIXML_TEXT("");
8519 PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
8521 namespace_uri_predicate pred = attr.name();
8523 // Default namespace does not apply to attributes
8524 if (!pred.prefix) return PUGIXML_TEXT("");
8526 xml_node p = parent;
8530 xml_attribute a = p.find_attribute(pred);
8532 if (a) return a.value();
8537 return PUGIXML_TEXT("");
8540 PUGI__FN const char_t* namespace_uri(const xpath_node& node)
8542 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
8545 PUGI__FN char_t* normalize_space(char_t* buffer)
8547 char_t* write = buffer;
8549 for (char_t* it = buffer; *it; )
8553 if (PUGI__IS_CHARTYPE(ch, ct_space))
8555 // replace whitespace sequence with single space
8556 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
8558 // avoid leading spaces
8559 if (write != buffer) *write++ = ' ';
8564 // remove trailing space
8565 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
8573 PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
8575 char_t* write = buffer;
8579 PUGI__DMC_VOLATILE char_t ch = *buffer++;
8581 const char_t* pos = find_char(from, ch);
8584 *write++ = ch; // do not process
8585 else if (static_cast<size_t>(pos - from) < to_length)
8586 *write++ = to[pos - from]; // replace
8595 PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
8597 unsigned char table[128] = {0};
8601 unsigned int fc = static_cast<unsigned int>(*from);
8602 unsigned int tc = static_cast<unsigned int>(*to);
8604 if (fc >= 128 || tc >= 128)
8607 // code=128 means "skip character"
8609 table[fc] = static_cast<unsigned char>(tc ? tc : 128);
8615 for (int i = 0; i < 128; ++i)
8617 table[i] = static_cast<unsigned char>(i);
8619 void* result = alloc->allocate(sizeof(table));
8620 if (!result) return 0;
8622 memcpy(result, table, sizeof(table));
8624 return static_cast<unsigned char*>(result);
8627 PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
8629 char_t* write = buffer;
8633 char_t ch = *buffer++;
8634 unsigned int index = static_cast<unsigned int>(ch);
8638 unsigned char code = table[index];
8640 // code=128 means "skip character" (table size is 128 so 128 can be a special value)
8641 // this code skips these characters without extra branches
8642 *write = static_cast<char_t>(code);
8643 write += 1 - (code >> 7);
8657 inline bool is_xpath_attribute(const char_t* name)
8659 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
8662 struct xpath_variable_boolean: xpath_variable
8664 xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
8672 struct xpath_variable_number: xpath_variable
8674 xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
8682 struct xpath_variable_string: xpath_variable
8684 xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
8688 ~xpath_variable_string()
8690 if (value) xml_memory::deallocate(value);
8697 struct xpath_variable_node_set: xpath_variable
8699 xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
8703 xpath_node_set value;
// File-local, default-constructed constant node set.
// NOTE(review): presumably returned by reference where a variable has no node-set value - confirm against its users.
8707 static const xpath_node_set dummy_node_set;
8709 PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str)
8711 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
8712 unsigned int result = 0;
8716 result += static_cast<unsigned int>(*str++);
8717 result += result << 10;
8718 result ^= result >> 6;
8721 result += result << 3;
8722 result ^= result >> 11;
8723 result += result << 15;
8728 template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
8730 size_t length = strlength(name);
8731 if (length == 0) return 0; // empty variable names are invalid
8733 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
8734 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
8735 if (!memory) return 0;
8737 T* result = new (memory) T();
8739 memcpy(result->name, name, (length + 1) * sizeof(char_t));
8744 PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
8748 case xpath_type_node_set:
8749 return new_xpath_variable<xpath_variable_node_set>(name);
8751 case xpath_type_number:
8752 return new_xpath_variable<xpath_variable_number>(name);
8754 case xpath_type_string:
8755 return new_xpath_variable<xpath_variable_string>(name);
8757 case xpath_type_boolean:
8758 return new_xpath_variable<xpath_variable_boolean>(name);
8765 template <typename T> PUGI__FN void delete_xpath_variable(T* var)
8768 xml_memory::deallocate(var);
8771 PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
8775 case xpath_type_node_set:
8776 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
8779 case xpath_type_number:
8780 delete_xpath_variable(static_cast<xpath_variable_number*>(var));
8783 case xpath_type_string:
8784 delete_xpath_variable(static_cast<xpath_variable_string*>(var));
8787 case xpath_type_boolean:
8788 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
8792 assert(false && "Invalid variable type"); // unreachable
8796 PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
8798 switch (rhs->type())
8800 case xpath_type_node_set:
8801 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
8803 case xpath_type_number:
8804 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
8806 case xpath_type_string:
8807 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
8809 case xpath_type_boolean:
8810 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
8813 assert(false && "Invalid variable type"); // unreachable
8818 PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
8820 size_t length = static_cast<size_t>(end - begin);
8821 char_t* scratch = buffer;
8823 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8825 // need to make dummy on-heap copy
8826 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8827 if (!scratch) return false;
8830 // copy string to zero-terminated buffer and perform lookup
8831 memcpy(scratch, begin, length * sizeof(char_t));
8832 scratch[length] = 0;
8834 *out_result = set->get(scratch);
8836 // free dummy buffer
8837 if (scratch != buffer) xml_memory::deallocate(scratch);
8843 // Internal node set class
8845 PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
8847 if (end - begin < 2)
8848 return xpath_node_set::type_sorted;
8850 document_order_comparator cmp;
8852 bool first = cmp(begin[0], begin[1]);
8854 for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
8855 if (cmp(it[0], it[1]) != first)
8856 return xpath_node_set::type_unsorted;
8858 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
8861 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
8863 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
8865 if (type == xpath_node_set::type_unsorted)
8867 xpath_node_set::type_t sorted = xpath_get_order(begin, end);
8869 if (sorted == xpath_node_set::type_unsorted)
8871 sort(begin, end, document_order_comparator());
8873 type = xpath_node_set::type_sorted;
8879 if (type != order) reverse(begin, end);
8884 PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
8886 if (begin == end) return xpath_node();
8890 case xpath_node_set::type_sorted:
8893 case xpath_node_set::type_sorted_reverse:
8896 case xpath_node_set::type_unsorted:
8897 return *min_element(begin, end, document_order_comparator());
8900 assert(false && "Invalid node set type"); // unreachable
8901 return xpath_node();
8905 class xpath_node_set_raw
8907 xpath_node_set::type_t _type;
8914 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
8918 xpath_node* begin() const
8923 xpath_node* end() const
8930 return _begin == _end;
8935 return static_cast<size_t>(_end - _begin);
8938 xpath_node first() const
8940 return xpath_first(_begin, _end, _type);
8943 void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
8945 void push_back(const xpath_node& node, xpath_allocator* alloc)
8950 push_back_grow(node, alloc);
8953 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
8955 if (begin_ == end_) return;
8957 size_t size_ = static_cast<size_t>(_end - _begin);
8958 size_t capacity = static_cast<size_t>(_eos - _begin);
8959 size_t count = static_cast<size_t>(end_ - begin_);
8961 if (size_ + count > capacity)
8963 // reallocate the old array or allocate a new one
8964 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
8969 _end = data + size_;
8970 _eos = data + size_ + count;
8973 memcpy(_end, begin_, count * sizeof(xpath_node));
8979 _type = xpath_sort(_begin, _end, _type, false);
8982 void truncate(xpath_node* pos)
8984 assert(_begin <= pos && pos <= _end);
8989 void remove_duplicates(xpath_allocator* alloc)
8991 if (_type == xpath_node_set::type_unsorted && _end - _begin > 2)
8993 xpath_allocator_capture cr(alloc);
8995 size_t size_ = static_cast<size_t>(_end - _begin);
8997 size_t hash_size = 1;
8998 while (hash_size < size_ + size_ / 2) hash_size *= 2;
9000 const void** hash_data = static_cast<const void**>(alloc->allocate(hash_size * sizeof(void**)));
9001 if (!hash_data) return;
9003 memset(hash_data, 0, hash_size * sizeof(const void**));
9005 xpath_node* write = _begin;
9007 for (xpath_node* it = _begin; it != _end; ++it)
9009 const void* attr = it->attribute().internal_object();
9010 const void* node = it->node().internal_object();
9011 const void* key = attr ? attr : node;
9013 if (key && hash_insert(hash_data, hash_size, key))
9023 _end = unique(_begin, _end);
9027 xpath_node_set::type_t type() const
9032 void set_type(xpath_node_set::type_t value)
9038 PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
9040 size_t capacity = static_cast<size_t>(_eos - _begin);
9042 // get new capacity (1.5x rule)
9043 size_t new_capacity = capacity + capacity / 2 + 1;
9045 // reallocate the old array or allocate a new one
9046 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
9051 _end = data + capacity;
9052 _eos = data + new_capacity;
9060 struct xpath_context
9063 size_t position, size;
9065 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
9078 lex_greater_or_equal,
9090 lex_open_square_brace,
9091 lex_close_square_brace,
9101 struct xpath_lexer_string
9103 const char_t* begin;
9106 xpath_lexer_string(): begin(0), end(0)
9110 bool operator==(const char_t* other) const
9112 size_t length = static_cast<size_t>(end - begin);
9114 return strequalrange(other, begin, length);
9121 const char_t* _cur_lexeme_pos;
9122 xpath_lexer_string _cur_lexeme_contents;
9124 lexeme_t _cur_lexeme;
9127 explicit xpath_lexer(const char_t* query): _cur(query)
9132 const char_t* state() const
9139 const char_t* cur = _cur;
9141 while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
9143 // save lexeme position for error reporting
9144 _cur_lexeme_pos = cur;
9149 _cur_lexeme = lex_eof;
9153 if (*(cur+1) == '=')
9156 _cur_lexeme = lex_greater_or_equal;
9161 _cur_lexeme = lex_greater;
9166 if (*(cur+1) == '=')
9169 _cur_lexeme = lex_less_or_equal;
9174 _cur_lexeme = lex_less;
9179 if (*(cur+1) == '=')
9182 _cur_lexeme = lex_not_equal;
9186 _cur_lexeme = lex_none;
9192 _cur_lexeme = lex_equal;
9198 _cur_lexeme = lex_plus;
9204 _cur_lexeme = lex_minus;
9210 _cur_lexeme = lex_multiply;
9216 _cur_lexeme = lex_union;
9223 if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
9225 _cur_lexeme_contents.begin = cur;
9227 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9229 if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
9233 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9236 _cur_lexeme_contents.end = cur;
9238 _cur_lexeme = lex_var_ref;
9242 _cur_lexeme = lex_none;
9249 _cur_lexeme = lex_open_brace;
9255 _cur_lexeme = lex_close_brace;
9261 _cur_lexeme = lex_open_square_brace;
9267 _cur_lexeme = lex_close_square_brace;
9273 _cur_lexeme = lex_comma;
9278 if (*(cur+1) == '/')
9281 _cur_lexeme = lex_double_slash;
9286 _cur_lexeme = lex_slash;
9291 if (*(cur+1) == '.')
9294 _cur_lexeme = lex_double_dot;
9296 else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
9298 _cur_lexeme_contents.begin = cur; // .
9302 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9304 _cur_lexeme_contents.end = cur;
9306 _cur_lexeme = lex_number;
9311 _cur_lexeme = lex_dot;
9317 _cur_lexeme = lex_axis_attribute;
9324 char_t terminator = *cur;
9328 _cur_lexeme_contents.begin = cur;
9329 while (*cur && *cur != terminator) cur++;
9330 _cur_lexeme_contents.end = cur;
9333 _cur_lexeme = lex_none;
9337 _cur_lexeme = lex_quoted_string;
9344 if (*(cur+1) == ':')
9347 _cur_lexeme = lex_double_colon;
9351 _cur_lexeme = lex_none;
9356 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
9358 _cur_lexeme_contents.begin = cur;
9360 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9366 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9369 _cur_lexeme_contents.end = cur;
9371 _cur_lexeme = lex_number;
9373 else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
9375 _cur_lexeme_contents.begin = cur;
9377 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9381 if (cur[1] == '*') // namespace test ncname:*
9385 else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
9389 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9393 _cur_lexeme_contents.end = cur;
9395 _cur_lexeme = lex_string;
9399 _cur_lexeme = lex_none;
9406 lexeme_t current() const
9411 const char_t* current_pos() const
9413 return _cur_lexeme_pos;
9416 const xpath_lexer_string& contents() const
9418 assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
9420 return _cur_lexeme_contents;
// Kinds of expression tree nodes; the trailing comment on each enumerator shows the XPath
// construct it stands for ("left", "right" and "third" are the operands).
9427 ast_op_or, // left or right
9428 ast_op_and, // left and right
9429 ast_op_equal, // left = right
9430 ast_op_not_equal, // left != right
9431 ast_op_less, // left < right
9432 ast_op_greater, // left > right
9433 ast_op_less_or_equal, // left <= right
9434 ast_op_greater_or_equal, // left >= right
9435 ast_op_add, // left + right
9436 ast_op_subtract, // left - right
9437 ast_op_multiply, // left * right
9438 ast_op_divide, // left div right
9439 ast_op_mod, // left mod right
9440 ast_op_negate, // -left (unary minus)
9441 ast_op_union, // left | right
9442 ast_predicate, // apply predicate to set; next points to next predicate
9443 ast_filter, // select * from left where right
9444 ast_string_constant, // string constant
9445 ast_number_constant, // number constant
9446 ast_variable, // variable
9447 ast_func_last, // last()
9448 ast_func_position, // position()
9449 ast_func_count, // count(left)
9450 ast_func_id, // id(left)
9451 ast_func_local_name_0, // local-name()
9452 ast_func_local_name_1, // local-name(left)
9453 ast_func_namespace_uri_0, // namespace-uri()
9454 ast_func_namespace_uri_1, // namespace-uri(left)
9455 ast_func_name_0, // name()
9456 ast_func_name_1, // name(left)
9457 ast_func_string_0, // string()
9458 ast_func_string_1, // string(left)
9459 ast_func_concat, // concat(left, right, siblings)
9460 ast_func_starts_with, // starts-with(left, right)
9461 ast_func_contains, // contains(left, right)
9462 ast_func_substring_before, // substring-before(left, right)
9463 ast_func_substring_after, // substring-after(left, right)
9464 ast_func_substring_2, // substring(left, right)
9465 ast_func_substring_3, // substring(left, right, third)
9466 ast_func_string_length_0, // string-length()
9467 ast_func_string_length_1, // string-length(left)
9468 ast_func_normalize_space_0, // normalize-space()
9469 ast_func_normalize_space_1, // normalize-space(left)
9470 ast_func_translate, // translate(left, right, third)
9471 ast_func_boolean, // boolean(left)
9472 ast_func_not, // not(left)
9473 ast_func_true, // true()
9474 ast_func_false, // false()
9475 ast_func_lang, // lang(left)
9476 ast_func_number_0, // number()
9477 ast_func_number_1, // number(left)
9478 ast_func_sum, // sum(left)
9479 ast_func_floor, // floor(left)
9480 ast_func_ceiling, // ceiling(left)
9481 ast_func_round, // round(left)
9482 ast_step, // process set left with step
9483 ast_step_root, // select root node
// optimized forms of the nodes above
9485 ast_opt_translate_table, // translate(left, right, third) where right/third are constants
9486 ast_opt_compare_attribute // @name = 'string'
9492 axis_ancestor_or_self,
9496 axis_descendant_or_self,
9498 axis_following_sibling,
9502 axis_preceding_sibling,
9511 nodetest_type_comment,
9516 nodetest_all_in_namespace
9524 predicate_constant_one
9534 template <axis_t N> struct axis_to_type
9536 static const axis_t axis;
9539 template <axis_t N> const axis_t axis_to_type<N>::axis = N;
9541 class xpath_ast_node
9551 // for ast_step/ast_predicate/ast_filter
9554 // tree node structure
9555 xpath_ast_node* _left;
9556 xpath_ast_node* _right;
9557 xpath_ast_node* _next;
9561 // value for ast_string_constant
9562 const char_t* string;
9563 // value for ast_number_constant
9565 // variable for ast_variable
9566 xpath_variable* variable;
9567 // node test for ast_step (node name/namespace/node type/pi target)
9568 const char_t* nodetest;
9569 // table for ast_opt_translate_table
9570 const unsigned char* table;
9573 xpath_ast_node(const xpath_ast_node&);
9574 xpath_ast_node& operator=(const xpath_ast_node&);
9576 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9578 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9580 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9582 if (lt == xpath_type_boolean || rt == xpath_type_boolean)
9583 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9584 else if (lt == xpath_type_number || rt == xpath_type_number)
9585 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9586 else if (lt == xpath_type_string || rt == xpath_type_string)
9588 xpath_allocator_capture cr(stack.result);
9590 xpath_string ls = lhs->eval_string(c, stack);
9591 xpath_string rs = rhs->eval_string(c, stack);
9593 return comp(ls, rs);
9596 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9598 xpath_allocator_capture cr(stack.result);
9600 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9601 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9603 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9604 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9606 xpath_allocator_capture cri(stack.result);
9608 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
9616 if (lt == xpath_type_node_set)
9622 if (lt == xpath_type_boolean)
9623 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9624 else if (lt == xpath_type_number)
9626 xpath_allocator_capture cr(stack.result);
9628 double l = lhs->eval_number(c, stack);
9629 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9631 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9633 xpath_allocator_capture cri(stack.result);
9635 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9641 else if (lt == xpath_type_string)
9643 xpath_allocator_capture cr(stack.result);
9645 xpath_string l = lhs->eval_string(c, stack);
9646 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9648 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9650 xpath_allocator_capture cri(stack.result);
9652 if (comp(l, string_value(*ri, stack.result)))
9660 assert(false && "Wrong types"); // unreachable
9664 static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
9666 return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
// Relational comparison of two XPath operands using the comparator 'comp'.
// The branch taken depends on which operands have node set type: operands that are not node sets are
// evaluated as numbers, while for a node set operand every node's string value is converted to a
// number and passed to comp individually.
// NOTE(review): the statements executed when comp succeeds, and the final result of each node set
// branch, are on lines that are not visible in this excerpt.
9669 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9671 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
// no node sets involved: a single numeric comparison decides the result
9673 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9674 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
// node set vs node set: every left node is tested against every right node
9675 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9677 xpath_allocator_capture cr(stack.result);
9679 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9680 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9682 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9684 xpath_allocator_capture cri(stack.result);
// the left node's number is computed once and reused for the whole inner loop
9686 double l = convert_string_to_number(string_value(*li, stack.result).c_str());
9688 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9690 xpath_allocator_capture crii(stack.result);
9692 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
// number vs node set: the left number is fixed, only the right nodes vary
9699 else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
9701 xpath_allocator_capture cr(stack.result);
9703 double l = lhs->eval_number(c, stack);
9704 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9706 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9708 xpath_allocator_capture cri(stack.result);
9710 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
// node set vs number: mirror image of the previous branch
9716 else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
9718 xpath_allocator_capture cr(stack.result);
9720 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9721 double r = rhs->eval_number(c, stack);
9723 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9725 xpath_allocator_capture cri(stack.result);
9727 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
// the four branches above cover every combination of lt/rt, so this point should never be reached
9735 assert(false && "Wrong types"); // unreachable
// Applies a predicate whose expression does not return a number to the nodes of ns starting at index 'first'.
// Each candidate node is evaluated as a boolean in its own context (node, running counter i, candidate count).
// NOTE(review): the declaration of i, the code that keeps matching nodes and the use of 'once'
// are on lines not visible in this excerpt.
9740 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9742 assert(ns.size() >= first);
// numeric predicates are handled by apply_predicate_number / apply_predicate_number_const
9743 assert(expr->rettype() != xpath_type_number);
// number of candidate nodes; passed to every context as the context size
9746 size_t size = ns.size() - first;
// 'last' starts at the first candidate
9748 xpath_node* last = ns.begin() + first;
9750 // remove_if... or well, sort of
9751 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9753 xpath_context c(*it, i, size);
9755 if (expr->eval_boolean(c, stack))
// Applies a numeric predicate to the nodes of ns starting at index 'first'.
// The expression is re-evaluated for every candidate node, and the test succeeds when its value
// equals the running counter i that is also passed to the context.
// NOTE(review): the declaration of i, the code that keeps matching nodes and the use of 'once'
// are on lines not visible in this excerpt.
9766 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9768 assert(ns.size() >= first);
9769 assert(expr->rettype() == xpath_type_number);
// number of candidate nodes; passed to every context as the context size
9772 size_t size = ns.size() - first;
// 'last' starts at the first candidate
9774 xpath_node* last = ns.begin() + first;
9776 // remove_if... or well, sort of
9777 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9779 xpath_context c(*it, i, size);
// exact floating point equality: a fractional or NaN result can never equal the integral counter
9781 if (expr->eval_number(c, stack) == static_cast<double>(i))
// Applies a numeric predicate whose value is computed only once for the whole candidate range
// (nodes of ns starting at index 'first'), instead of once per node.
// NOTE(review): what happens to the selected node 'r' and to the rest of the range is on lines
// not visible in this excerpt.
9792 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
9794 assert(ns.size() >= first);
9795 assert(expr->rettype() == xpath_type_number);
9797 size_t size = ns.size() - first;
9799 xpath_node* last = ns.begin() + first;
// the expression is evaluated against a default-constructed node; only position (1) and size are supplied
9801 xpath_context c(xpath_node(), 1, size);
9803 double er = expr->eval_number(c, stack);
// the range check also rejects NaN (both comparisons are false) and keeps the cast below in range
9805 if (er >= 1.0 && er <= static_cast<double>(size))
9807 size_t eri = static_cast<size_t>(er);
// only an integral value can select a node; the round trip through size_t detects a fractional part
9809 if (er == static_cast<double>(eri))
// er is 1-based, the candidate range is 0-based
9811 xpath_node r = last[eri - 1];
// Applies this filter/predicate node to the nodes of ns starting at index 'first', dispatching
// to one of the three specialised helpers according to _test and the return type of _right.
9820 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
// no candidates past 'first': nothing to filter
9822 if (ns.size() == first) return;
9824 assert(_type == ast_filter || _type == ast_predicate);
// predicates classified as constant are evaluated once for the whole range ('once' is not needed there)
9826 if (_test == predicate_constant || _test == predicate_constant_one)
9827 apply_predicate_number_const(ns, first, _right, stack);
// other numeric predicates are evaluated per node and compared with the node's position
9828 else if (_right->rettype() == xpath_type_number)
9829 apply_predicate_number(ns, first, _right, stack, once);
// everything else is evaluated per node as a boolean
9831 apply_predicate_boolean(ns, first, _right, stack, once);
9834 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
9836 if (ns.size() == first) return;
9838 bool last_once = eval_once(ns.type(), eval);
9840 for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
9841 pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
// Attribute overload: appends attribute 'a' (owned by 'parent') to ns when it passes this step's node test.
// An attribute is only ever pushed when is_xpath_attribute accepts its name.
// NOTE(review): the switch header, some case labels and the return statements are on lines not
// visible in this excerpt; callers treat the bool result as "a node was pushed" - confirm.
9844 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
// an attribute without a name is treated as having an empty name
9848 const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
// exact name match against the step's node test string
9853 if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
9855 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9860 case nodetest_type_node:
9862 if (is_xpath_attribute(name))
9864 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
// prefix match: the name has to start with the node test string
9869 case nodetest_all_in_namespace:
9870 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
9872 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
// Node overload: appends node 'n' to ns when it passes this step's node test.
// NOTE(review): the switch header, some case labels and the return statements are on lines not
// visible in this excerpt; callers treat the bool result as "a node was pushed" - confirm.
9884 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
9888 xml_node_type type = PUGI__NODETYPE(n);
// name test: only elements with a name equal to the node test string match
9893 if (type == node_element && n->name && strequal(n->name, _data.nodetest))
9895 ns.push_back(xml_node(n), alloc);
// node(): every node is accepted unconditionally
9900 case nodetest_type_node:
9901 ns.push_back(xml_node(n), alloc);
9904 case nodetest_type_comment:
9905 if (type == node_comment)
9907 ns.push_back(xml_node(n), alloc);
// text(): both plain character data and CDATA sections count as text
9912 case nodetest_type_text:
9913 if (type == node_pcdata || type == node_cdata)
9915 ns.push_back(xml_node(n), alloc);
9920 case nodetest_type_pi:
9921 if (type == node_pi)
9923 ns.push_back(xml_node(n), alloc);
// processing instruction whose name equals the node test string
9929 if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
9931 ns.push_back(xml_node(n), alloc);
// any element, regardless of name
9937 if (type == node_element)
9939 ns.push_back(xml_node(n), alloc);
// prefix match: element name has to start with the node test string
9944 case nodetest_all_in_namespace:
9945 if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
9947 ns.push_back(xml_node(n), alloc);
9953 assert(false && "Unknown axis"); // unreachable
// Node overload: walks the axis selected at compile time by T::axis starting from node 'n' and
// offers every visited node (or attribute, for the attribute axis) to step_push.
// The recurring test `step_push(...) & once` always calls step_push and is true only when it
// returned true while 'once' is set.
// NOTE(review): the statements guarded by those tests (presumably an early exit), several loop
// headers, some case labels and parent-climbing statements are on lines not visible in this excerpt.
9959 template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
9961 const axis_t axis = T::axis;
9965 case axis_attribute:
9967 for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
9968 if (step_push(ns, a, n, alloc) & once)
// direct children of n, in sibling order
9976 for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
9977 if (step_push(ns, c, alloc) & once)
// both descendant axes share one traversal; only the -or-self variant offers n itself first
9983 case axis_descendant:
9984 case axis_descendant_or_self:
9986 if (axis == axis_descendant_or_self)
9987 if (step_push(ns, n, alloc) & once)
9990 xml_node_struct* cur = n->first_child;
9994 if (step_push(ns, cur, alloc) & once)
// go down to the first child when there is one ...
9997 if (cur->first_child)
9998 cur = cur->first_child;
// ... otherwise look for a next sibling; the traversal ends once cur is back at n
10001 while (!cur->next_sibling)
10005 if (cur == n) return;
10008 cur = cur->next_sibling;
10015 case axis_following_sibling:
10017 for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
10018 if (step_push(ns, c, alloc) & once)
10024 case axis_preceding_sibling:
// the loop ends at a node without next_sibling rather than at a null prev_sibling_c,
// which suggests prev_sibling_c wraps around to the last sibling - confirm at the struct definition
10026 for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
10027 if (step_push(ns, c, alloc) & once)
10033 case axis_following:
10035 xml_node_struct* cur = n;
10037 // exit from this node so that we don't include descendants
10038 while (!cur->next_sibling)
10045 cur = cur->next_sibling;
10049 if (step_push(ns, cur, alloc) & once)
// same descend-first stepping as in the descendant traversal above
10052 if (cur->first_child)
10053 cur = cur->first_child;
10056 while (!cur->next_sibling)
10063 cur = cur->next_sibling;
10070 case axis_preceding:
10072 xml_node_struct* cur = n;
10074 // exit from this node so that we don't include descendants
10075 while (!cur->prev_sibling_c->next_sibling)
10082 cur = cur->prev_sibling_c;
// descend via first_child->prev_sibling_c, i.e. presumably to the last child (see the note on prev_sibling_c above)
10086 if (cur->first_child)
10087 cur = cur->first_child->prev_sibling_c;
10090 // leaf node, can't be ancestor
10091 if (step_push(ns, cur, alloc) & once)
10094 while (!cur->prev_sibling_c->next_sibling)
// ancestors of n are skipped on this axis
10100 if (!node_is_ancestor(cur, n))
10101 if (step_push(ns, cur, alloc) & once)
10105 cur = cur->prev_sibling_c;
// both ancestor axes share one walk; only the -or-self variant offers n itself first
10112 case axis_ancestor:
10113 case axis_ancestor_or_self:
10115 if (axis == axis_ancestor_or_self)
10116 if (step_push(ns, n, alloc) & once)
10119 xml_node_struct* cur = n->parent;
10123 if (step_push(ns, cur, alloc) & once)
// single-node axes: at most one node is offered, so 'once' is irrelevant and the result is ignored
10134 step_push(ns, n, alloc);
10142 step_push(ns, n->parent, alloc);
10148 assert(false && "Unimplemented axis"); // unreachable
// Attribute overload: walks the axis selected by T::axis starting from attribute 'a' whose owner is node 'p'.
// NOTE(review): loop headers, some case labels and the statements guarded by the
// `step_push(...) & once` tests are on lines not visible in this excerpt.
10152 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
10154 const axis_t axis = T::axis;
10158 case axis_ancestor:
10159 case axis_ancestor_or_self:
10161 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
10162 if (step_push(ns, a, p, alloc) & once)
// the upward walk starts at the owner node p
10165 xml_node_struct* cur = p;
10169 if (step_push(ns, cur, alloc) & once)
10178 case axis_descendant_or_self:
10181 if (_test == nodetest_type_node) // reject attributes based on principal node type test
10182 step_push(ns, a, p, alloc);
10187 case axis_following:
// traversal starts at the owner node and steps into its children first
10189 xml_node_struct* cur = p;
10193 if (cur->first_child)
10194 cur = cur->first_child;
10197 while (!cur->next_sibling)
10204 cur = cur->next_sibling;
10207 if (step_push(ns, cur, alloc) & once)
// only the owner node p is offered here
10216 step_push(ns, p, alloc);
10221 case axis_preceding:
10223 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
10224 step_fill(ns, p, alloc, once, v);
10229 assert(false && "Unimplemented axis"); // unreachable
// xpath_node overload: forwards to the node or the attribute overload of step_fill depending on what xn holds.
// NOTE(review): the condition selecting the node overload is on a line not visible in this excerpt.
10233 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
10235 const axis_t axis = T::axis;
// axes for which the attribute overload is invoked; for any other axis an attribute context produces nothing here
10236 const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
10239 step_fill(ns, xn.node().internal_object(), alloc, once, v);
// the attribute path additionally requires a non-empty parent
10240 else if (axis_has_attributes && xn.attribute() && xn.parent())
10241 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
// Evaluates one location step for the axis given by T::axis: collects the nodes along the axis
// (from every node of the _left node set when that path is taken, otherwise from the context node c.n),
// applies the predicates linked from _right and removes duplicates where they can occur.
// NOTE(review): the declaration of 'once', the test choosing between the two paths and the
// return statement are on lines not visible in this excerpt.
10244 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
10246 const axis_t axis = T::axis;
// these four axes produce type_sorted_reverse; every other axis produces type_sorted
10247 const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
10248 const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
// three alternatives that feed the 'once' flag: a named attribute lookup, no predicates with an
// evaluation mode that permits stopping, or a single predicate classified as predicate_constant_one
10251 (axis == axis_attribute && _test == nodetest_name) ||
10252 (!_right && eval_once(axis_type, eval)) ||
10253 // coverity[mixed_enums]
10254 (_right && !_right->_next && _right->_test == predicate_constant_one);
10256 xpath_node_set_raw ns;
10257 ns.set_type(axis_type);
// the input set is always evaluated in full, whatever 'eval' was requested for this step
10261 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
10263 // self axis preserves the original order
10264 if (axis == axis_self) ns.set_type(s.type());
10266 for (const xpath_node* it = s.begin(); it != s.end(); ++it)
// remember where this input node's results start so predicates only see that range
10268 size_t size = ns.size();
10270 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
10271 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
10273 step_fill(ns, *it, stack.result, once, v);
10274 if (_right) apply_predicates(ns, size, stack, eval);
// single context node: the whole result set is the predicate range
10279 step_fill(ns, c.n, stack.result, once, v);
10280 if (_right) apply_predicates(ns, 0, stack, eval);
10283 // child, attribute and self axes always generate unique set of nodes
10284 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
10285 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
10286 ns.remove_duplicates(stack.temp);
10292 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
10293 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10295 assert(type == ast_string_constant);
10296 _data.string = value;
10299 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
10300 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10302 assert(type == ast_number_constant);
10303 _data.number = value;
10306 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
10307 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10309 assert(type == ast_variable);
10310 _data.variable = value;
10313 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
10314 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
10318 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
10319 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
10321 assert(type == ast_step);
10322 _data.nodetest = contents;
10325 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
10326 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
10328 assert(type == ast_filter || type == ast_predicate);
// Setter taking the node to link after this one.
// NOTE(review): the body is not visible in this excerpt; presumably it assigns _next - confirm.
10331 void set_next(xpath_ast_node* value)
// Setter taking the node to use as the right operand.
// NOTE(review): the body is not visible in this excerpt; presumably it assigns _right - confirm.
10336 void set_right(xpath_ast_node* value)
// Evaluates this node as a boolean in context c.
// Node types that produce a boolean directly are handled first; anything else is evaluated
// according to _rettype and then converted to boolean.
// NOTE(review): the switch headers, several case labels and some return statements are on lines
// not visible in this excerpt.
10341 bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
// || and && short-circuit, so the right operand is only evaluated when it can affect the result
10346 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
10349 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
10352 return compare_eq(_left, _right, c, stack, equal_to());
10354 case ast_op_not_equal:
10355 return compare_eq(_left, _right, c, stack, not_equal_to());
10358 return compare_rel(_left, _right, c, stack, less());
// "greater" reuses less() with the operands swapped
10360 case ast_op_greater:
10361 return compare_rel(_right, _left, c, stack, less());
10363 case ast_op_less_or_equal:
10364 return compare_rel(_left, _right, c, stack, less_equal());
// "greater or equal" reuses less_equal() with the operands swapped
10366 case ast_op_greater_or_equal:
10367 return compare_rel(_right, _left, c, stack, less_equal());
10369 case ast_func_starts_with:
10371 xpath_allocator_capture cr(stack.result);
10373 xpath_string lr = _left->eval_string(c, stack);
10374 xpath_string rr = _right->eval_string(c, stack);
10376 return starts_with(lr.c_str(), rr.c_str());
10379 case ast_func_contains:
10381 xpath_allocator_capture cr(stack.result);
10383 xpath_string lr = _left->eval_string(c, stack);
10384 xpath_string rr = _right->eval_string(c, stack);
10386 return find_substring(lr.c_str(), rr.c_str()) != 0;
10389 case ast_func_boolean:
10390 return _left->eval_boolean(c, stack);
// logical negation of the operand
10393 return !_left->eval_boolean(c, stack);
10395 case ast_func_true:
10398 case ast_func_false:
10401 case ast_func_lang:
// an attribute context never matches
10403 if (c.n.attribute()) return false;
10405 xpath_allocator_capture cr(stack.result);
10407 xpath_string lang = _left->eval_string(c, stack);
// search the context node and then its ancestors for an xml:lang attribute
10409 for (xml_node n = c.n.node(); n; n = n.parent())
10411 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
10415 const char_t* value = a.value();
10417 // strnicmp / strncasecmp is not portable
10418 for (const char_t* lit = lang.c_str(); *lit; ++lit)
10420 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
// after the requested language has been consumed, the attribute value must end or continue with '-'
10424 return *value == 0 || *value == '-';
10431 case ast_opt_compare_attribute:
// the right operand is either a string constant or a variable whose string value is read
10433 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
// direct attribute lookup by name on the context node
10435 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
10437 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
10442 assert(_rettype == _data.variable->type());
10444 if (_rettype == xpath_type_boolean)
10445 return _data.variable->get_boolean();
10447 // variable needs to be converted to the correct type, this is handled by the fallthrough block below
10455 // none of the ast types that return the value directly matched, we need to perform type conversion
10458 case xpath_type_number:
10459 return convert_number_to_boolean(eval_number(c, stack));
// a string converts to true when it is not empty
10461 case xpath_type_string:
10463 xpath_allocator_capture cr(stack.result);
10465 return !eval_string(c, stack).empty();
// a node set converts to true when it is not empty; nodeset_eval_any is requested because only emptiness matters
10468 case xpath_type_node_set:
10470 xpath_allocator_capture cr(stack.result);
10472 return !eval_node_set(c, stack, nodeset_eval_any).empty();
10476 assert(false && "Wrong expression for return type boolean"); // unreachable
// Evaluates this node as a number in context c.
// Node types that produce a number directly are handled first; anything else is evaluated
// according to _rettype and then converted to a number.
// NOTE(review): the switch headers, several case labels and some statements are on lines not
// visible in this excerpt.
10481 double eval_number(const xpath_context& c, const xpath_stack& stack)
// addition of the two operands
10486 return _left->eval_number(c, stack) + _right->eval_number(c, stack);
10488 case ast_op_subtract:
10489 return _left->eval_number(c, stack) - _right->eval_number(c, stack);
10491 case ast_op_multiply:
10492 return _left->eval_number(c, stack) * _right->eval_number(c, stack);
// plain floating point division; no special handling of a zero divisor is done here
10494 case ast_op_divide:
10495 return _left->eval_number(c, stack) / _right->eval_number(c, stack);
// floating point remainder of left divided by right
10498 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
10500 case ast_op_negate:
10501 return -_left->eval_number(c, stack);
10503 case ast_number_constant:
10504 return _data.number;
// last() and position() come straight from the evaluation context
10506 case ast_func_last:
10507 return static_cast<double>(c.size);
10509 case ast_func_position:
10510 return static_cast<double>(c.position);
// count() needs every node, hence nodeset_eval_all
10512 case ast_func_count:
10514 xpath_allocator_capture cr(stack.result);
10516 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
// string-length() without argument measures the string value of the context node
10519 case ast_func_string_length_0:
10521 xpath_allocator_capture cr(stack.result);
10523 return static_cast<double>(string_value(c.n, stack.result).length());
10526 case ast_func_string_length_1:
10528 xpath_allocator_capture cr(stack.result);
10530 return static_cast<double>(_left->eval_string(c, stack).length());
// number() without argument converts the string value of the context node
10533 case ast_func_number_0:
10535 xpath_allocator_capture cr(stack.result);
10537 return convert_string_to_number(string_value(c.n, stack.result).c_str());
10540 case ast_func_number_1:
10541 return _left->eval_number(c, stack);
// accumulate the numeric value of every node of the argument node set into r
10545 xpath_allocator_capture cr(stack.result);
10549 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
10551 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
10553 xpath_allocator_capture cri(stack.result);
10555 r += convert_string_to_number(string_value(*it, stack.result).c_str());
10561 case ast_func_floor:
10563 double r = _left->eval_number(c, stack);
// r == r is false only for NaN, which is returned unchanged instead of being passed to floor
10565 return r == r ? floor(r) : r;
10568 case ast_func_ceiling:
10570 double r = _left->eval_number(c, stack);
// same NaN pass-through as for floor
10572 return r == r ? ceil(r) : r;
10575 case ast_func_round:
10576 return round_nearest_nzero(_left->eval_number(c, stack));
10580 assert(_rettype == _data.variable->type());
10582 if (_rettype == xpath_type_number)
10583 return _data.variable->get_number();
10585 // variable needs to be converted to the correct type, this is handled by the fallthrough block below
10593 // none of the ast types that return the value directly matched, we need to perform type conversion
// true converts to 1, false to 0
10596 case xpath_type_boolean:
10597 return eval_boolean(c, stack) ? 1 : 0;
10599 case xpath_type_string:
10601 xpath_allocator_capture cr(stack.result);
10603 return convert_string_to_number(eval_string(c, stack).c_str());
// a node set is first converted to a string by eval_string, then to a number
10606 case xpath_type_node_set:
10608 xpath_allocator_capture cr(stack.result);
10610 return convert_string_to_number(eval_string(c, stack).c_str());
10614 assert(false && "Wrong expression for return type number"); // unreachable
// Evaluates concat(): the argument in _left followed by the chain starting at _right (linked via _next).
// All arguments are first evaluated into a temporary buffer of xpath_string objects, then the
// total length is computed and the pieces are copied into one buffer taken from stack.result.
// Returns an empty xpath_string when either allocation fails.
// NOTE(review): the declarations of count, pos and length and the body of the copy loop are on
// lines not visible in this excerpt.
10619 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
10621 assert(_type == ast_func_concat);
10623 xpath_allocator_capture ct(stack.temp);
10625 // count the string number
10627 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
10629 // allocate a buffer for temporary string objects
10630 xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
10631 if (!buffer) return xpath_string();
10633 // evaluate all strings to temporary stack
// initialised from (temp, result) in that order; as the name suggests this presumably makes the
// argument strings land in the temp allocator - confirm against the field order of xpath_stack
10634 xpath_stack swapped_stack = {stack.temp, stack.result};
10636 buffer[0] = _left->eval_string(c, swapped_stack);
10639 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
10640 assert(pos == count);
10642 // get total length
10644 for (size_t i = 0; i < count; ++i) length += buffer[i].length();
10646 // create final string
// one extra character is allocated beyond the combined length
10647 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
10648 if (!result) return xpath_string();
// ri is the write cursor into the result buffer
10650 char_t* ri = result;
10652 for (size_t j = 0; j < count; ++j)
10653 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
10658 return xpath_string::from_heap_preallocated(result, ri);
// Evaluates this node as a string in context c.
// Node types that produce a string directly are handled first; anything else is evaluated
// according to _rettype and then converted to a string.
// Several cases build a swapped_stack from (stack.temp, stack.result) for their inputs and then
// create the returned string with stack.result; presumably this keeps intermediate strings out of
// the result allocator - confirm against the field order of xpath_stack.
// NOTE(review): the switch headers and some case labels are on lines not visible in this excerpt.
10661 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
10665 case ast_string_constant:
10666 return xpath_string::from_const(_data.string);
// the *_0 variants of the name functions operate on the context node ...
10668 case ast_func_local_name_0:
10670 xpath_node na = c.n;
10672 return xpath_string::from_const(local_name(na));
// ... while the *_1 variants operate on the first node of the argument node set
10675 case ast_func_local_name_1:
10677 xpath_allocator_capture cr(stack.result);
10679 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10680 xpath_node na = ns.first();
10682 return xpath_string::from_const(local_name(na));
10685 case ast_func_name_0:
10687 xpath_node na = c.n;
10689 return xpath_string::from_const(qualified_name(na));
10692 case ast_func_name_1:
10694 xpath_allocator_capture cr(stack.result);
10696 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10697 xpath_node na = ns.first();
10699 return xpath_string::from_const(qualified_name(na));
10702 case ast_func_namespace_uri_0:
10704 xpath_node na = c.n;
10706 return xpath_string::from_const(namespace_uri(na));
10709 case ast_func_namespace_uri_1:
10711 xpath_allocator_capture cr(stack.result);
10713 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10714 xpath_node na = ns.first();
10716 return xpath_string::from_const(namespace_uri(na));
10719 case ast_func_string_0:
10720 return string_value(c.n, stack.result);
10722 case ast_func_string_1:
10723 return _left->eval_string(c, stack);
10725 case ast_func_concat:
10726 return eval_string_concat(c, stack);
10728 case ast_func_substring_before:
10730 xpath_allocator_capture cr(stack.temp);
10732 xpath_stack swapped_stack = {stack.temp, stack.result};
10734 xpath_string s = _left->eval_string(c, swapped_stack);
10735 xpath_string p = _right->eval_string(c, swapped_stack);
10737 const char_t* pos = find_substring(s.c_str(), p.c_str());
// the part of s before the match is copied using stack.result; no match yields an empty string
10739 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
10742 case ast_func_substring_after:
10744 xpath_allocator_capture cr(stack.temp);
10746 xpath_stack swapped_stack = {stack.temp, stack.result};
10748 xpath_string s = _left->eval_string(c, swapped_stack);
10749 xpath_string p = _right->eval_string(c, swapped_stack);
10751 const char_t* pos = find_substring(s.c_str(), p.c_str());
10752 if (!pos) return xpath_string();
// the result starts right after the matched pattern and runs to the end of s
10754 const char_t* rbegin = pos + p.length();
10755 const char_t* rend = s.c_str() + s.length();
// a suffix of a string that does not use the heap is returned via from_const (no copy);
// a heap-backed string is copied using stack.result
10757 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10760 case ast_func_substring_2:
10762 xpath_allocator_capture cr(stack.temp);
10764 xpath_stack swapped_stack = {stack.temp, stack.result};
10766 xpath_string s = _left->eval_string(c, swapped_stack);
10767 size_t s_length = s.length();
// the start position is 1-based and rounded before use
10769 double first = round_nearest(_right->eval_number(c, stack));
10771 if (is_nan(first)) return xpath_string(); // NaN
// a start position past the end selects nothing
10772 else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
// positions below 1 are clamped to 1, which also keeps the cast to size_t in range
10774 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10775 assert(1 <= pos && pos <= s_length + 1);
10777 const char_t* rbegin = s.c_str() + (pos - 1);
10778 const char_t* rend = s.c_str() + s.length();
10780 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10783 case ast_func_substring_3:
10785 xpath_allocator_capture cr(stack.temp);
10787 xpath_stack swapped_stack = {stack.temp, stack.result};
10789 xpath_string s = _left->eval_string(c, swapped_stack);
10790 size_t s_length = s.length();
// 'last' is the 1-based position one past the final selected character: rounded start plus rounded length
10792 double first = round_nearest(_right->eval_number(c, stack));
10793 double last = first + round_nearest(_right->_next->eval_number(c, stack));
// empty result for NaN bounds, a start past the end, an empty or inverted range, or a range ending before position 1
10795 if (is_nan(first) || is_nan(last)) return xpath_string();
10796 else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
10797 else if (first >= last) return xpath_string();
10798 else if (last < 1) return xpath_string();
// clamp both bounds into [1, s_length + 1] before converting to size_t
10800 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10801 size_t end = last >= static_cast<double>(s_length + 1) ? s_length + 1 : static_cast<size_t>(last);
10803 assert(1 <= pos && pos <= end && end <= s_length + 1);
10804 const char_t* rbegin = s.c_str() + (pos - 1);
10805 const char_t* rend = s.c_str() + (end - 1);
// from_const is only used when the range runs to the end of a string that does not use the heap
10807 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
10810 case ast_func_normalize_space_0:
10812 xpath_string s = string_value(c.n, stack.result);
// obtain a mutable character buffer via stack.result; a null buffer yields an empty string
10814 char_t* begin = s.data(stack.result);
10815 if (!begin) return xpath_string();
// normalize_space rewrites the buffer and returns the new end
10817 char_t* end = normalize_space(begin);
10819 return xpath_string::from_heap_preallocated(begin, end);
10822 case ast_func_normalize_space_1:
10824 xpath_string s = _left->eval_string(c, stack);
10826 char_t* begin = s.data(stack.result);
10827 if (!begin) return xpath_string();
10829 char_t* end = normalize_space(begin);
10831 return xpath_string::from_heap_preallocated(begin, end);
10834 case ast_func_translate:
10836 xpath_allocator_capture cr(stack.temp);
10838 xpath_stack swapped_stack = {stack.temp, stack.result};
// the subject string is evaluated with the regular stack, the two mapping strings with the swapped one
10840 xpath_string s = _left->eval_string(c, stack);
10841 xpath_string from = _right->eval_string(c, swapped_stack);
10842 xpath_string to = _right->_next->eval_string(c, swapped_stack);
10844 char_t* begin = s.data(stack.result);
10845 if (!begin) return xpath_string();
10847 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
10849 return xpath_string::from_heap_preallocated(begin, end);
// translate() variant that uses the table stored in _data.table
10852 case ast_opt_translate_table:
10854 xpath_string s = _left->eval_string(c, stack);
10856 char_t* begin = s.data(stack.result);
10857 if (!begin) return xpath_string();
10859 char_t* end = translate_table(begin, _data.table);
10861 return xpath_string::from_heap_preallocated(begin, end);
10866 assert(_rettype == _data.variable->type());
10868 if (_rettype == xpath_type_string)
10869 return xpath_string::from_const(_data.variable->get_string());
10871 // variable needs to be converted to the correct type, this is handled by the fallthrough block below
10879 // none of the ast types that return the value directly matched, we need to perform type conversion
10882 case xpath_type_boolean:
10883 return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
10885 case xpath_type_number:
10886 return convert_number_to_string(eval_number(c, stack), stack.result);
// a node set converts to the string value of its first node, or to an empty string when it has none
10888 case xpath_type_node_set:
10890 xpath_allocator_capture cr(stack.temp);
10892 xpath_stack swapped_stack = {stack.temp, stack.result};
10894 xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
10895 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
10899 assert(false && "Wrong expression for return type string"); // unreachable
10900 return xpath_string();
// Evaluates this node as a node set in context c; 'eval' tells how much of the set the caller needs.
// Unlike the other eval_* functions there is no conversion fallback: reaching the end of the
// function is treated as an internal error.
// NOTE(review): the switch headers, several case labels and some return statements are on lines
// not visible in this excerpt.
10904 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
10910 xpath_allocator_capture cr(stack.temp);
10912 xpath_stack swapped_stack = {stack.temp, stack.result};
// the left operand is evaluated with the regular stack, the right one with the swapped stack
10914 xpath_node_set_raw ls = _left->eval_node_set(c, stack, eval);
10915 xpath_node_set_raw rs = _right->eval_node_set(c, swapped_stack, eval);
10917 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
10918 ls.set_type(xpath_node_set::type_unsorted);
// the right nodes are appended to the left set, then duplicates are removed
10920 ls.append(rs.begin(), rs.end(), stack.result);
10921 ls.remove_duplicates(stack.temp);
// when the predicate is classified as predicate_constant_one only the first node of the input is requested
10928 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
10930 // either expression is a number or it contains position() call; sort by document order
10931 if (_test != predicate_posinv) set.sort_do();
10933 bool once = eval_once(set.type(), eval);
10935 apply_predicate(set, 0, stack, once);
10941 return xpath_node_set_raw();
// each axis value is mapped to its own step_do instantiation through the axis_to_type tag
10947 case axis_ancestor:
10948 return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
10950 case axis_ancestor_or_self:
10951 return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
10953 case axis_attribute:
10954 return step_do(c, stack, eval, axis_to_type<axis_attribute>());
10957 return step_do(c, stack, eval, axis_to_type<axis_child>());
10959 case axis_descendant:
10960 return step_do(c, stack, eval, axis_to_type<axis_descendant>());
10962 case axis_descendant_or_self:
10963 return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
10965 case axis_following:
10966 return step_do(c, stack, eval, axis_to_type<axis_following>());
10968 case axis_following_sibling:
10969 return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
10971 case axis_namespace:
10972 // namespaced axis is not supported
10973 return xpath_node_set_raw();
10976 return step_do(c, stack, eval, axis_to_type<axis_parent>());
10978 case axis_preceding:
10979 return step_do(c, stack, eval, axis_to_type<axis_preceding>());
10981 case axis_preceding_sibling:
10982 return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
10985 return step_do(c, stack, eval, axis_to_type<axis_self>());
10988 assert(false && "Unknown axis"); // unreachable
10989 return xpath_node_set_raw();
10993 case ast_step_root:
10995 assert(!_right); // root step can't have any predicates
10997 xpath_node_set_raw ns;
10999 ns.set_type(xpath_node_set::type_sorted);
// the root is taken from the context node, or from the owner node when the context is an attribute
11001 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
11002 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
11009 assert(_rettype == _data.variable->type());
11011 if (_rettype == xpath_type_node_set)
11013 const xpath_node_set& s = _data.variable->get_node_set();
// the variable's nodes are copied into a fresh raw set that keeps the variable's ordering type
11015 xpath_node_set_raw ns;
11017 ns.set_type(s.type());
11018 ns.append(s.begin(), s.end(), stack.result);
11023 // variable needs to be converted to the correct type, this is handled by the fallthrough block below
11031 // none of the ast types that return the value directly matched, but conversions to node set are invalid
11032 assert(false && "Wrong expression for return type node set"); // unreachable
11033 return xpath_node_set_raw();
// Optimizes the expression tree rooted at this node: recurses into the nodes linked through
// _left, _right and _next first, then rewrites this node itself.
// NOTE(review): the conditions guarding the three recursive calls are on lines not visible in this excerpt.
11036 void optimize(xpath_allocator* alloc)
11039 _left->optimize(alloc);
11042 _right->optimize(alloc);
11045 _next->optimize(alloc);
11047 // coverity[var_deref_model]
11048 optimize_self(alloc);
// Applies local rewrites to this node only (operands are handled by optimize()).
// Five independent steps are visible: the position()=expr rewrite, predicate classification,
// the descendant-or-self step merge, the translate() table and the attribute comparison fast path.
// NOTE(review): the last line of the step-merge condition and the check on the generated
// translate table are on lines not visible in this excerpt.
11051 void optimize_self(xpath_allocator* alloc)
11053 // Rewrite [position()=expr] with [expr]
11054 // Note that this step has to go before classification to recognize [position()=1]
11055 if ((_type == ast_filter || _type == ast_predicate) &&
11056 _right && // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
11057 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
// drop the comparison and keep only its numeric right-hand side as the predicate expression
11059 _right = _right->_right;
11062 // Classify filter/predicate ops to perform various optimizations during evaluation
11063 if ((_type == ast_filter || _type == ast_predicate) && _right) // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
11065 assert(_test == predicate_default);
// the literal 1 gets its own class
11067 if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
11068 _test = predicate_constant_one;
// numeric constants, variables and last() are classified as constant
11069 else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
11070 _test = predicate_constant;
// non-numeric expressions for which is_posinv_expr() holds
11071 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
11072 _test = predicate_posinv;
11075 // Rewrite descendant-or-self::node()/child::foo with descendant::foo
11076 // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
11077 // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
11078 // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
11079 if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) &&
11080 _left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
// child and descendant become descendant; the remaining axes (self, descendant-or-self) become descendant-or-self
11083 if (_axis == axis_child || _axis == axis_descendant)
11084 _axis = axis_descendant;
11086 _axis = axis_descendant_or_self;
// splice the merged descendant-or-self step out of the chain
11088 _left = _left->_left;
11091 // Use optimized lookup table implementation for translate() with constant arguments
11092 if (_type == ast_func_translate &&
11093 _right && // workaround for clang static analyzer (_right is never null for ast_func_translate)
11094 _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
11096 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
// the node is retyped and the table is stored in _data
11100 _type = ast_opt_translate_table;
11101 _data.table = table;
11105 // Use optimized path for @attr = 'value' or @attr = $value
11106 if (_type == ast_op_equal &&
11107 _left && _right && // workaround for clang static analyzer and Coverity (_left and _right are never null for ast_op_equal)
11108 // coverity[mixed_enums]
11109 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
11110 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
11112 _type = ast_opt_compare_attribute;
// Reports whether this expression subtree is position-invariant. The visible case labels
// group nodes by type; on the generic path the left child and every node on the
// right-hand `_next` chain must themselves pass the same check.
// NOTE(review): the `switch` header and the per-group return statements are not part of
// this excerpt - confirm the value each case group yields against the full file.
11116 bool is_posinv_expr() const
// position() / last() function nodes
11120 case ast_func_position:
11121 case ast_func_last:
// literal constants
11124 case ast_string_constant:
11125 case ast_number_constant:
// root step and predicate nodes
11130 case ast_step_root:
11133 case ast_predicate:
// a left child that fails the check makes the whole expression fail
11138 if (_left && !_left->is_posinv_expr()) return false;
// same requirement for each node reachable from _right through _next
11140 for (xpath_ast_node* n = _right; n; n = n->_next)
11141 if (!n->is_posinv_expr()) return false;
// Walks the predicate chain attached to this step (_right, linked through _next) and
// compares each predicate's classification against predicate_posinv.
// Only valid on ast_step nodes, and every chained node must be an ast_predicate (both asserted).
// NOTE(review): the return statements are not part of this excerpt.
11147 bool is_posinv_step() const
11149 assert(_type == ast_step);
11151 for (xpath_ast_node* n = _right; n; n = n->_next)
11153 assert(n->_type == ast_predicate);
11155 if (n->_test != predicate_posinv)
11162 xpath_value_type rettype() const
11164 return static_cast<xpath_value_type>(_rettype);
// Upper bound for the parser's depth counter; exceeding it makes the parser report
// "Exceeded maximum allowed query depth". A build can override the value by defining
// PUGIXML_XPATH_DEPTH_LIMIT.
// NOTE(review): the fallback value used when the macro is undefined is not visible in this excerpt.
11168 static const size_t xpath_ast_depth_limit =
11169 #ifdef PUGIXML_XPATH_DEPTH_LIMIT
11170 PUGIXML_XPATH_DEPTH_LIMIT
// Parser that turns an XPath query string into a tree of xpath_ast_node objects.
// NOTE(review): further members (e.g. the `_depth` counter used by the methods) are
// declared on lines that are not part of this excerpt.
11176 struct xpath_parser
// allocator backing every AST node and string copy made while parsing
11178 xpath_allocator* _alloc;
// tokenizer over the query text
11179 xpath_lexer _lexer;
// start of the query; used to turn the lexer position into an error offset
11181 const char_t* _query;
// variable set consulted when resolving variable references (may be absent)
11182 xpath_variable_set* _variables;
// receives the error message and offset when parsing fails
11184 xpath_parse_result* _result;
// small buffer handed to the *_scratch helpers (variable lookup, number conversion)
11186 char_t _scratch[32];
// Records a parse failure: stores `message` in the result together with the distance
// of the lexer's current position from the start of the query.
// NOTE(review): the return statement is not visible in this excerpt.
11190 xpath_ast_node* error(const char* message)
11192 _result->error = message;
11193 _result->offset = _lexer.current_pos() - _query;
// Signals an out-of-memory condition by setting the flag the allocator's `_error`
// pointer refers to; the pointer must be non-null (asserted).
// NOTE(review): the return statement is not visible in this excerpt.
11198 xpath_ast_node* error_oom()
11200 assert(_alloc->_error);
11201 *_alloc->_error = true;
11206 xpath_ast_node* error_rec()
11208 return error("Exceeded maximum allowed query depth");
// Requests raw storage for a single xpath_ast_node from the parser's allocator.
// NOTE(review): the function header for this statement is not part of this excerpt.
11213 return _alloc->allocate(sizeof(xpath_ast_node));
11216 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value)
11218 void* memory = alloc_node();
11219 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11222 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value)
11224 void* memory = alloc_node();
11225 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11228 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value)
11230 void* memory = alloc_node();
11231 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11234 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0)
11236 void* memory = alloc_node();
11237 return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0;
11240 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents)
11242 void* memory = alloc_node();
11243 return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0;
11246 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test)
11248 void* memory = alloc_node();
11249 return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0;
// Copies a lexer string into allocator-owned memory sized for `length + 1` characters.
// One visible path returns an empty literal instead of allocating.
// NOTE(review): the guard conditions, the write of the extra character and the final
// return are not visible in this excerpt.
11252 const char_t* alloc_string(const xpath_lexer_string& value)
11255 return PUGIXML_TEXT("");
// number of characters between the begin/end markers
11257 size_t length = static_cast<size_t>(value.end - value.begin);
// one extra character is reserved beyond the copied text
11259 char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t)));
11262 memcpy(c, value.begin, length * sizeof(char_t));
// Resolves a function call to an AST node. `name` is the function name as lexed, `argc`
// the number of parsed arguments and `args` the first two argument nodes. Each visible
// branch compares the name and argument count and allocates the matching ast_func_* node.
// count, sum, local-name, name and namespace-uri reject an argument that is not a node set.
// The last visible statement reports "Unrecognized function or wrong parameter count".
// Only args[0]/args[1] are passed on, even for functions taking more arguments
// (concat, substring with 3 arguments, translate).
// NOTE(review): the case labels and break statements of the switch on the first
// character are not part of this excerpt.
11268 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
// dispatch on the first character of the name to limit the number of comparisons
11270 switch (name.begin[0])
11273 if (name == PUGIXML_TEXT("boolean") && argc == 1)
11274 return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]);
11279 if (name == PUGIXML_TEXT("count") && argc == 1)
11281 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11282 return alloc_node(ast_func_count, xpath_type_number, args[0]);
11284 else if (name == PUGIXML_TEXT("contains") && argc == 2)
11285 return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
11286 else if (name == PUGIXML_TEXT("concat") && argc >= 2)
11287 return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]);
11288 else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
11289 return alloc_node(ast_func_ceiling, xpath_type_number, args[0]);
11294 if (name == PUGIXML_TEXT("false") && argc == 0)
11295 return alloc_node(ast_func_false, xpath_type_boolean);
11296 else if (name == PUGIXML_TEXT("floor") && argc == 1)
11297 return alloc_node(ast_func_floor, xpath_type_number, args[0]);
11302 if (name == PUGIXML_TEXT("id") && argc == 1)
11303 return alloc_node(ast_func_id, xpath_type_node_set, args[0]);
11308 if (name == PUGIXML_TEXT("last") && argc == 0)
11309 return alloc_node(ast_func_last, xpath_type_number);
11310 else if (name == PUGIXML_TEXT("lang") && argc == 1)
11311 return alloc_node(ast_func_lang, xpath_type_boolean, args[0]);
// functions with an optional argument pick the _0 or _1 node type from argc
11312 else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
11314 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11315 return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]);
11321 if (name == PUGIXML_TEXT("name") && argc <= 1)
11323 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11324 return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]);
11326 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
11328 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11329 return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]);
11331 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
11332 return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
11333 else if (name == PUGIXML_TEXT("not") && argc == 1)
11334 return alloc_node(ast_func_not, xpath_type_boolean, args[0]);
11335 else if (name == PUGIXML_TEXT("number") && argc <= 1)
11336 return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
11341 if (name == PUGIXML_TEXT("position") && argc == 0)
11342 return alloc_node(ast_func_position, xpath_type_number);
11347 if (name == PUGIXML_TEXT("round") && argc == 1)
11348 return alloc_node(ast_func_round, xpath_type_number, args[0]);
11353 if (name == PUGIXML_TEXT("string") && argc <= 1)
11354 return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
11355 else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
11356 return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
11357 else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
11358 return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
11359 else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
11360 return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
11361 else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
11362 return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
11363 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
11364 return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
11365 else if (name == PUGIXML_TEXT("sum") && argc == 1)
11367 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11368 return alloc_node(ast_func_sum, xpath_type_number, args[0]);
11374 if (name == PUGIXML_TEXT("translate") && argc == 3)
11375 return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]);
11376 else if (name == PUGIXML_TEXT("true") && argc == 0)
11377 return alloc_node(ast_func_true, xpath_type_boolean);
// nothing matched the name/argument-count combination
11385 return error("Unrecognized function or wrong parameter count");
// Maps an axis name to its axis_t value by comparing it against the known axis names,
// after dispatching on the first character.
// NOTE(review): the case labels, the writes to `specified`, the results of the `child`
// and `self` branches and the fallback return are not visible in this excerpt.
11388 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
11392 switch (name.begin[0])
11395 if (name == PUGIXML_TEXT("ancestor"))
11396 return axis_ancestor;
11397 else if (name == PUGIXML_TEXT("ancestor-or-self"))
11398 return axis_ancestor_or_self;
11399 else if (name == PUGIXML_TEXT("attribute"))
11400 return axis_attribute;
11405 if (name == PUGIXML_TEXT("child"))
11411 if (name == PUGIXML_TEXT("descendant"))
11412 return axis_descendant;
11413 else if (name == PUGIXML_TEXT("descendant-or-self"))
11414 return axis_descendant_or_self;
11419 if (name == PUGIXML_TEXT("following"))
11420 return axis_following;
11421 else if (name == PUGIXML_TEXT("following-sibling"))
11422 return axis_following_sibling;
11427 if (name == PUGIXML_TEXT("namespace"))
11428 return axis_namespace;
11433 if (name == PUGIXML_TEXT("parent"))
11434 return axis_parent;
11435 else if (name == PUGIXML_TEXT("preceding"))
11436 return axis_preceding;
11437 else if (name == PUGIXML_TEXT("preceding-sibling"))
11438 return axis_preceding_sibling;
11443 if (name == PUGIXML_TEXT("self"))
// Maps a node type name ("comment", "node", "processing-instruction", "text") to the
// matching nodetest_type_* value; the last visible statement returns nodetest_none.
// NOTE(review): the case labels and break statements are not visible in this excerpt.
11456 nodetest_t parse_node_test_type(const xpath_lexer_string& name)
11458 switch (name.begin[0])
11461 if (name == PUGIXML_TEXT("comment"))
11462 return nodetest_type_comment;
11467 if (name == PUGIXML_TEXT("node"))
11468 return nodetest_type_node;
11473 if (name == PUGIXML_TEXT("processing-instruction"))
11474 return nodetest_type_pi;
11479 if (name == PUGIXML_TEXT("text"))
11480 return nodetest_type_text;
11488 return nodetest_none;
// Parses one primary expression, selected by the current lexeme: a variable reference,
// a parenthesised expression, a string literal, a number or a function call.
// Returns the resulting AST node; the visible error paths go through error()/error_oom().
// NOTE(review): several case labels, null checks and lexer advances are not part of this excerpt.
11491 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
11492 xpath_ast_node* parse_primary_expression()
11494 switch (_lexer.current())
// variable reference: look the name up in the variable set
11498 xpath_lexer_string name = _lexer.contents();
11501 return error("Unknown variable: variable set is not provided");
11503 xpath_variable* var = 0;
// a failed lookup call is reported as out-of-memory
11504 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
11505 return error_oom();
11508 return error("Unknown variable: variable set does not contain the given name");
// the node's result type is taken from the variable's type
11512 return alloc_node(ast_variable, var->type(), var);
// parenthesised sub-expression
11515 case lex_open_brace:
11519 xpath_ast_node* n = parse_expression();
11522 if (_lexer.current() != lex_close_brace)
11523 return error("Expected ')' to match an opening '('");
// string literal: copied into allocator memory before the node is built
11530 case lex_quoted_string:
11532 const char_t* value = alloc_string(_lexer.contents());
11533 if (!value) return 0;
11537 return alloc_node(ast_string_constant, xpath_type_string, value);
// number literal: a failed conversion call is reported as out-of-memory
11544 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
11545 return error_oom();
11549 return alloc_node(ast_number_constant, xpath_type_number, value);
// function call: the first two arguments are kept in args[]
11554 xpath_ast_node* args[2] = {0};
11557 xpath_lexer_string function = _lexer.contents();
11560 xpath_ast_node* last_arg = 0;
11562 if (_lexer.current() != lex_open_brace)
11563 return error("Unrecognized function call");
// the depth counter is restored once the argument list has been parsed
11566 size_t old_depth = _depth;
11568 while (_lexer.current() != lex_close_brace)
11572 if (_lexer.current() != lex_comma)
11573 return error("No comma between function arguments");
11577 if (++_depth > xpath_ast_depth_limit)
11578 return error_rec();
11580 xpath_ast_node* n = parse_expression();
// arguments past the second are linked onto last_arg via set_next
11583 if (argc < 2) args[argc] = n;
11584 else last_arg->set_next(n);
11592 _depth = old_depth;
11594 return parse_function(function, argc, args);
11598 return error("Unrecognizable primary expression");
// Parses a primary expression followed by any number of '[' ... ']' predicates, wrapping
// the running result in an ast_filter node for each predicate. Every predicate counts
// against the depth limit and requires the filtered expression to be a node set.
// NOTE(review): null checks, lexer advances and the final return are not part of this excerpt.
11602 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
11603 // Predicate ::= '[' PredicateExpr ']'
11604 // PredicateExpr ::= Expr
11605 xpath_ast_node* parse_filter_expression()
11607 xpath_ast_node* n = parse_primary_expression();
// the depth counter is restored after the predicate loop
11610 size_t old_depth = _depth;
11612 while (_lexer.current() == lex_open_square_brace)
11616 if (++_depth > xpath_ast_depth_limit)
11617 return error_rec();
11619 if (n->rettype() != xpath_type_node_set)
11620 return error("Predicate has to be applied to node set");
11622 xpath_ast_node* expr = parse_expression();
11623 if (!expr) return 0;
// the previous result becomes the left operand of the new filter node
11625 n = alloc_node(ast_filter, n, expr, predicate_default);
11628 if (_lexer.current() != lex_close_square_brace)
11629 return error("Expected ']' to match an opening '['");
11634 _depth = old_depth;
// Parses a single location step applied to `set` (which, when given, must be a node set):
// an optional axis specifier, a node test and trailing predicates, or one of the
// abbreviated steps '.' / '..'. Returns an ast_step node with the predicates chained
// from its right pointer.
// NOTE(review): lexer advances, several else branches, null checks and the final return
// are not part of this excerpt.
11639 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
11640 // AxisSpecifier ::= AxisName '::' | '@'?
11641 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
11642 // NameTest ::= '*' | NCName ':' '*' | QName
11643 // AbbreviatedStep ::= '.' | '..'
11644 xpath_ast_node* parse_step(xpath_ast_node* set)
11646 if (set && set->rettype() != xpath_type_node_set)
11647 return error("Step has to be applied to node set");
11649 bool axis_specified = false;
11650 axis_t axis = axis_child; // implied child axis
// '@' abbreviation selects the attribute axis
11652 if (_lexer.current() == lex_axis_attribute)
11654 axis = axis_attribute;
11655 axis_specified = true;
// '.' is a self::node() step and cannot carry predicates
11659 else if (_lexer.current() == lex_dot)
11663 if (_lexer.current() == lex_open_square_brace)
11664 return error("Predicates are not allowed after an abbreviated step");
11666 return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0);
// '..' is a parent::node() step and cannot carry predicates
11668 else if (_lexer.current() == lex_double_dot)
11672 if (_lexer.current() == lex_open_square_brace)
11673 return error("Predicates are not allowed after an abbreviated step");
11675 return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0);
11678 nodetest_t nt_type = nodetest_none;
11679 xpath_lexer_string nt_name;
11681 if (_lexer.current() == lex_string)
11684 nt_name = _lexer.contents();
11687 // was it an axis name?
11688 if (_lexer.current() == lex_double_colon)
11691 if (axis_specified)
11692 return error("Two axis specifiers in one step");
11694 axis = parse_axis_name(nt_name, axis_specified);
11696 if (!axis_specified)
11697 return error("Unknown axis");
11699 // read actual node test
11702 if (_lexer.current() == lex_multiply)
11704 nt_type = nodetest_all;
11705 nt_name = xpath_lexer_string();
11708 else if (_lexer.current() == lex_string)
11710 nt_name = _lexer.contents();
11715 return error("Unrecognized node test");
// nt_type still unset: the name is a node type test, a processing-instruction test or a name test
11719 if (nt_type == nodetest_none)
11721 // node type test or processing-instruction
11722 if (_lexer.current() == lex_open_brace)
11726 if (_lexer.current() == lex_close_brace)
11730 nt_type = parse_node_test_type(nt_name);
11732 if (nt_type == nodetest_none)
11733 return error("Unrecognized node type");
11735 nt_name = xpath_lexer_string();
11737 else if (nt_name == PUGIXML_TEXT("processing-instruction"))
11739 if (_lexer.current() != lex_quoted_string)
11740 return error("Only literals are allowed as arguments to processing-instruction()");
11742 nt_type = nodetest_pi;
11743 nt_name = _lexer.contents();
11746 if (_lexer.current() != lex_close_brace)
11747 return error("Unmatched brace near processing-instruction()");
11752 return error("Unmatched brace near node type test");
11755 // QName or NCName:*
11758 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
11760 nt_name.end--; // erase *
11762 nt_type = nodetest_all_in_namespace;
11766 nt_type = nodetest_name;
11771 else if (_lexer.current() == lex_multiply)
11773 nt_type = nodetest_all;
11778 return error("Unrecognized node test");
// the test name is copied into allocator memory before the step node is created
11781 const char_t* nt_name_copy = alloc_string(nt_name);
11782 if (!nt_name_copy) return 0;
11784 xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
// the depth counter is restored after the predicate loop
11787 size_t old_depth = _depth;
11789 xpath_ast_node* last = 0;
11791 while (_lexer.current() == lex_open_square_brace)
11795 if (++_depth > xpath_ast_depth_limit)
11796 return error_rec();
11798 xpath_ast_node* expr = parse_expression();
11799 if (!expr) return 0;
11801 xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default);
11802 if (!pred) return 0;
11804 if (_lexer.current() != lex_close_square_brace)
11805 return error("Expected ']' to match an opening '['");
// first predicate hangs off the step's right pointer, later ones are chained via set_next
11808 if (last) last->set_next(pred);
11809 else n->set_right(pred);
11814 _depth = old_depth;
// Parses a chain of steps separated by '/' or '//' starting from `set`. A '//' separator
// inserts an intermediate descendant-or-self::node() step. Each separator counts
// against the depth limit; the depth counter is restored afterwards.
// NOTE(review): null checks, lexer advances, the parse of the following step and the
// final return are not part of this excerpt.
11819 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
11820 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
11822 xpath_ast_node* n = parse_step(set);
11825 size_t old_depth = _depth;
11827 while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11829 lexeme_t l = _lexer.current();
11832 if (l == lex_double_slash)
11834 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11840 if (++_depth > xpath_ast_depth_limit)
11841 return error_rec();
11847 _depth = old_depth;
// Parses an absolute or relative location path. A leading '/' or '//' creates an
// ast_step_root node ('//' additionally adds a descendant-or-self::node() step) that
// becomes the starting set of the relative path; otherwise the relative path starts
// with no set.
// NOTE(review): lexer advances, null checks and the standalone-root return are not part of this excerpt.
11852 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
11853 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
11854 xpath_ast_node* parse_location_path()
11856 if (_lexer.current() == lex_slash)
11860 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
11863 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
11864 lexeme_t l = _lexer.current();
11866 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
11867 return parse_relative_location_path(n);
11871 else if (_lexer.current() == lex_double_slash)
11875 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
11878 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11881 return parse_relative_location_path(n);
11884 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
11885 return parse_relative_location_path(0);
// Parses a path expression or a unary minus. Lexemes that can start a primary expression
// go through parse_filter_expression(), optionally followed by '/' or '//' and a
// relative path; a leading '-' produces an ast_op_negate node; anything else is parsed
// as a location path. A name lexeme is treated as a location path when it is a node
// type test rather than a function call.
// NOTE(review): lexer advances, null checks, the check that separates function calls
// from names, and some returns are not part of this excerpt.
11888 // PathExpr ::= LocationPath
11890 // | FilterExpr '/' RelativeLocationPath
11891 // | FilterExpr '//' RelativeLocationPath
11892 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
11893 // UnaryExpr ::= UnionExpr | '-' UnaryExpr
11894 xpath_ast_node* parse_path_or_unary_expression()
11897 // PathExpr begins with either LocationPath or FilterExpr.
11898 // FilterExpr begins with PrimaryExpr
11899 // PrimaryExpr begins with '$' in case of it being a variable reference,
11900 // '(' in case of it being an expression, string literal, number constant or
11902 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
11903 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
11904 _lexer.current() == lex_string)
11906 if (_lexer.current() == lex_string)
11908 // This is either a function call, or not - if not, we shall proceed with location path
11909 const char_t* state = _lexer.state();
// skip whitespace after the name before inspecting the next character
11911 while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
11914 return parse_location_path();
11916 // This looks like a function call; however this still can be a node-test. Check it.
11917 if (parse_node_test_type(_lexer.contents()) != nodetest_none)
11918 return parse_location_path();
11921 xpath_ast_node* n = parse_filter_expression();
11924 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11926 lexeme_t l = _lexer.current();
11929 if (l == lex_double_slash)
11931 if (n->rettype() != xpath_type_node_set)
11932 return error("Step has to be applied to node set");
11934 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11938 // select from location path
11939 return parse_relative_location_path(n);
11944 else if (_lexer.current() == lex_minus)
11948 // precedence 7+ - only parses union expressions
11949 xpath_ast_node* n = parse_expression(7);
11952 return alloc_node(ast_op_negate, xpath_type_number, n);
11956 return parse_location_path();
// Describes a binary operator: the AST node type it produces, the result type of that
// node and its precedence (larger values are consumed first by parse_expression_rec).
// parse() maps the lexer's current lexeme to a descriptor; an ast_unknown/precedence 0
// descriptor means "not a binary operator".
// NOTE(review): the struct header, the `precedence` member declaration and most case
// labels of parse() are not part of this excerpt.
11962 ast_type_t asttype;
11963 xpath_value_type rettype;
// default descriptor: no operator
11966 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
11970 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
11974 static binary_op_t parse(xpath_lexer& lexer)
11976 switch (lexer.current())
// word operators are recognised by comparing the lexeme contents
11979 if (lexer.contents() == PUGIXML_TEXT("or"))
11980 return binary_op_t(ast_op_or, xpath_type_boolean, 1);
11981 else if (lexer.contents() == PUGIXML_TEXT("and"))
11982 return binary_op_t(ast_op_and, xpath_type_boolean, 2);
11983 else if (lexer.contents() == PUGIXML_TEXT("div"))
11984 return binary_op_t(ast_op_divide, xpath_type_number, 6);
11985 else if (lexer.contents() == PUGIXML_TEXT("mod"))
11986 return binary_op_t(ast_op_mod, xpath_type_number, 6);
11988 return binary_op_t();
// equality operators: precedence 3
11991 return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
11993 case lex_not_equal:
11994 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
// relational operators: precedence 4
11997 return binary_op_t(ast_op_less, xpath_type_boolean, 4);
12000 return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
12002 case lex_less_or_equal:
12003 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
12005 case lex_greater_or_equal:
12006 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
// additive operators: precedence 5
12009 return binary_op_t(ast_op_add, xpath_type_number, 5);
12012 return binary_op_t(ast_op_subtract, xpath_type_number, 5);
// multiplication: precedence 6; union: precedence 7
12015 return binary_op_t(ast_op_multiply, xpath_type_number, 6);
12018 return binary_op_t(ast_op_union, xpath_type_node_set, 7);
12021 return binary_op_t();
// Folds binary operators onto `lhs` for as long as the next operator's precedence is at
// least `limit`. Operators on the right-hand side that bind tighter than the current one
// are parsed first through recursion. The union operator requires node-set operands on
// both sides. Each consumed operator counts against the depth limit.
// NOTE(review): lexer advances and the final return are not part of this excerpt.
12026 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
12028 binary_op_t op = binary_op_t::parse(_lexer);
12030 while (op.asttype != ast_unknown && op.precedence >= limit)
12034 if (++_depth > xpath_ast_depth_limit)
12035 return error_rec();
12037 xpath_ast_node* rhs = parse_path_or_unary_expression();
12038 if (!rhs) return 0;
12040 binary_op_t nextop = binary_op_t::parse(_lexer);
// let tighter-binding operators claim rhs before it is combined with lhs
12042 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
12044 rhs = parse_expression_rec(rhs, nextop.precedence);
12045 if (!rhs) return 0;
12047 nextop = binary_op_t::parse(_lexer);
12050 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
12051 return error("Union operator has to be applied to node sets");
12053 lhs = alloc_node(op.asttype, op.rettype, lhs, rhs);
12054 if (!lhs) return 0;
12056 op = binary_op_t::parse(_lexer);
// Entry point for expression parsing: parses one path/unary expression and then folds
// binary operators whose precedence is at least `limit` onto it. The call itself counts
// against the depth limit, and the depth counter is restored before returning.
// NOTE(review): the null check after the first parse and the final return are not part of this excerpt.
12063 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
12064 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
12065 // EqualityExpr ::= RelationalExpr
12066 // | EqualityExpr '=' RelationalExpr
12067 // | EqualityExpr '!=' RelationalExpr
12068 // RelationalExpr ::= AdditiveExpr
12069 // | RelationalExpr '<' AdditiveExpr
12070 // | RelationalExpr '>' AdditiveExpr
12071 // | RelationalExpr '<=' AdditiveExpr
12072 // | RelationalExpr '>=' AdditiveExpr
12073 // AdditiveExpr ::= MultiplicativeExpr
12074 // | AdditiveExpr '+' MultiplicativeExpr
12075 // | AdditiveExpr '-' MultiplicativeExpr
12076 // MultiplicativeExpr ::= UnaryExpr
12077 // | MultiplicativeExpr '*' UnaryExpr
12078 // | MultiplicativeExpr 'div' UnaryExpr
12079 // | MultiplicativeExpr 'mod' UnaryExpr
12080 xpath_ast_node* parse_expression(int limit = 0)
12082 size_t old_depth = _depth;
12084 if (++_depth > xpath_ast_depth_limit)
12085 return error_rec();
12087 xpath_ast_node* n = parse_path_or_unary_expression();
12090 n = parse_expression_rec(n, limit);
12092 _depth = old_depth;
// Stores the query, variable set, allocator and result sink, creates the lexer over the
// query text and starts with a depth counter of zero.
12097 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0)
// Parses the whole query as one expression. The depth counter must be back at zero
// afterwards (asserted), and any lexeme left before end-of-input is reported as
// "Incorrect query".
// NOTE(review): the null check after parse_expression() and the final return are not part of this excerpt.
12101 xpath_ast_node* parse()
12103 xpath_ast_node* n = parse_expression();
12106 assert(_depth == 0);
12108 // check if there are unparsed tokens left
12109 if (_lexer.current() != lex_eof)
12110 return error("Incorrect query");
12115 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
12117 xpath_parser parser(query, variables, alloc, result);
12119 return parser.parse();
// Backing storage for a compiled query: the AST root, the allocator the AST lives in,
// and the first memory block, which is embedded in the object itself.
// NOTE(review): at least one member (the `oom` flag initialised in the constructor) and
// part of the constructor body are declared on lines that are not part of this excerpt.
12123 struct xpath_query_impl
// Allocates raw memory through xml_memory and constructs the object in place; null on allocation failure.
12125 static xpath_query_impl* create()
12127 void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
12128 if (!memory) return 0;
12130 return new (memory) xpath_query_impl();
// Releases the allocator's pages, then the object's own memory. No destructor call is visible here.
12133 static void destroy(xpath_query_impl* impl)
12135 // free all allocated pages
12136 impl->alloc.release();
12138 // free allocator memory (with the first page)
12139 xml_memory::deallocate(impl);
// the allocator is wired to the embedded block and to the oom flag
12142 xpath_query_impl(): root(0), alloc(&block, &oom), oom(false)
12145 block.capacity = sizeof(block.data);
12148 xpath_ast_node* root;
12149 xpath_allocator alloc;
12150 xpath_memory_block block;
// Checks that a compiled query can be evaluated as a node set. A null `impl` yields
// null; a query whose root does not return a node set raises xpath_exception with
// "Expression does not evaluate to node set" when exceptions are enabled.
// NOTE(review): the PUGIXML_NO_EXCEPTIONS branch and the successful return are not part of this excerpt.
12154 PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
12156 if (!impl) return 0;
12158 if (impl->root->rettype() != xpath_type_node_set)
12160 #ifdef PUGIXML_NO_EXCEPTIONS
12163 xpath_parse_result res;
12164 res.error = "Expression does not evaluate to node set";
12166 throw xpath_exception(res);
12176 #ifndef PUGIXML_NO_EXCEPTIONS
// Wraps a parse result in an exception; the result must carry an error message (asserted).
12177 PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
12179 assert(_result.error);
12182 PUGI__FN const char* xpath_exception::what() const throw()
12184 return _result.error;
// Accessor returning a parse result by const reference.
// NOTE(review): the body is not part of this excerpt - presumably it returns the stored _result; confirm.
12187 PUGI__FN const xpath_parse_result& xpath_exception::result() const
// Default constructor; no member initialisers are listed, so _node and _attribute are default-constructed.
// NOTE(review): the body is not part of this excerpt.
12193 PUGI__FN xpath_node::xpath_node()
// Builds an XPath node that refers to an element node; _attribute is left default-constructed.
12197 PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
// Builds an XPath node that refers to an attribute. The parent is only remembered when
// the attribute is non-empty; otherwise an empty xml_node is stored.
12201 PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
12205 PUGI__FN xml_node xpath_node::node() const
12207 return _attribute ? xml_node() : _node;
// Accessor returning an xml_attribute by value.
// NOTE(review): the body is not part of this excerpt - presumably it returns _attribute; confirm.
12210 PUGI__FN xml_attribute xpath_node::attribute() const
12215 PUGI__FN xml_node xpath_node::parent() const
12217 return _attribute ? _node : _node.parent();
// Helper whose address serves as the non-null value of xpath_node's unspecified_bool_type conversion.
// NOTE(review): the body is not part of this excerpt.
12220 PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
12224 PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
12226 return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
12229 PUGI__FN bool xpath_node::operator!() const
12231 return !(_node || _attribute);
12234 PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
12236 return _node == n._node && _attribute == n._attribute;
12239 PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
12241 return _node != n._node || _attribute != n._attribute;
12244 #ifdef __BORLANDC__
12245 PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
12247 return (bool)lhs && rhs;
12250 PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
12252 return (bool)lhs || rhs;
// Replaces the set's contents with a copy of [begin_, end_). Up to one element is kept
// in the embedded _storage; larger ranges get a heap buffer from xml_memory. An
// allocation failure throws std::bad_alloc when exceptions are enabled. The previous
// heap buffer, if any, is released before the new data is copied in.
// NOTE(review): the allocation-failure check, the PUGIXML_NO_EXCEPTIONS branch, the
// size guard around memcpy and the writes to _begin/_type are not part of this excerpt.
12256 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
12258 assert(begin_ <= end_);
12260 size_t size_ = static_cast<size_t>(end_ - begin_);
12262 // use internal buffer for 0 or 1 elements, heap buffer otherwise
12263 xpath_node* storage = (size_ <= 1) ? _storage : static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
12267 #ifdef PUGIXML_NO_EXCEPTIONS
12270 throw std::bad_alloc();
12274 // deallocate old buffer
12275 if (_begin != _storage)
12276 impl::xml_memory::deallocate(_begin);
12278 // size check is necessary because for begin_ = end_ = nullptr, memcpy is UB
12280 memcpy(storage, begin_, size_ * sizeof(xpath_node));
12283 _end = storage + size_;
12287 #ifdef PUGIXML_HAS_MOVE
// Takes over the contents of `rhs`. The embedded element is copied; when rhs used its
// embedded storage this set points at its own, otherwise it adopts rhs's heap buffer.
// rhs is left as an empty, unsorted set that points at its own embedded storage.
// NOTE(review): the line that transfers the type is not part of this excerpt.
12288 PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT
12291 _storage[0] = rhs._storage[0];
12292 _begin = (rhs._begin == rhs._storage) ? _storage : rhs._begin;
12293 _end = _begin + (rhs._end - rhs._begin);
12295 rhs._type = type_unsorted;
12296 rhs._begin = rhs._storage;
12297 rhs._end = rhs._storage;
// Creates an empty, unsorted set whose range points at the embedded storage.
12301 PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(_storage), _end(_storage)
12305 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(_storage), _end(_storage)
12307 _assign(begin_, end_, type_);
12310 PUGI__FN xpath_node_set::~xpath_node_set()
12312 if (_begin != _storage)
12313 impl::xml_memory::deallocate(_begin);
12316 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(_storage), _end(_storage)
12318 _assign(ns._begin, ns._end, ns._type);
// Copy assignment: self-assignment is a no-op, otherwise the contents of `ns` are copied via _assign().
// NOTE(review): the final return is not part of this excerpt.
12321 PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
12323 if (this == &ns) return *this;
12325 _assign(ns._begin, ns._end, ns._type);
12330 #ifdef PUGIXML_HAS_MOVE
// Move constructor: initialises the set as empty and unsorted.
// NOTE(review): the body is not part of this excerpt - presumably it transfers rhs's contents; confirm.
12331 PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(_storage), _end(_storage)
// Move assignment: self-assignment is a no-op; otherwise the current heap buffer (if any) is released first.
// NOTE(review): the transfer of rhs's contents and the final return are not part of this excerpt.
12336 PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT
12338 if (this == &rhs) return *this;
12340 if (_begin != _storage)
12341 impl::xml_memory::deallocate(_begin);
// Accessor returning a type_t value.
// NOTE(review): the body is not part of this excerpt - presumably it returns _type; confirm.
12349 PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
12354 PUGI__FN size_t xpath_node_set::size() const
12356 return _end - _begin;
12359 PUGI__FN bool xpath_node_set::empty() const
12361 return _begin == _end;
12364 PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
12366 assert(index < size());
12367 return _begin[index];
// Accessor returning a const_iterator.
// NOTE(review): the body is not part of this excerpt - presumably it returns _begin; confirm.
12370 PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
// Accessor returning a const_iterator.
// NOTE(review): the body is not part of this excerpt - presumably it returns _end; confirm.
12375 PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
12380 PUGI__FN void xpath_node_set::sort(bool reverse)
12382 _type = impl::xpath_sort(_begin, _end, _type, reverse);
12385 PUGI__FN xpath_node xpath_node_set::first() const
12387 return impl::xpath_first(_begin, _end, _type);
// A freshly constructed result reports "Internal error" at offset 0 until it is overwritten.
12390 PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
// Boolean conversion of the parse result.
// NOTE(review): the body is not part of this excerpt - presumably true when no error is set; confirm.
12394 PUGI__FN xpath_parse_result::operator bool() const
12399 PUGI__FN const char* xpath_parse_result::description() const
12401 return error ? error : "No error";
// Creates a variable of the given value type with no successor in its list.
12404 PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
// Returns the variable's name. The name lives in the type-specific derived struct, so
// `this` is downcast according to the stored value type. Any other type trips the
// "Invalid variable type" assertion.
// NOTE(review): the switch header, the default label and the fallback return are not part of this excerpt.
12408 PUGI__FN const char_t* xpath_variable::name() const
12412 case xpath_type_node_set:
12413 return static_cast<const impl::xpath_variable_node_set*>(this)->name;
12415 case xpath_type_number:
12416 return static_cast<const impl::xpath_variable_number*>(this)->name;
12418 case xpath_type_string:
12419 return static_cast<const impl::xpath_variable_string*>(this)->name;
12421 case xpath_type_boolean:
12422 return static_cast<const impl::xpath_variable_boolean*>(this)->name;
12425 assert(false && "Invalid variable type"); // unreachable
// Accessor returning an xpath_value_type.
// NOTE(review): the body is not part of this excerpt - presumably it returns _type; confirm.
12430 PUGI__FN xpath_value_type xpath_variable::type() const
12435 PUGI__FN bool xpath_variable::get_boolean() const
12437 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
12440 PUGI__FN double xpath_variable::get_number() const
12442 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
12445 PUGI__FN const char_t* xpath_variable::get_string() const
12447 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
12448 return value ? value : PUGIXML_TEXT("");
12451 PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
12453 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
// Stores a boolean value; refuses (returns false) when the variable is not of boolean type.
// NOTE(review): the final return is not part of this excerpt.
12456 PUGI__FN bool xpath_variable::set(bool value)
12458 if (_type != xpath_type_boolean) return false;
12460 static_cast<impl::xpath_variable_boolean*>(this)->value = value;
// Stores a numeric value. Returns false, leaving the variable untouched, when its type is not number.
// NOTE(review): presumably returns true after the store - confirm.
12464 PUGI__FN bool xpath_variable::set(double value)
12466 if (_type != xpath_type_number) return false;
12468 static_cast<impl::xpath_variable_number*>(this)->value = value;
// Replaces the string value with a private copy of `value`.
// Returns false when the variable is not of string type or when the copy cannot be allocated.
// NOTE(review): `value` is handed to impl::strlength unchecked, so it presumably must be non-null - confirm.
// NOTE(review): presumably stores `copy` into var->value and returns true on success - confirm.
12472 PUGI__FN bool xpath_variable::set(const char_t* value)
12474 if (_type != xpath_type_string) return false;
12476 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
12478 // duplicate string
// the size is in bytes and includes the terminator, so the memcpy below copies the terminator as well
12479 size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
12481 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
12482 if (!copy) return false;
12484 memcpy(copy, value, size);
12486 // replace old string
// the old buffer is released only once the new copy exists, so a failed allocation above keeps the previous value
12487 if (var->value) impl::xml_memory::deallocate(var->value);
// Stores a node set by assigning `value` to the variable's own xpath_node_set.
// Returns false, leaving the variable untouched, when its type is not node set.
// NOTE(review): presumably returns true after the assignment - confirm.
12493 PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
12495 if (_type != xpath_type_node_set) return false;
12497 static_cast<impl::xpath_variable_node_set*>(this)->value = value;
// Constructs a variable set; the loop visits every bucket of the fixed-size _data array.
// NOTE(review): presumably each bucket is set to null - confirm.
12501 PUGI__FN xpath_variable_set::xpath_variable_set()
12503 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
// Releases the variables of every bucket by handing each bucket head to _destroy.
12507 PUGI__FN xpath_variable_set::~xpath_variable_set()
12509 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12510 _destroy(_data[i]);
// Copy constructor; the loop visits every bucket of _data.
// NOTE(review): presumably the buckets are nulled first and the contents of rhs are then copied (likely through _assign) - confirm.
12513 PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
12515 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
// Copy assignment. Assigning a set to itself returns immediately without touching it.
// NOTE(review): the copy itself presumably goes through _assign - confirm.
12521 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
12523 if (this == &rhs) return *this;
12530 #ifdef PUGIXML_HAS_MOVE
// Move constructor: adopts the head pointer of every bucket of rhs; no variable is copied.
// NOTE(review): the buckets of rhs are presumably nulled afterwards so the chains are not destroyed twice - confirm.
12531 PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12533 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12535 _data[i] = rhs._data[i];
12540 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12542 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12544 _destroy(_data[i]);
12546 _data[i] = rhs._data[i];
// Makes this set a copy of rhs.
// The copy is built in the local set `temp`: every non-empty bucket chain of rhs is cloned into the matching bucket of temp.
// NOTE(review): on a failed clone the function presumably returns without touching *this (temp's destructor frees the
// partial copy), and on success presumably swaps temp in through _swap - confirm.
12554 PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
12556 xpath_variable_set temp;
12558 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12559 if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
// Exchanges the contents of this set and rhs by swapping the bucket head pointers one bucket at a time.
// Only pointers move; no variable is copied or allocated.
12565 PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
12567 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12569 xpath_variable* chain = _data[i];
12571 _data[i] = rhs._data[i];
12572 rhs._data[i] = chain;
// Looks up a variable by name.
// The bucket index is impl::hash_string(name) modulo the number of buckets; the bucket's chain is then walked
// comparing each variable's name with impl::strequal.
// NOTE(review): presumably returns the matching variable, or null when the chain holds none - confirm.
12576 PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
12578 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12579 size_t hash = impl::hash_string(name) % hash_size;
12581 // look for existing variable
12582 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12583 if (impl::strequal(var->name(), name))
// Clones variables starting at `var`; *out_result receives the head of the copy.
// Returns false when a new variable cannot be allocated or when copying its value fails.
// Each new variable is created with the source's type and name and is linked into the result before its value is
// copied, so a partly built copy stays reachable from *out_result when a later step fails.
// NOTE(review): presumably walks the whole _next chain of `var`, with `last` tracking the tail of the copy, and
// returns true once the chain is exhausted - confirm.
12589 PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
12591 xpath_variable* last = 0;
12595 // allocate storage for new variable
12596 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
12597 if (!nvar) return false;
12599 // link the variable to the result immediately to handle failures gracefully
12601 last->_next = nvar;
12603 *out_result = nvar;
12607 // copy the value; this can fail due to out-of-memory conditions
12608 if (!impl::copy_xpath_variable(nvar, var)) return false;
// Deletes variables starting at `var` through impl::delete_xpath_variable.
// The _next link is read before the variable is deleted, since it cannot be read afterwards.
// NOTE(review): presumably loops until the end of the chain, and accepts a null `var` - confirm.
12616 PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
12620 xpath_variable* next = var->_next;
12622 impl::delete_xpath_variable(var->_type, var);
// Returns the variable called `name`, creating it with the given type when the set has none.
// An existing variable is returned only when its type equals `type`; a type mismatch yields null.
// A newly created variable is linked at the head of its bucket chain.
// NOTE(review): presumably returns the new variable, or null when impl::new_xpath_variable fails - confirm.
12628 PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
12630 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12631 size_t hash = impl::hash_string(name) % hash_size;
12633 // look for existing variable
12634 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12635 if (impl::strequal(var->name(), name))
12636 return var->type() == type ? var : 0;
12638 // add new variable
12639 xpath_variable* result = impl::new_xpath_variable(type, name);
12643 result->_next = _data[hash];
12645 _data[hash] = result;
// Sets the boolean variable `name`, creating it on demand through add().
// Returns false when add() yields no variable (for example the name already exists with another type)
// or when the store itself reports failure.
12651 PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
12653 xpath_variable* var = add(name, xpath_type_boolean);
12654 return var ? var->set(value) : false;
// Sets the number variable `name`, creating it on demand through add().
// Returns false when add() yields no variable (for example the name already exists with another type)
// or when the store itself reports failure.
12657 PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
12659 xpath_variable* var = add(name, xpath_type_number);
12660 return var ? var->set(value) : false;
// Sets the string variable `name`, creating it on demand through add().
// Returns false when add() yields no variable (for example the name already exists with another type)
// or when the store itself reports failure, which for strings includes a failed allocation of the copy.
12663 PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
12665 xpath_variable* var = add(name, xpath_type_string);
12666 return var ? var->set(value) : false;
// Sets the node-set variable `name`, creating it on demand through add().
// Returns false when add() yields no variable (for example the name already exists with another type)
// or when the store itself reports failure.
12669 PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
12671 xpath_variable* var = add(name, xpath_type_node_set);
12672 return var ? var->set(value) : false;
// Looks up the variable called `name`; the result is whatever _find returns for it.
12675 PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
12677 return _find(name);
// Const overload: same lookup through _find, with the result exposed as a pointer to const.
12680 PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
12682 return _find(name);
// Compiles `query`, resolving variable references against `variables`.
// Steps: create the query implementation, parse the expression into qimpl->root using the implementation's
// allocator, optimize the tree, then publish the implementation through _impl.
// Failure reporting depends on the build: with PUGIXML_NO_EXCEPTIONS the error text of _result is set
// ("Out of memory" for memory exhaustion); otherwise std::bad_alloc is thrown for memory exhaustion and
// xpath_exception(_result) for the remaining failures.
12685 PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
12687 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
// NOTE(review): this branch is presumably taken only when create() returned null - confirm
12691 #ifdef PUGIXML_NO_EXCEPTIONS
12692 _result.error = "Out of memory";
12694 throw std::bad_alloc();
12699 using impl::auto_deleter; // MSVC7 workaround
// NOTE(review): the deleter presumably destroys qimpl on every exit path that does not reach release() - confirm
12700 auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
12702 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
// NOTE(review): optimize/release presumably run only when parse produced a root - confirm
12706 qimpl->root->optimize(&qimpl->alloc);
12708 _impl = impl.release();
// NOTE(review): presumably the parse-failure branch; the oom flag separates memory exhaustion from errors in the query - confirm
12713 #ifdef PUGIXML_NO_EXCEPTIONS
12714 if (qimpl->oom) _result.error = "Out of memory";
12716 if (qimpl->oom) throw std::bad_alloc();
12717 throw xpath_exception(_result);
// Constructs an empty query: _impl is null, so no expression is attached.
12723 PUGI__FN xpath_query::xpath_query(): _impl(0)
// Releases the compiled query through impl::xpath_query_impl::destroy.
// NOTE(review): presumably skipped when _impl is null - confirm.
12727 PUGI__FN xpath_query::~xpath_query()
12730 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12733 #ifdef PUGIXML_HAS_MOVE
// Move constructor: takes the parse result of rhs and resets rhs's result to a default-constructed
// xpath_parse_result.
// NOTE(review): _impl is presumably transferred from rhs and nulled there as well - confirm.
12734 PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT
12737 _result = rhs._result;
12739 rhs._result = xpath_parse_result();
// Move assignment. Assigning a query to itself is a no-op.
// Otherwise the currently held implementation is destroyed, the parse result is taken from rhs, and rhs's
// result is reset to a default-constructed xpath_parse_result.
// NOTE(review): _impl is presumably transferred from rhs and nulled there, and the destroy is presumably
// skipped for a null _impl - confirm.
12742 PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT
12744 if (this == &rhs) return *this;
12747 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12750 _result = rhs._result;
12752 rhs._result = xpath_parse_result();
// Value type the query evaluates to: xpath_type_none for a query without an implementation,
// otherwise whatever the root node of the expression tree reports through rettype().
12758 PUGI__FN xpath_value_type xpath_query::return_type() const
12760 if (!_impl) return xpath_type_none;
12762 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
// Evaluates the query as a boolean with `n` as the context node. A query without an implementation yields false.
// NOTE(review): the two 1 arguments of the context are presumably position and size - confirm.
// NOTE(review): the failure branch below is presumably taken when evaluation ran out of memory; it throws
// std::bad_alloc when exceptions are enabled and presumably returns false under PUGIXML_NO_EXCEPTIONS - confirm.
12765 PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
12767 if (!_impl) return false;
12769 impl::xpath_context c(n, 1, 1);
12770 impl::xpath_stack_data sd;
12772 bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
12776 #ifdef PUGIXML_NO_EXCEPTIONS
12779 throw std::bad_alloc();
// Evaluates the query as a number with `n` as the context node.
// A query without an implementation yields impl::gen_nan().
// NOTE(review): the two 1 arguments of the context are presumably position and size - confirm.
// NOTE(review): the failure branch below is presumably taken when evaluation ran out of memory; it returns
// impl::gen_nan() under PUGIXML_NO_EXCEPTIONS and throws std::bad_alloc otherwise - confirm the condition.
12786 PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
12788 if (!_impl) return impl::gen_nan();
12790 impl::xpath_context c(n, 1, 1);
12791 impl::xpath_stack_data sd;
12793 double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
12797 #ifdef PUGIXML_NO_EXCEPTIONS
12798 return impl::gen_nan();
12800 throw std::bad_alloc();
12807 #ifndef PUGIXML_NO_STL
// Evaluates the query as a string with `n` as the context node and returns it as string_t.
// A query without an implementation yields an empty string. Only compiled when PUGIXML_NO_STL is not defined.
// NOTE(review): the two 1 arguments of the context are presumably position and size - confirm.
// NOTE(review): the failure branch below is presumably taken when evaluation ran out of memory; it throws
// std::bad_alloc when exceptions are enabled and presumably returns an empty string otherwise - confirm.
12808 PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
12810 if (!_impl) return string_t();
12812 impl::xpath_context c(n, 1, 1);
12813 impl::xpath_stack_data sd;
12815 impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack);
12819 #ifdef PUGIXML_NO_EXCEPTIONS
12822 throw std::bad_alloc();
// the result is copied into an owning string_t here
12826 return string_t(r.c_str(), r.length());
// Evaluates the query as a string with `n` as the context node and copies the result into `buffer`.
// At most `capacity` characters are written, terminator included: when the full string does not fit it is cut
// to capacity - 1 characters and terminated. A query without an implementation evaluates to an empty string.
// NOTE(review): presumably returns full_size (length plus terminator) so callers can size the buffer, and
// skips the copy entirely when capacity is 0 - confirm.
// NOTE(review): the failure branch is presumably taken when evaluation ran out of memory; it substitutes an
// empty string under PUGIXML_NO_EXCEPTIONS and throws std::bad_alloc otherwise - confirm the condition.
12830 PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
12832 impl::xpath_context c(n, 1, 1);
12833 impl::xpath_stack_data sd;
12835 impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string();
12839 #ifdef PUGIXML_NO_EXCEPTIONS
12840 r = impl::xpath_string();
12842 throw std::bad_alloc();
12846 size_t full_size = r.length() + 1;
// number of characters written, terminator included: the smaller of what is needed and what fits
12850 size_t size = (full_size < capacity) ? full_size : capacity;
12853 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
12854 buffer[size - 1] = 0;
// Evaluates the query as a node set with `n` as the context node, requesting all matching nodes
// (impl::nodeset_eval_all).
// The root of the expression is obtained through impl::evaluate_node_set_prepare; a null root yields an empty set.
// NOTE(review): evaluate_node_set_prepare presumably rejects queries that do not return a node set - confirm.
// NOTE(review): the failure branch is presumably taken when evaluation ran out of memory; it returns an empty
// set under PUGIXML_NO_EXCEPTIONS and throws std::bad_alloc otherwise - confirm the condition.
12860 PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
12862 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12863 if (!root) return xpath_node_set();
12865 impl::xpath_context c(n, 1, 1);
12866 impl::xpath_stack_data sd;
12868 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
12872 #ifdef PUGIXML_NO_EXCEPTIONS
12873 return xpath_node_set();
12875 throw std::bad_alloc();
// the raw result is copied into an owning xpath_node_set that keeps the raw set's ordering type
12879 return xpath_node_set(r.begin(), r.end(), r.type());
// Evaluates the query as a node set with `n` as the context node, requesting only the first node
// (impl::nodeset_eval_first).
// The root of the expression is obtained through impl::evaluate_node_set_prepare; a null root yields an empty xpath_node.
// NOTE(review): presumably returns the first node of the raw result - confirm.
// NOTE(review): the failure branch is presumably taken when evaluation ran out of memory; it returns an empty
// xpath_node under PUGIXML_NO_EXCEPTIONS and throws std::bad_alloc otherwise - confirm the condition.
12882 PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
12884 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12885 if (!root) return xpath_node();
12887 impl::xpath_context c(n, 1, 1);
12888 impl::xpath_stack_data sd;
12890 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
12894 #ifdef PUGIXML_NO_EXCEPTIONS
12895 return xpath_node();
12897 throw std::bad_alloc();
// Returns the parse result of this query by const reference.
// NOTE(review): presumably the _result member filled in by the compiling constructor - confirm.
12904 PUGI__FN const xpath_parse_result& xpath_query::result() const
// Helper whose address serves as the non-null value returned by xpath_query's unspecified_bool_type conversion.
12909 PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
// Boolean-context conversion: yields the address of unspecified_bool_xpath_query when the query has an
// implementation, and null otherwise.
12913 PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
12915 return _impl ? unspecified_bool_xpath_query : 0;
// Logical negation of the query.
// NOTE(review): presumably true exactly when _impl is null, the inverse of the unspecified_bool_type conversion - confirm.
12918 PUGI__FN bool xpath_query::operator!() const
// Compiles `query` into a temporary xpath_query (resolving variables against `variables`) and evaluates it
// with this node as context, returning the result of evaluate_node.
// The query is recompiled on every call; the overload taking an xpath_query reuses a compiled one.
12923 PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
12925 xpath_query q(query, variables);
12926 return q.evaluate_node(*this);
// Evaluates an already compiled query with this node as context and returns the result of evaluate_node.
12929 PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
12931 return query.evaluate_node(*this);
// Compiles `query` into a temporary xpath_query (resolving variables against `variables`) and evaluates it
// with this node as context, returning the result of evaluate_node_set.
// The query is recompiled on every call; the overload taking an xpath_query reuses a compiled one.
12934 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
12936 xpath_query q(query, variables);
12937 return q.evaluate_node_set(*this);
// Evaluates an already compiled query with this node as context and returns the result of evaluate_node_set.
12940 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
12942 return query.evaluate_node_set(*this);
// Performs exactly the same steps as select_node(const char_t*, xpath_variable_set*): compiles `query` into a
// temporary xpath_query and returns evaluate_node with this node as context.
// NOTE(review): presumably kept as a legacy alias of select_node - confirm against the header.
12945 PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
12947 xpath_query q(query, variables);
12948 return q.evaluate_node(*this);
// Performs exactly the same steps as select_node(const xpath_query&): evaluates the compiled query with this
// node as context and returns the result of evaluate_node.
// NOTE(review): presumably kept as a legacy alias of select_node - confirm against the header.
12951 PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
12953 return query.evaluate_node(*this);
// Restore the compiler option state for Borland C++.
// NOTE(review): presumably pairs with a `#pragma option push` near the top of the file - confirm.
12959 #ifdef __BORLANDC__
12960 # pragma option pop
12963 // Intel C++ does not properly keep warning state for function templates,
12964 // so popping warning state at the end of translation unit leads to warnings in the middle.
// pairs with the `#pragma warning(push)` issued at the top of the file
12965 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
12966 # pragma warning(pop)
// pairs with the `#pragma clang diagnostic push` issued under the same condition at the top of the file
12969 #if defined(_MSC_VER) && defined(__c2__)
12970 # pragma clang diagnostic pop
12973 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
// NOTE(review): this list presumably has to mirror every PUGI__* helper macro defined in this file - verify when adding a macro.
12974 #undef PUGI__NO_INLINE
12975 #undef PUGI__UNLIKELY
12976 #undef PUGI__STATIC_ASSERT
12977 #undef PUGI__DMC_VOLATILE
12978 #undef PUGI__UNSIGNED_OVERFLOW
12979 #undef PUGI__MSVC_CRT_VERSION
12980 #undef PUGI__SNPRINTF
12981 #undef PUGI__NS_BEGIN
12982 #undef PUGI__NS_END
12984 #undef PUGI__FN_NO_INLINE
12985 #undef PUGI__GETHEADER_IMPL
12986 #undef PUGI__GETPAGE_IMPL
12987 #undef PUGI__GETPAGE
12988 #undef PUGI__NODETYPE
12989 #undef PUGI__IS_CHARTYPE_IMPL
12990 #undef PUGI__IS_CHARTYPE
12991 #undef PUGI__IS_CHARTYPEX
12992 #undef PUGI__ENDSWITH
12993 #undef PUGI__SKIPWS
12994 #undef PUGI__OPTSET
12995 #undef PUGI__PUSHNODE
12996 #undef PUGI__POPNODE
12997 #undef PUGI__SCANFOR
12998 #undef PUGI__SCANWHILE
12999 #undef PUGI__SCANWHILE_UNROLL
13000 #undef PUGI__ENDSEG
13001 #undef PUGI__THROW_ERROR
13002 #undef PUGI__CHECK_ERROR
13007 * Copyright (c) 2006-2022 Arseny Kapoulkine
13009 * Permission is hereby granted, free of charge, to any person
13010 * obtaining a copy of this software and associated documentation
13011 * files (the "Software"), to deal in the Software without
13012 * restriction, including without limitation the rights to use,
13013 * copy, modify, merge, publish, distribute, sublicense, and/or sell
13014 * copies of the Software, and to permit persons to whom the
13015 * Software is furnished to do so, subject to the following
13018 * The above copyright notice and this permission notice shall be
13019 * included in all copies or substantial portions of the Software.
13021 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13022 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
13023 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
13024 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
13025 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
13026 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
13027 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
13028 * OTHER DEALINGS IN THE SOFTWARE.