2 * pugixml parser - version 1.7
3 * --------------------------------------------------------
4 * Copyright (C) 2006-2016, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5 * Report bugs and download new versions at http://pugixml.org/
7 * This library is distributed under the MIT License. See notice at the end
10 * This work is based on the pugxml parser, which is:
11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
14 #ifndef SOURCE_PUGIXML_CPP
15 #define SOURCE_PUGIXML_CPP
17 #include "pugixml.hpp"
25 #ifdef PUGIXML_WCHAR_MODE
29 #ifndef PUGIXML_NO_XPATH
32 # ifdef PUGIXML_NO_EXCEPTIONS
37 #ifndef PUGIXML_NO_STL
47 # pragma warning(push)
48 # pragma warning(disable: 4127) // conditional expression is constant
49 # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
50 # pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
51 # pragma warning(disable: 4702) // unreachable code
52 # pragma warning(disable: 4996) // this function or variable may be unsafe
53 # pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
56 #ifdef __INTEL_COMPILER
57 # pragma warning(disable: 177) // function was declared but never referenced
58 # pragma warning(disable: 279) // controlling expression is constant
59 # pragma warning(disable: 1478 1786) // function was declared "deprecated"
60 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
63 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
64 # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
69 # pragma warn -8008 // condition is always false
70 # pragma warn -8066 // unreachable code
74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
75 # pragma diag_suppress=178 // function was declared but never referenced
76 # pragma diag_suppress=237 // controlling expression is constant
80 #if defined(_MSC_VER) && _MSC_VER >= 1300
81 # define PUGI__NO_INLINE __declspec(noinline)
82 #elif defined(__GNUC__)
83 # define PUGI__NO_INLINE __attribute__((noinline))
85 # define PUGI__NO_INLINE
88 // Branch weight controls
90 # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
92 # define PUGI__UNLIKELY(cond) (cond)
95 // Simple static assertion
96 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
98 // Digital Mars C++ bug workaround for passing char loaded from memory via stack
100 # define PUGI__DMC_VOLATILE volatile
102 # define PUGI__DMC_VOLATILE
105 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
106 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
112 // Some MinGW versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions in strict ANSI mode
113 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__MINGW32__) && defined(__STRICT_ANSI__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
114 # define LLONG_MAX 9223372036854775807LL
115 # define LLONG_MIN (-LLONG_MAX-1)
116 # define ULLONG_MAX (2ULL*LLONG_MAX+1)
119 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
120 #if defined(_MSC_VER) && !defined(__S3E__)
121 # define PUGI__MSVC_CRT_VERSION _MSC_VER
124 #ifdef PUGIXML_HEADER_ONLY
125 # define PUGI__NS_BEGIN namespace pugi { namespace impl {
126 # define PUGI__NS_END } }
127 # define PUGI__FN inline
128 # define PUGI__FN_NO_INLINE inline
130 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
131 # define PUGI__NS_BEGIN namespace pugi { namespace impl {
132 # define PUGI__NS_END } }
134 # define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
135 # define PUGI__NS_END } } }
138 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE
142 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
145 # ifndef _UINTPTR_T_DEFINED
146 typedef size_t uintptr_t;
149 typedef unsigned __int8 uint8_t;
150 typedef unsigned __int16 uint16_t;
151 typedef unsigned __int32 uint32_t;
159 PUGI__FN void* default_allocate(size_t size)
164 PUGI__FN void default_deallocate(void* ptr)
169 template <typename T>
170 struct xml_memory_management_function_storage
172 static allocation_function allocate;
173 static deallocation_function deallocate;
176 // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
177 // Without a template<> we'll get multiple definitions of the same static
178 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
179 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
181 typedef xml_memory_management_function_storage<int> xml_memory;
187 PUGI__FN size_t strlength(const char_t* s)
191 #ifdef PUGIXML_WCHAR_MODE
198 // Compare two strings
199 PUGI__FN bool strequal(const char_t* src, const char_t* dst)
203 #ifdef PUGIXML_WCHAR_MODE
204 return wcscmp(src, dst) == 0;
206 return strcmp(src, dst) == 0;
210 // Compare lhs with [rhs_begin, rhs_end)
211 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
213 for (size_t i = 0; i < count; ++i)
214 if (lhs[i] != rhs[i])
217 return lhs[count] == 0;
220 // Get length of wide string, even if CRT lacks wide character support
221 PUGI__FN size_t strlength_wide(const wchar_t* s)
225 #ifdef PUGIXML_WCHAR_MODE
228 const wchar_t* end = s;
230 return static_cast<size_t>(end - s);
235 // auto_ptr-like object for exception recovery
237 template <typename T> struct auto_deleter
239 typedef void (*D)(T*);
244 auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
250 if (data) deleter(data);
262 #ifdef PUGIXML_COMPACT
264 class compact_hash_table
267 compact_hash_table(): _items(0), _capacity(0), _count(0)
275 xml_memory::deallocate(_items);
// Look up the value slot stored for key.
// Returns a pointer to the slot when the key is found, and 0 immediately when the table has no storage.
282 void** find(const void* key)
286 if (_capacity == 0) return 0;
// capacity - 1 is used as a bit mask for bucket indices; rehash() only ever sets the capacity to 32 or doubles it
288 size_t hashmod = _capacity - 1;
289 size_t bucket = hash(key) & hashmod;
// the loop performs at most _capacity probes
291 for (size_t probe = 0; probe <= hashmod; ++probe)
293 item_t& probe_item = _items[bucket];
// key is present: hand back its value slot
295 if (probe_item.key == key)
296 return &probe_item.value;
// a zero key marks a slot that has not been claimed (rehash() zero-fills fresh storage)
// NOTE(review): the statement guarded by this test is not visible in this view - presumably the lookup reports a miss here; confirm
298 if (probe_item.key == 0)
301 // hash collision, quadratic probing
// the step grows by one on each iteration (1, 2, 3, ...), wrapped by the mask
302 bucket = (bucket + probe + 1) & hashmod;
// reached only when every probe hit a different, non-empty key
305 assert(false && "Hash table is full");
// Find or create the value slot for key and return a pointer to it.
// The caller is expected to have made room beforehand: the assert below requires allocated storage
// and a fill level below three quarters of the capacity.
309 void** insert(const void* key)
312 assert(_capacity != 0 && _count < _capacity - _capacity / 4);
// capacity - 1 doubles as the index mask, same scheme as find()
314 size_t hashmod = _capacity - 1;
315 size_t bucket = hash(key) & hashmod;
317 for (size_t probe = 0; probe <= hashmod; ++probe)
319 item_t& probe_item = _items[bucket];
// unclaimed slot (zero key): claim it for this key
321 if (probe_item.key == 0)
323 probe_item.key = key;
// NOTE(review): rehash() asserts that _count matches after re-inserting, so the element counter is presumably
// bumped between these two lines; that statement is not visible in this view - confirm
325 return &probe_item.value;
// key was inserted earlier: reuse its slot
328 if (probe_item.key == key)
329 return &probe_item.value;
331 // hash collision, quadratic probing
// identical probe sequence to find(), so lookups retrace the path taken on insertion
332 bucket = (bucket + probe + 1) & hashmod;
// reached only when every probed slot is taken by another key
335 assert(false && "Hash table is full");
341 if (_count + 16 >= _capacity - _capacity / 4)
361 static unsigned int hash(const void* key)
363 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
365 // MurmurHash3 32-bit finalizer
// Grow the table: allocate larger zero-filled storage, re-insert the stored entries and adopt the new storage.
// The capacity becomes 32 on the first call and doubles on every later call.
376 PUGI__FN_NO_INLINE bool compact_hash_table::rehash()
// rt is a temporary table that owns the new storage while entries are moved over
378 compact_hash_table rt;
379 rt._capacity = (_capacity == 0) ? 32 : _capacity * 2;
380 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * rt._capacity));
// NOTE(review): handling of a failed allocation is not visible in this view; confirm rt._items is checked before use
// zero keys mark unclaimed slots for find()/insert()
385 memset(rt._items, 0, sizeof(item_t) * rt._capacity);
// walk the old storage and copy each value into the slot insert() picks in the new table
387 for (size_t i = 0; i < _capacity; ++i)
// NOTE(review): unclaimed slots have a zero key; a guard that skips them is expected before this line but is not visible here - confirm
389 *rt.insert(_items[i].key) = _items[i].value;
// release the old storage through the configured deallocation function
392 xml_memory::deallocate(_items);
394 _capacity = rt._capacity;
// sanity check: the new table holds as many entries as the old one
397 assert(_count == rt._count);
406 #ifdef PUGIXML_COMPACT
407 static const uintptr_t xml_memory_block_alignment = 4;
409 static const uintptr_t xml_memory_block_alignment = sizeof(void*);
412 // extra metadata bits
413 static const uintptr_t xml_memory_page_contents_shared_mask = 64;
414 static const uintptr_t xml_memory_page_name_allocated_mask = 32;
415 static const uintptr_t xml_memory_page_value_allocated_mask = 16;
416 static const uintptr_t xml_memory_page_type_mask = 15;
418 // combined masks for string uniqueness
419 static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
420 static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
422 #ifdef PUGIXML_COMPACT
423 #define PUGI__GETHEADER_IMPL(object, page, flags) // unused
424 #define PUGI__GETPAGE_IMPL(header) (header).get_page()
426 #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
427 #define PUGI__GETPAGE_IMPL(header) const_cast<impl::xml_memory_page*>(reinterpret_cast<const impl::xml_memory_page*>(reinterpret_cast<const char*>(&header) - (header >> 8)))
430 #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
431 #define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
433 struct xml_allocator;
435 struct xml_memory_page
437 static xml_memory_page* construct(void* memory)
439 xml_memory_page* result = static_cast<xml_memory_page*>(memory);
441 result->allocator = 0;
444 result->busy_size = 0;
445 result->freed_size = 0;
447 #ifdef PUGIXML_COMPACT
448 result->compact_string_base = 0;
449 result->compact_shared_parent = 0;
450 result->compact_page_marker = 0;
456 xml_allocator* allocator;
458 xml_memory_page* prev;
459 xml_memory_page* next;
464 #ifdef PUGIXML_COMPACT
465 char_t* compact_string_base;
466 void* compact_shared_parent;
467 uint32_t* compact_page_marker;
471 static const size_t xml_memory_page_size =
472 #ifdef PUGIXML_MEMORY_PAGE_SIZE
473 (PUGIXML_MEMORY_PAGE_SIZE)
477 - sizeof(xml_memory_page);
479 struct xml_memory_string_header
481 uint16_t page_offset; // offset from page->data
482 uint16_t full_size; // 0 if string occupies whole page
487 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
489 #ifdef PUGIXML_COMPACT
494 xml_memory_page* allocate_page(size_t data_size)
496 size_t size = sizeof(xml_memory_page) + data_size;
498 // allocate block with some alignment, leaving memory for worst-case padding
499 void* memory = xml_memory::allocate(size);
500 if (!memory) return 0;
502 // prepare page structure
503 xml_memory_page* page = xml_memory_page::construct(memory);
506 page->allocator = _root->allocator;
511 static void deallocate_page(xml_memory_page* page)
513 xml_memory::deallocate(page);
516 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
518 void* allocate_memory(size_t size, xml_memory_page*& out_page)
520 if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
521 return allocate_memory_oob(size, out_page);
523 void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
532 #ifdef PUGIXML_COMPACT
533 void* allocate_object(size_t size, xml_memory_page*& out_page)
535 void* result = allocate_memory(size + sizeof(uint32_t), out_page);
536 if (!result) return 0;
539 ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
541 if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
544 uint32_t* marker = static_cast<uint32_t*>(result);
546 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
547 out_page->compact_page_marker = marker;
549 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
550 // this will make sure deallocate_memory correctly tracks the size
551 out_page->freed_size += sizeof(uint32_t);
557 // roll back uint32_t part
558 _busy_size -= sizeof(uint32_t);
564 void* allocate_object(size_t size, xml_memory_page*& out_page)
566 return allocate_memory(size, out_page);
570 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
572 if (page == _root) page->busy_size = _busy_size;
574 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
577 page->freed_size += size;
578 assert(page->freed_size <= page->busy_size);
580 if (page->freed_size == page->busy_size)
584 assert(_root == page);
586 // top page freed, just reset sizes
588 page->freed_size = 0;
590 #ifdef PUGIXML_COMPACT
591 // reset compact state to maximize efficiency
592 page->compact_string_base = 0;
593 page->compact_shared_parent = 0;
594 page->compact_page_marker = 0;
601 assert(_root != page);
604 // remove from the list
605 page->prev->next = page->next;
606 page->next->prev = page->prev;
609 deallocate_page(page);
// Allocate storage for a string of `length` characters, preceded by an xml_memory_string_header.
// Returns a pointer to the character storage right after the header, or 0 when allocate_memory fails.
// NOTE(review): whether `length` is expected to include the terminating zero is not visible here - confirm with callers
614 char_t* allocate_string(size_t length)
// header fields are 16-bit and stored in units of xml_memory_block_alignment, so this is the largest encodable byte offset
616 static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
// compile-time check that any offset inside a regular page fits the encoding
618 PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
620 // allocate memory for string and header block
621 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
623 // round size up to block alignment boundary
624 size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
// page receives the page the block was carved from
626 xml_memory_page* page;
627 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
629 if (!header) return 0;
// byte distance of the header from the start of the page data area (just past the page structure)
632 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
634 assert(page_offset % xml_memory_block_alignment == 0);
635 assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
// stored in alignment units; deallocate_string multiplies it back to locate the page
636 header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
638 // full_size == 0 for large strings that occupy the whole page
639 assert(full_size % xml_memory_block_alignment == 0);
// a size that cannot be encoded is only acceptable when the string is alone at the start of its page
640 assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
641 header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
643 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
644 // header is guaranteed a pointer-sized alignment, which should be enough for char_t
645 return static_cast<char_t*>(static_cast<void*>(header + 1));
// Release a string previously returned by allocate_string.
// The owning page and the block size are recovered from the xml_memory_string_header stored just before the characters.
648 void deallocate_string(char_t* string)
650 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
651 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
// step back one header to reach the metadata written by allocate_string
654 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
// header->page_offset is in alignment units relative to the page data area; add the page structure size to reach the page start
658 size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
659 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
661 // if full_size == 0 then this string occupies the whole page
662 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
// return the header together with the characters to the page accounting
664 deallocate_memory(header, full_size, page);
669 #ifdef PUGIXML_COMPACT
670 return _hash->reserve();
676 xml_memory_page* _root;
679 #ifdef PUGIXML_COMPACT
680 compact_hash_table* _hash;
// Slow path of allocate_memory, taken when the request does not fit into the current root page.
// Allocates a new page and returns the start of its data area (the address right after the page structure).
// NOTE(review): several statements of this function (out_page assignment, failure check, part of the list linking)
// are not visible in this view; the notes below describe only the visible lines.
684 PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
// requests up to a quarter of a page get a regular page; larger requests get a page sized for that request alone
686 const size_t large_allocation_threshold = xml_memory_page_size / 4;
688 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
693 if (size <= large_allocation_threshold)
// write the cached fill level back into the page that is about to stop being the root
695 _root->busy_size = _busy_size;
697 // insert page at the end of linked list
706 // insert page before the end of linked list, so that it is deleted as soon as possible
707 // the last page is not deleted even if it's empty (see deallocate_memory)
// link the dedicated page between _root->prev and _root
710 page->prev = _root->prev;
713 _root->prev->next = page;
// the dedicated page is fully used by this single allocation
716 page->busy_size = size;
719 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
723 #ifdef PUGIXML_COMPACT
725 static const uintptr_t compact_alignment_log2 = 2;
726 static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
731 compact_header(xml_memory_page* page, unsigned int flags)
733 PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
735 ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
736 assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
738 _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
739 _flags = static_cast<unsigned char>(flags);
742 void operator&=(uintptr_t mod)
744 _flags &= static_cast<unsigned char>(mod);
747 void operator|=(uintptr_t mod)
749 _flags |= static_cast<unsigned char>(mod);
752 uintptr_t operator&(uintptr_t mod) const
757 xml_memory_page* get_page() const
759 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
760 const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
761 const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
763 return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
768 unsigned char _flags;
771 PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
773 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
775 return header->get_page();
778 template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
780 return static_cast<T*>(*compact_get_page(object, header_offset)->allocator->_hash->find(object));
783 template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
785 *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value;
788 template <typename T, int header_offset, int start = -126> class compact_pointer
791 compact_pointer(): _data(0)
795 void operator=(const compact_pointer& rhs)
800 void operator=(T* value)
804 // value is guaranteed to be compact-aligned; 'this' is not
805 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
806 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
807 // compensate for arithmetic shift rounding for negative values
808 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
809 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
811 if (static_cast<uintptr_t>(offset) <= 253)
812 _data = static_cast<unsigned char>(offset + 1);
815 compact_set_value<header_offset>(this, value);
830 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
832 return reinterpret_cast<T*>(base + ((_data - 1 + start) << compact_alignment_log2));
835 return compact_get_value<header_offset, T>(this);
841 T* operator->() const
850 template <typename T, int header_offset> class compact_pointer_parent
853 compact_pointer_parent(): _data(0)
857 void operator=(const compact_pointer_parent& rhs)
862 void operator=(T* value)
866 // value is guaranteed to be compact-aligned; 'this' is not
867 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
868 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
869 // compensate for arithmetic shift behavior for negative values
870 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
871 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
873 if (static_cast<uintptr_t>(offset) <= 65533)
875 _data = static_cast<unsigned short>(offset + 1);
879 xml_memory_page* page = compact_get_page(this, header_offset);
881 if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
882 page->compact_shared_parent = value;
884 if (page->compact_shared_parent == value)
890 compact_set_value<header_offset>(this, value);
908 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
910 return reinterpret_cast<T*>(base + ((_data - 1 - 65533) << compact_alignment_log2));
912 else if (_data == 65534)
913 return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
915 return compact_get_value<header_offset, T>(this);
921 T* operator->() const
930 template <int header_offset, int base_offset> class compact_string
933 compact_string(): _data(0)
937 void operator=(const compact_string& rhs)
942 void operator=(char_t* value)
946 xml_memory_page* page = compact_get_page(this, header_offset);
948 if (PUGI__UNLIKELY(page->compact_string_base == 0))
949 page->compact_string_base = value;
951 ptrdiff_t offset = value - page->compact_string_base;
953 if (static_cast<uintptr_t>(offset) < (65535 << 7))
955 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
956 uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
960 *base = static_cast<uint16_t>((offset >> 7) + 1);
961 _data = static_cast<unsigned char>((offset & 127) + 1);
965 ptrdiff_t remainder = offset - ((*base - 1) << 7);
967 if (static_cast<uintptr_t>(remainder) <= 253)
969 _data = static_cast<unsigned char>(remainder + 1);
973 compact_set_value<header_offset>(this, value);
981 compact_set_value<header_offset>(this, value);
992 operator char_t*() const
998 xml_memory_page* page = compact_get_page(this, header_offset);
1000 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
1001 const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
1004 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
1006 return page->compact_string_base + offset;
1010 return compact_get_value<header_offset, char_t>(this);
1018 unsigned char _data;
1023 #ifdef PUGIXML_COMPACT
1026 struct xml_attribute_struct
1028 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
1030 PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
1033 impl::compact_header header;
1035 uint16_t namevalue_base;
1037 impl::compact_string<4, 2> name;
1038 impl::compact_string<5, 3> value;
1040 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
1041 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
1044 struct xml_node_struct
1046 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
1048 PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
1051 impl::compact_header header;
1053 uint16_t namevalue_base;
1055 impl::compact_string<4, 2> name;
1056 impl::compact_string<5, 3> value;
1058 impl::compact_pointer_parent<xml_node_struct, 6> parent;
1060 impl::compact_pointer<xml_node_struct, 8, 0> first_child;
1062 impl::compact_pointer<xml_node_struct, 9> prev_sibling_c;
1063 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
1065 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
1071 struct xml_attribute_struct
1073 xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
1075 header = PUGI__GETHEADER_IMPL(this, page, 0);
1083 xml_attribute_struct* prev_attribute_c;
1084 xml_attribute_struct* next_attribute;
1087 struct xml_node_struct
1089 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
1091 header = PUGI__GETHEADER_IMPL(this, page, type);
1099 xml_node_struct* parent;
1101 xml_node_struct* first_child;
1103 xml_node_struct* prev_sibling_c;
1104 xml_node_struct* next_sibling;
1106 xml_attribute_struct* first_attribute;
1112 struct xml_extra_buffer
1115 xml_extra_buffer* next;
1118 struct xml_document_struct: public xml_node_struct, public xml_allocator
1120 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
1122 #ifdef PUGIXML_COMPACT
1127 const char_t* buffer;
1129 xml_extra_buffer* extra_buffers;
1131 #ifdef PUGIXML_COMPACT
1132 compact_hash_table hash;
1136 template <typename Object> inline xml_allocator& get_allocator(const Object* object)
1140 return *PUGI__GETPAGE(object)->allocator;
1143 template <typename Object> inline xml_document_struct& get_document(const Object* object)
1147 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
1151 // Low-level DOM operations
1153 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
1155 xml_memory_page* page;
1156 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1157 if (!memory) return 0;
1159 return new (memory) xml_attribute_struct(page);
1162 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
1164 xml_memory_page* page;
1165 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1166 if (!memory) return 0;
1168 return new (memory) xml_node_struct(page, type);
1171 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
1173 if (a->header & impl::xml_memory_page_name_allocated_mask)
1174 alloc.deallocate_string(a->name);
1176 if (a->header & impl::xml_memory_page_value_allocated_mask)
1177 alloc.deallocate_string(a->value);
1179 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
1182 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
1184 if (n->header & impl::xml_memory_page_name_allocated_mask)
1185 alloc.deallocate_string(n->name);
1187 if (n->header & impl::xml_memory_page_value_allocated_mask)
1188 alloc.deallocate_string(n->value);
1190 for (xml_attribute_struct* attr = n->first_attribute; attr; )
1192 xml_attribute_struct* next = attr->next_attribute;
1194 destroy_attribute(attr, alloc);
1199 for (xml_node_struct* child = n->first_child; child; )
1201 xml_node_struct* next = child->next_sibling;
1203 destroy_node(child, alloc);
1208 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
1211 inline void append_node(xml_node_struct* child, xml_node_struct* node)
1213 child->parent = node;
1215 xml_node_struct* head = node->first_child;
1219 xml_node_struct* tail = head->prev_sibling_c;
1221 tail->next_sibling = child;
1222 child->prev_sibling_c = tail;
1223 head->prev_sibling_c = child;
1227 node->first_child = child;
1228 child->prev_sibling_c = child;
1232 inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
1234 child->parent = node;
1236 xml_node_struct* head = node->first_child;
1240 child->prev_sibling_c = head->prev_sibling_c;
1241 head->prev_sibling_c = child;
1244 child->prev_sibling_c = child;
1246 child->next_sibling = head;
1247 node->first_child = child;
1250 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
1252 xml_node_struct* parent = node->parent;
1254 child->parent = parent;
1256 if (node->next_sibling)
1257 node->next_sibling->prev_sibling_c = child;
1259 parent->first_child->prev_sibling_c = child;
1261 child->next_sibling = node->next_sibling;
1262 child->prev_sibling_c = node;
1264 node->next_sibling = child;
1267 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
1269 xml_node_struct* parent = node->parent;
1271 child->parent = parent;
1273 if (node->prev_sibling_c->next_sibling)
1274 node->prev_sibling_c->next_sibling = child;
1276 parent->first_child = child;
1278 child->prev_sibling_c = node->prev_sibling_c;
1279 child->next_sibling = node;
1281 node->prev_sibling_c = child;
1284 inline void remove_node(xml_node_struct* node)
1286 xml_node_struct* parent = node->parent;
1288 if (node->next_sibling)
1289 node->next_sibling->prev_sibling_c = node->prev_sibling_c;
1291 parent->first_child->prev_sibling_c = node->prev_sibling_c;
1293 if (node->prev_sibling_c->next_sibling)
1294 node->prev_sibling_c->next_sibling = node->next_sibling;
1296 parent->first_child = node->next_sibling;
1299 node->prev_sibling_c = 0;
1300 node->next_sibling = 0;
1303 inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1305 xml_attribute_struct* head = node->first_attribute;
1309 xml_attribute_struct* tail = head->prev_attribute_c;
1311 tail->next_attribute = attr;
1312 attr->prev_attribute_c = tail;
1313 head->prev_attribute_c = attr;
1317 node->first_attribute = attr;
1318 attr->prev_attribute_c = attr;
1322 inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1324 xml_attribute_struct* head = node->first_attribute;
1328 attr->prev_attribute_c = head->prev_attribute_c;
1329 head->prev_attribute_c = attr;
1332 attr->prev_attribute_c = attr;
1334 attr->next_attribute = head;
1335 node->first_attribute = attr;
1338 inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1340 if (place->next_attribute)
1341 place->next_attribute->prev_attribute_c = attr;
1343 node->first_attribute->prev_attribute_c = attr;
1345 attr->next_attribute = place->next_attribute;
1346 attr->prev_attribute_c = place;
1347 place->next_attribute = attr;
1350 inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1352 if (place->prev_attribute_c->next_attribute)
1353 place->prev_attribute_c->next_attribute = attr;
1355 node->first_attribute = attr;
1357 attr->prev_attribute_c = place->prev_attribute_c;
1358 attr->next_attribute = place;
1359 place->prev_attribute_c = attr;
1362 inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1364 if (attr->next_attribute)
1365 attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
1367 node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
1369 if (attr->prev_attribute_c->next_attribute)
1370 attr->prev_attribute_c->next_attribute = attr->next_attribute;
1372 node->first_attribute = attr->next_attribute;
1374 attr->prev_attribute_c = 0;
1375 attr->next_attribute = 0;
1378 PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
1380 if (!alloc.reserve()) return 0;
1382 xml_node_struct* child = allocate_node(alloc, type);
1383 if (!child) return 0;
1385 append_node(child, node);
1390 PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
1392 if (!alloc.reserve()) return 0;
1394 xml_attribute_struct* attr = allocate_attribute(alloc);
1395 if (!attr) return 0;
1397 append_attribute(attr, node);
1403 // Helper classes for code generation
1416 // Unicode utilities
// Reverse the byte order of a 16-bit value (0x1234 becomes 0x3412).
inline uint16_t endian_swap(uint16_t value)
{
	// the operands are promoted to int, so the result is narrowed back explicitly
	return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
}
// Reverse the byte order of a 32-bit value (0x12345678 becomes 0x78563412).
inline uint32_t endian_swap(uint32_t value)
{
	// each term moves one byte to its mirrored position
	return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
}
1430 typedef size_t value_type;
1432 static value_type low(value_type result, uint32_t ch)
1435 if (ch < 0x80) return result + 1;
1437 else if (ch < 0x800) return result + 2;
1439 else return result + 3;
1442 static value_type high(value_type result, uint32_t)
1444 // U+10000..U+10FFFF
// Output traits that encode code points as UTF-8 into a byte buffer.
// 'result' is the write position; each call returns the position after the encoded bytes.
// The caller must guarantee enough room in the buffer.
struct utf8_writer
{
	typedef uint8_t* value_type;

	// Encodes a code point below U+10000 using 1, 2 or 3 bytes
	static value_type low(value_type result, uint32_t ch)
	{
		// U+0000..U+007F
		if (ch < 0x80)
		{
			result[0] = static_cast<uint8_t>(ch);

			return result + 1;
		}

		// U+0080..U+07FF
		if (ch < 0x800)
		{
			result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
			result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));

			return result + 2;
		}

		// U+0800..U+FFFF
		result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
		result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
		result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));

		return result + 3;
	}

	// Encodes a code point in U+10000..U+10FFFF using 4 bytes
	static value_type high(value_type result, uint32_t ch)
	{
		result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
		result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
		result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
		result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));

		return result + 4;
	}

	// Encodes any code point, dispatching on whether it fits below U+10000
	static value_type any(value_type result, uint32_t ch)
	{
		if (ch < 0x10000) return low(result, ch);

		return high(result, ch);
	}
};
// Output traits that count how many UTF-16 code units a sequence of code points needs.
// 'result' is the running count; each call returns the count after adding one code point.
struct utf16_counter
{
	typedef size_t value_type;

	// Code points below U+10000 take a single code unit
	static value_type low(value_type result, uint32_t)
	{
		const value_type single_unit = 1;

		return result + single_unit;
	}

	// Code points from U+10000 upwards take a surrogate pair
	static value_type high(value_type result, uint32_t)
	{
		const value_type surrogate_pair = 2;

		return result + surrogate_pair;
	}
};
// Output traits that encode code points as UTF-16 code units in native byte order.
// 'result' is the write position; each call returns the position after the written units.
struct utf16_writer
{
	typedef uint16_t* value_type;

	// Writes a code point below U+10000 as a single code unit
	static value_type low(value_type result, uint32_t ch)
	{
		result[0] = static_cast<uint16_t>(ch);

		return result + 1;
	}

	// Writes a code point from U+10000 upwards as a surrogate pair
	static value_type high(value_type result, uint32_t ch)
	{
		const uint32_t offset = static_cast<uint32_t>(ch - 0x10000);

		// upper 10 bits go into the lead surrogate, lower 10 bits into the trail surrogate
		result[0] = static_cast<uint16_t>(0xD800 + (offset >> 10));
		result[1] = static_cast<uint16_t>(0xDC00 + (offset & 0x3ff));

		return result + 2;
	}

	// Writes any code point, dispatching on whether it fits below U+10000
	static value_type any(value_type result, uint32_t ch)
	{
		if (ch < 0x10000) return low(result, ch);

		return high(result, ch);
	}
};
1537 struct utf32_counter
1539 typedef size_t value_type;
1541 static value_type low(value_type result, uint32_t)
1546 static value_type high(value_type result, uint32_t)
1554 typedef uint32_t* value_type;
1556 static value_type low(value_type result, uint32_t ch)
1563 static value_type high(value_type result, uint32_t ch)
1570 static value_type any(value_type result, uint32_t ch)
1578 struct latin1_writer
1580 typedef uint8_t* value_type;
1582 static value_type low(value_type result, uint32_t ch)
1584 *result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
1589 static value_type high(value_type result, uint32_t ch)
// Decodes a UTF-8 byte sequence, forwarding each decoded code point to Traits::low
// (code points below U+10000) or Traits::high (U+10000 and above).
// Bytes that do not start a well-formed sequence (stray continuation bytes, 11111xxx
// leads, truncated or broken sequences) are skipped one byte at a time.
struct utf8_decoder
{
	typedef uint8_t type;

	template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
	{
		const uint8_t utf8_byte_mask = 0x3f;

		while (size)
		{
			const uint8_t lead = *data;

			// 0xxxxxxx -> U+0000..U+007F
			if (lead < 0x80)
			{
				result = Traits::low(result, lead);
				++data;
				--size;

				// process aligned single-byte (ascii) blocks, four bytes per iteration
				if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
				{
					// round-trip through void* to silence 'cast increases required alignment of target type' warnings
					while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
					{
						for (size_t k = 0; k < 4; ++k)
							result = Traits::low(result, data[k]);

						data += 4;
						size -= 4;
					}
				}

				continue;
			}

			// number of input bytes to step over; stays 1 for invalid input
			size_t consumed = 1;

			// 110xxxxx -> U+0080..U+07FF
			if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
			{
				result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
				consumed = 2;
			}
			// 1110xxxx -> U+0800-U+FFFF
			else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
			{
				result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
				consumed = 3;
			}
			// 11110xxx -> U+10000..U+10FFFF
			else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
			{
				result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
				consumed = 4;
			}
			// 10xxxxxx or 11111xxx -> invalid, skip a single byte

			data += consumed;
			size -= consumed;
		}

		return result;
	}
};
1666 template <typename opt_swap> struct utf16_decoder
1668 typedef uint16_t type;
1670 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
1674 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1679 result = Traits::low(result, lead);
1684 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1686 result = Traits::low(result, lead);
1690 // surrogate pair lead
1691 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
1693 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1695 if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1697 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1718 template <typename opt_swap> struct utf32_decoder
1720 typedef uint32_t type;
1722 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
1726 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1731 result = Traits::low(result, lead);
1735 // U+10000..U+10FFFF
1738 result = Traits::high(result, lead);
// Decodes Latin-1 input: every byte is forwarded unchanged to Traits::low as a code point.
struct latin1_decoder
{
	typedef uint8_t type;

	template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
	{
		for (const uint8_t* const stop = data + size; data != stop; ++data)
			result = Traits::low(result, *data);

		return result;
	}
};
1765 template <size_t size> struct wchar_selector;
1767 template <> struct wchar_selector<2>
1769 typedef uint16_t type;
1770 typedef utf16_counter counter;
1771 typedef utf16_writer writer;
1772 typedef utf16_decoder<opt_false> decoder;
1775 template <> struct wchar_selector<4>
1777 typedef uint32_t type;
1778 typedef utf32_counter counter;
1779 typedef utf32_writer writer;
1780 typedef utf32_decoder<opt_false> decoder;
1783 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
1784 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
1786 struct wchar_decoder
1788 typedef wchar_t type;
1790 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
1792 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
1794 return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
1798 #ifdef PUGIXML_WCHAR_MODE
1799 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
1801 for (size_t i = 0; i < length; ++i)
1802 result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
1810 ct_parse_pcdata = 1, // \0, &, \r, <
1811 ct_parse_attr = 2, // \0, &, \r, ', "
1812 ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
1813 ct_space = 8, // \r, \n, space, tab
1814 ct_parse_cdata = 16, // \0, ], >, \r
1815 ct_parse_comment = 32, // \0, -, >, \r
1816 ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
1817 ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
1820 static const unsigned char chartype_table[256] =
1822 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
1823 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
1824 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
1825 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
1826 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
1827 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
1828 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
1829 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
1831 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
1832 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1833 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1834 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1835 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1836 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1837 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1838 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
1843 ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
1844 ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
1845 ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
1846 ctx_digit = 8, // 0-9
1847 ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
1850 static const unsigned char chartypex_table[256] =
1852 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
1853 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
1854 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
1855 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
1857 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
1858 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
1859 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
1860 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
1862 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
1863 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1864 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1865 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1866 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1867 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1868 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1869 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
1872 #ifdef PUGIXML_WCHAR_MODE
1873 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
1875 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
1878 #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
1879 #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1881 PUGI__FN bool is_little_endian()
1883 unsigned int ui = 1;
1885 return *reinterpret_cast<unsigned char*>(&ui) == 1;
1888 PUGI__FN xml_encoding get_wchar_encoding()
1890 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1892 if (sizeof(wchar_t) == 2)
1893 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1895 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1898 PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
1900 #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
1901 #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }
1903 // check if we have a non-empty XML declaration
1904 if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
1907 // scan XML declaration until the encoding field
1908 for (size_t i = 6; i + 1 < size; ++i)
1910 // declaration can not contain ? in quoted values
1914 if (data[i] == 'e' && data[i + 1] == 'n')
1918 // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
1919 PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
1920 PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');
1923 PUGI__SCANCHARTYPE(ct_space);
1924 PUGI__SCANCHAR('=');
1925 PUGI__SCANCHARTYPE(ct_space);
1927 // the only two valid delimiters are ' and "
1928 uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
1930 PUGI__SCANCHAR(delimiter);
1932 size_t start = offset;
1934 out_encoding = data + offset;
1936 PUGI__SCANCHARTYPE(ct_symbol);
1938 out_length = offset - start;
1940 PUGI__SCANCHAR(delimiter);
1948 #undef PUGI__SCANCHAR
1949 #undef PUGI__SCANCHARTYPE
1952 PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
1954 // skip encoding autodetection if input buffer is too small
1955 if (size < 4) return encoding_utf8;
1957 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1959 // look for BOM in first few bytes
1960 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1961 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1962 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1963 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1964 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1966 // look for <, <? or <?xm in various encodings
1967 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1968 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1969 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1970 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1972 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1973 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1974 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1976 // no known BOM detected; parse declaration
1977 const uint8_t* enc = 0;
1978 size_t enc_length = 0;
1980 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
1982 // iso-8859-1 (case-insensitive)
1983 if (enc_length == 10
1984 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
1985 && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
1986 && enc[8] == '-' && enc[9] == '1')
1987 return encoding_latin1;
1989 // latin1 (case-insensitive)
1991 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
1992 && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
1994 return encoding_latin1;
1997 return encoding_utf8;
2000 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
2002 // replace wchar encoding with utf implementation
2003 if (encoding == encoding_wchar) return get_wchar_encoding();
2005 // replace utf16 encoding with utf16 with specific endianness
2006 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2008 // replace utf32 encoding with utf32 with specific endianness
2009 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2011 // only do autodetection if no explicit encoding is requested
2012 if (encoding != encoding_auto) return encoding;
2014 // try to guess encoding (based on XML specification, Appendix F.1)
2015 const uint8_t* data = static_cast<const uint8_t*>(contents);
2017 return guess_buffer_encoding(data, size);
2020 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2022 size_t length = size / sizeof(char_t);
2026 out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
2027 out_length = length;
2031 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2032 if (!buffer) return false;
2035 memcpy(buffer, contents, length * sizeof(char_t));
2037 assert(length == 0);
2041 out_buffer = buffer;
2042 out_length = length + 1;
2048 #ifdef PUGIXML_WCHAR_MODE
2049 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
2051 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
2052 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
2055 PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2057 const char_t* data = static_cast<const char_t*>(contents);
2058 size_t length = size / sizeof(char_t);
2062 char_t* buffer = const_cast<char_t*>(data);
2064 convert_wchar_endian_swap(buffer, data, length);
2066 out_buffer = buffer;
2067 out_length = length;
2071 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2072 if (!buffer) return false;
2074 convert_wchar_endian_swap(buffer, data, length);
2077 out_buffer = buffer;
2078 out_length = length + 1;
2084 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2086 const typename D::type* data = static_cast<const typename D::type*>(contents);
2087 size_t data_length = size / sizeof(typename D::type);
2089 // first pass: get length in wchar_t units
2090 size_t length = D::process(data, data_length, 0, wchar_counter());
2092 // allocate buffer of suitable length
2093 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2094 if (!buffer) return false;
2096 // second pass: convert utf16 input to wchar_t
2097 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
2098 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
2100 assert(oend == obegin + length);
2103 out_buffer = buffer;
2104 out_length = length + 1;
2109 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2111 // get native encoding
2112 xml_encoding wchar_encoding = get_wchar_encoding();
2114 // fast path: no conversion required
2115 if (encoding == wchar_encoding)
2116 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2118 // only endian-swapping is required
2119 if (need_endian_swap_utf(encoding, wchar_encoding))
2120 return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
2122 // source encoding is utf8
2123 if (encoding == encoding_utf8)
2124 return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
2126 // source encoding is utf16
2127 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2129 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2131 return (native_encoding == encoding) ?
2132 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2133 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2136 // source encoding is utf32
2137 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2139 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2141 return (native_encoding == encoding) ?
2142 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2143 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2146 // source encoding is latin1
2147 if (encoding == encoding_latin1)
2148 return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
2150 assert(false && "Invalid encoding");
2154 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2156 const typename D::type* data = static_cast<const typename D::type*>(contents);
2157 size_t data_length = size / sizeof(typename D::type);
2159 // first pass: get length in utf8 units
2160 size_t length = D::process(data, data_length, 0, utf8_counter());
2162 // allocate buffer of suitable length
2163 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2164 if (!buffer) return false;
2166 // second pass: convert utf16 input to utf8
2167 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2168 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
2170 assert(oend == obegin + length);
2173 out_buffer = buffer;
2174 out_length = length + 1;
2179 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
2181 for (size_t i = 0; i < size; ++i)
2188 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2190 const uint8_t* data = static_cast<const uint8_t*>(contents);
2191 size_t data_length = size;
2193 // get size of prefix that does not need utf8 conversion
2194 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2195 assert(prefix_length <= data_length);
2197 const uint8_t* postfix = data + prefix_length;
2198 size_t postfix_length = data_length - prefix_length;
2200 // if no conversion is needed, just return the original buffer
2201 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2203 // first pass: get length in utf8 units
2204 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
2206 // allocate buffer of suitable length
2207 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2208 if (!buffer) return false;
2210 // second pass: convert latin1 input to utf8
2211 memcpy(buffer, data, prefix_length);
2213 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2214 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
2216 assert(oend == obegin + length);
2219 out_buffer = buffer;
2220 out_length = length + 1;
2225 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2227 // fast path: no conversion required
2228 if (encoding == encoding_utf8)
2229 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2231 // source encoding is utf16
2232 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2234 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2236 return (native_encoding == encoding) ?
2237 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2238 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2241 // source encoding is utf32
2242 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2244 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2246 return (native_encoding == encoding) ?
2247 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2248 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2251 // source encoding is latin1
2252 if (encoding == encoding_latin1)
2253 return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
2255 assert(false && "Invalid encoding");
2260 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
2262 // get length in utf8 characters
2263 return wchar_decoder::process(str, length, 0, utf8_counter());
2266 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
2269 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2270 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2272 assert(begin + size == end);
2277 #ifndef PUGIXML_NO_STL
2278 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
2280 // first pass: get length in utf8 characters
2281 size_t size = as_utf8_begin(str, length);
2283 // allocate resulting string
2285 result.resize(size);
2287 // second pass: convert to utf8
2288 if (size > 0) as_utf8_end(&result[0], size, str, length);
2293 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
2295 const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2297 // first pass: get length in wchar_t units
2298 size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2300 // allocate resulting string
2301 std::basic_string<wchar_t> result;
2302 result.resize(length);
2304 // second pass: convert to wchar_t
2307 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
2308 wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
2310 assert(begin + length == end);
2318 template <typename Header>
2319 inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
2321 // never reuse shared memory
2322 if (header & xml_memory_page_contents_shared_mask) return false;
2324 size_t target_length = strlength(target);
2326 // always reuse document buffer memory if possible
2327 if ((header & header_mask) == 0) return target_length >= length;
2329 // reuse heap memory if waste is not too great
2330 const size_t reuse_threshold = 32;
2332 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
2335 template <typename String, typename Header>
2336 PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
2338 if (source_length == 0)
2340 // empty string and null pointer are equivalent, so just deallocate old memory
2341 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2343 if (header & header_mask) alloc->deallocate_string(dest);
2345 // mark the string as not allocated
2347 header &= ~header_mask;
2351 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
2353 // we can reuse old buffer, so just copy the new data (including zero terminator)
2354 memcpy(dest, source, source_length * sizeof(char_t));
2355 dest[source_length] = 0;
2361 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2363 if (!alloc->reserve()) return false;
2365 // allocate new buffer
2366 char_t* buf = alloc->allocate_string(source_length + 1);
2367 if (!buf) return false;
2369 // copy the string (including zero terminator)
2370 memcpy(buf, source, source_length * sizeof(char_t));
2371 buf[source_length] = 0;
2373 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
2374 if (header & header_mask && dest) alloc->deallocate_string(dest);
2376 // the string is now allocated, so set the flag
2378 header |= header_mask;
2389 gap(): end(0), size(0)
2393 // Push new gap, move s count bytes further (skipping the gap).
2394 // Collapse previous gap.
2395 void push(char_t*& s, size_t count)
2397 if (end) // there was a gap already; collapse it
2399 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
2401 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2404 s += count; // end of current gap
2411 // Collapse all gaps, return past-the-end pointer
2412 char_t* flush(char_t* s)
2416 // Move [old_gap_end, current_pos) to [old_gap_start, ...)
2418 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2426 PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
2428 char_t* stre = s + 1;
2434 unsigned int ucsc = 0;
2436 if (stre[1] == 'x') // &#x... (hex code)
2442 if (ch == ';') return stre;
2446 if (static_cast<unsigned int>(ch - '0') <= 9)
2447 ucsc = 16 * ucsc + (ch - '0');
2448 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
2449 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
2460 else // &#... (dec code)
2462 char_t ch = *++stre;
2464 if (ch == ';') return stre;
2468 if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9)
2469 ucsc = 10 * ucsc + (ch - '0');
2481 #ifdef PUGIXML_WCHAR_MODE
2482 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
2484 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
2487 g.push(s, stre - s);
2495 if (*stre == 'm') // &am
2497 if (*++stre == 'p' && *++stre == ';') // &
2502 g.push(s, stre - s);
2506 else if (*stre == 'p') // &ap
2508 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // '
2513 g.push(s, stre - s);
2522 if (*++stre == 't' && *++stre == ';') // >
2527 g.push(s, stre - s);
2535 if (*++stre == 't' && *++stre == ';') // <
2540 g.push(s, stre - s);
2548 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // "
2553 g.push(s, stre - s);
2567 #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
2568 #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
2569 #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
2570 #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
2571 #define PUGI__POPNODE() { cursor = cursor->parent; }
2572 #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
2573 #define PUGI__SCANWHILE(X) { while (X) ++s; }
2574 #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
2575 #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
2576 #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
2577 #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
2579 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
2585 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
2587 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2589 *s++ = '\n'; // replace first one with 0x0a
2591 if (*s == '\n') g.push(s, 1);
2593 else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
2597 return s + (s[2] == '>' ? 3 : 2);
2607 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
2613 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
2615 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2617 *s++ = '\n'; // replace first one with 0x0a
2619 if (*s == '\n') g.push(s, 1);
2621 else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
2635 typedef char_t* (*strconv_pcdata_t)(char_t*);
2637 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
2639 static char_t* parse(char_t* s)
2647 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
2649 if (*s == '<') // PCDATA ends here
2651 char_t* end = g.flush(s);
2653 if (opt_trim::value)
2654 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2661 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2663 *s++ = '\n'; // replace first one with 0x0a
2665 if (*s == '\n') g.push(s, 1);
2667 else if (opt_escape::value && *s == '&')
2669 s = strconv_escape(s, g);
2673 char_t* end = g.flush(s);
2675 if (opt_trim::value)
2676 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
// Picks the strconv_pcdata_impl<opt_trim, opt_eol, opt_escape>::parse instantiation that matches optmask.
// The switch index is assembled from three option bits:
//   bit 0 = parse_escapes (0x10 >> 4), bit 1 = parse_eol (0x20 >> 4), bit 2 = parse_trim_pcdata (0x0800 >> 9)
// The index is masked to 0..7, so the default branch (assert, then return 0) is only a safety net.
2688 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
// the shifts below depend on these exact flag values
2690 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
2692 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim)
2694 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
2695 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
2696 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
2697 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
2698 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
2699 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
2700 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
2701 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
2702 default: assert(false); return 0; // should not get here
// Signature of an attribute value conversion routine: receives the start of the value and the quote
// character that ends it, and returns a char_t*.
2706 typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
// Attribute value converters. opt_escape::value decides at compile time whether '&' sequences are
// passed to strconv_escape. The caller in parse_tree reports status_bad_attribute when the selected
// routine yields a null pointer.
2708 template <typename opt_escape> struct strconv_attribute_impl
// Variant that also handles whitespace inside the value: leading whitespace is trimmed and the scan
// stops at every whitespace character as well as at characters flagged ct_parse_attr_ws.
2710 static char_t* parse_wnorm(char_t* s, char_t end_quote)
2714 // trim leading whitespaces
2715 if (PUGI__IS_CHARTYPE(*s, ct_space))
2720 while (PUGI__IS_CHARTYPE(*str, ct_space));
2727 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
// closing quote reached: flush pending gaps, then handle whitespace at the end of the value
2729 if (*s == end_quote)
2731 char_t* str = g.flush(s);
2734 while (PUGI__IS_CHARTYPE(*str, ct_space));
2738 else if (PUGI__IS_CHARTYPE(*s, ct_space))
// a further whitespace character follows: find the end of the whitespace run
2742 if (PUGI__IS_CHARTYPE(*s, ct_space))
2744 char_t* str = s + 1;
2745 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
2750 else if (opt_escape::value && *s == '&')
2752 s = strconv_escape(s, g);
// Variant that stops at characters flagged ct_parse_attr_ws and treats whitespace characters specially.
2762 static char_t* parse_wconv(char_t* s, char_t end_quote)
2768 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
2770 if (*s == end_quote)
2776 else if (PUGI__IS_CHARTYPE(*s, ct_space))
// a 0x0a is registered with g (NOTE(review): presumably the second half of a 0x0d 0x0a pair - confirm)
2782 if (*s == '\n') g.push(s, 1);
2786 else if (opt_escape::value && *s == '&')
2788 s = strconv_escape(s, g);
// Variant that stops at characters flagged ct_parse_attr and gives 0x0d special treatment.
2798 static char_t* parse_eol(char_t* s, char_t end_quote)
2804 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2806 if (*s == end_quote)
2812 else if (*s == '\r')
// a 0x0a is registered with g (NOTE(review): presumably so a 0x0d 0x0a pair becomes one character - confirm)
2816 if (*s == '\n') g.push(s, 1);
2818 else if (opt_escape::value && *s == '&')
2820 s = strconv_escape(s, g);
// Plain variant: only the closing quote and (optionally) '&' escapes are acted upon.
2830 static char_t* parse_simple(char_t* s, char_t end_quote)
2836 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2838 if (*s == end_quote)
2844 else if (opt_escape::value && *s == '&')
2846 s = strconv_escape(s, g);
// Picks the attribute value converter that matches optmask.
// (optmask >> 4) & 15 yields: bit 0 = parse_escapes, bit 1 = parse_eol, bit 2 = parse_wconv_attribute,
// bit 3 = parse_wnorm_attribute. As the table below shows, wnorm takes precedence over wconv and eol,
// wconv takes precedence over eol, and bit 0 selects the opt_true (escape-processing) instantiation.
// The index is masked to 0..15, so the default branch (assert, then return 0) is only a safety net.
2857 PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
// the shift below depends on these exact flag values
2859 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
2861 switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
2863 case 0: return strconv_attribute_impl<opt_false>::parse_simple;
2864 case 1: return strconv_attribute_impl<opt_true>::parse_simple;
2865 case 2: return strconv_attribute_impl<opt_false>::parse_eol;
2866 case 3: return strconv_attribute_impl<opt_true>::parse_eol;
2867 case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
2868 case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
2869 case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
2870 case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
2871 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
2872 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
2873 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
2874 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
2875 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
2876 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
2877 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
2878 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
2879 default: assert(false); return 0; // should not get here
// Creates an xml_parse_result and stores the given status and offset in it.
//   status - parse status to record
//   offset - position to record; defaults to 0
2883 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2885 xml_parse_result result;
2886 result.status = status;
2887 result.offset = offset;
// Parser state:
//   alloc        - working copy of the allocator handed to the constructor
//   alloc_state  - pointer to the allocator that 'alloc' was copied from
//   error_offset - starts at 0; parse() turns it into an offset relative to the buffer start
//   error_status - starts at status_ok; parse() copies it into the returned xml_parse_result
2894 xml_allocator alloc;
2895 xml_allocator* alloc_state;
2896 char_t* error_offset;
2897 xml_parse_status error_status;
// Copies *alloc_ into the working allocator and remembers where it came from.
2899 xml_parser(xml_allocator* alloc_): alloc(*alloc_), alloc_state(alloc_), error_offset(0), error_status(status_ok)
// write the working allocator copy back to the original allocator
// NOTE(review): presumably executed by the destructor - confirm
2905 *alloc_state = alloc;
2908 // DOCTYPE consists of nested sections of the following possible types:
2909 // <!-- ... -->, <? ... ?>, "...", '...'
2912 // First group can not contain nested groups
2913 // Second group can contain nested groups of the same type
2914 // Third group can contain all other groups
// Skips one primitive DOCTYPE group starting at s: a quoted string, a <? ... ?> section or a
// <!-- ... --> comment. Any other start, or a group that is still open when the terminating zero
// is reached, raises status_bad_doctype.
2915 char_t* parse_doctype_primitive(char_t* s)
// quoted string: scan for the matching quote character
2917 if (*s == '"' || *s == '\'')
2921 PUGI__SCANFOR(*s == ch);
2922 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
// processing-instruction style section
2926 else if (s[0] == '<' && s[1] == '?')
2930 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2931 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
// comment section
2935 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2938 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2939 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2943 else PUGI__THROW_ERROR(status_bad_doctype, s);
// Skips a DOCTYPE section that starts with "<![" (asserted on entry), recognizing nested "<!["
// openings and "]]>" endings. The final statement raises status_bad_doctype.
// NOTE(review): that statement is presumably reached only when the input ends before the section is closed - confirm.
2948 char_t* parse_doctype_ignore(char_t* s)
2952 assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2957 if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2959 // nested ignore section
2963 else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2965 // ignore section end
2976 PUGI__THROW_ERROR(status_bad_doctype, s);
// Skips a DOCTYPE group that begins with "<!" (the entry assert also tolerates a zero in place of '<').
// Nested content is delegated to parse_doctype_ignore and parse_doctype_primitive.
//   endch - compared against '>' in the final check before status_bad_doctype is raised
2979 char_t* parse_doctype_group(char_t* s, char_t endch)
2983 assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
// "<!" that does not start a comment ("<!-")
2988 if (s[0] == '<' && s[1] == '!' && s[2] != '-')
2993 s = parse_doctype_ignore(s);
2998 // some control group
3003 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
3005 // unknown tag (forbidden), or some primitive group
3006 s = parse_doctype_primitive(s);
// an unbalanced nesting depth, or an endch other than '>', is reported as a malformed DOCTYPE
3020 if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
// Parses markup introduced by "<!": comments ("<!--"), CDATA sections ("<![CDATA[") and DOCTYPE
// declarations.
//   s      - current position in the buffer
//   cursor - node that new comment / cdata / doctype nodes are pushed under
//   optmsk - parse option flags, tested through PUGI__OPTSET
//   endch  - passed to the strconv_* helpers, PUGI__ENDSWITH and parse_doctype_group, and used to
//            choose the error status when the terminating zero is hit
3025 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
3027 // parse node contents, starting with exclamation mark
3030 if (*s == '-') // '<!-...'
3034 if (*s == '-') // '<!--...'
3038 if (PUGI__OPTSET(parse_comments))
3040 PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
3041 cursor->value = s; // Save the offset.
// line-ending conversion is only worth doing when the comment text is actually kept
3044 if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
3046 s = strconv_comment(s, endch);
3048 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
3052 // Scan for terminating '-->'.
3053 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
3054 PUGI__CHECK_ERROR(status_bad_comment, s);
3056 if (PUGI__OPTSET(parse_comments))
3057 *s = 0; // Zero-terminate this segment at the first terminating '-'.
// advance by 3 when the '>' is present at s[2], otherwise by 2
3059 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
3062 else PUGI__THROW_ERROR(status_bad_comment, s);
// the pre-increments consume "CDATA[" one character at a time while matching it
3067 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
3071 if (PUGI__OPTSET(parse_cdata))
3073 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
3074 cursor->value = s; // Save the offset.
3076 if (PUGI__OPTSET(parse_eol))
3078 s = strconv_cdata(s, endch);
3080 if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
3084 // Scan for terminating ']]>'.
3085 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3086 PUGI__CHECK_ERROR(status_bad_cdata, s);
3088 *s++ = 0; // Zero-terminate this segment.
3091 else // Flagged for discard, but we still have to scan for the terminator.
3093 // Scan for terminating ']]>'.
3094 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3095 PUGI__CHECK_ERROR(status_bad_cdata, s);
// advance by 2 when the '>' is present at s[1], otherwise by 1
3100 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
3102 else PUGI__THROW_ERROR(status_bad_cdata, s);
3104 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
// a DOCTYPE is rejected when the current node has a parent
3108 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
// NOTE(review): s + 9 presumably points just past "<!DOCTYPE" - confirm against the adjustment of s made before this line
3110 char_t* mark = s + 9;
3112 s = parse_doctype_group(s, endch);
3115 assert((*s == 0 && endch == '>') || *s == '>');
3118 if (PUGI__OPTSET(parse_doctype))
// skip whitespace so the stored value starts at the first non-space character
3120 while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
3122 PUGI__PUSHNODE(node_doctype);
3124 cursor->value = mark;
// terminating zero reached: endch decides which construct is reported as broken
3127 else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
3128 else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
3129 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
// Parses markup introduced by "<?": the XML declaration and processing instructions.
//   s          - current position in the buffer
//   ref_cursor - current node; read into a local on entry and written back before returning
//   optmsk     - parse option flags, tested through PUGI__OPTSET
//   endch      - used by PUGI__ENDSWITH when matching the closing '>'
// Malformed input raises status_bad_pi.
3134 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
3136 // load into registers
3137 xml_node_struct* cursor = ref_cursor;
3140 // parse node contents, starting with question mark
// the target name must begin with a valid start symbol
3146 if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
3148 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
3149 PUGI__CHECK_ERROR(status_bad_pi, s);
3151 // determine node type; stricmp / strcasecmp is not portable
// OR-ing with ' ' maps ASCII upper-case letters onto lower-case ones; the target must be exactly three characters long
3152 bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
// build a node only when the option matching this kind of construct is enabled
3154 if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
3158 // disallow non top-level declarations
3159 if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
3161 PUGI__PUSHNODE(node_declaration);
3165 PUGI__PUSHNODE(node_pi);
3168 cursor->name = target;
3172 // parse value/attributes
3176 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
3181 else if (PUGI__IS_CHARTYPE(ch, ct_space))
3188 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3189 PUGI__CHECK_ERROR(status_bad_pi, s);
3193 // replace ending ? with / so that 'element' terminates properly
3196 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
3201 // store value and step over >
3202 cursor->value = value;
3211 else PUGI__THROW_ERROR(status_bad_pi, s);
// scan to the closing "?>" and step over it
// NOTE(review): presumably the path taken when no node is created for this construct - confirm
3216 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3217 PUGI__CHECK_ERROR(status_bad_pi, s);
// advance by 2 when the '>' is present at s[1], otherwise by 1
3219 s += (s[1] == '>' ? 2 : 1);
3222 // store from registers
3223 ref_cursor = cursor;
// Main parsing loop: walks the zero-terminated buffer starting at s and builds nodes and attributes
// below root.
//   s      - start of the data to parse
//   root   - node that parsed content is attached to; parsing must end with the cursor back at root
//   optmsk - parse option flags; also selects the attribute and PCDATA converters
//   endch  - value the caller saved from the last buffer position before overwriting it with zero;
//            lets the code tell a '>' that was replaced by the terminator from a truncated document
// Errors are raised through PUGI__THROW_ERROR with a status and the offending position.
3228 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
// resolve the converters once so the loop calls them through plain function pointers
3230 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
3231 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
// guard against a null PCDATA converter (get_strconv_pcdata returns 0 from its default branch)
3232 if (!strconv_pcdata) {
3236 xml_node_struct* cursor = root;
3246 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
3248 PUGI__PUSHNODE(node_element); // Append a new node to the tree.
3252 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3253 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3259 else if (PUGI__IS_CHARTYPE(ch, ct_space))
3264 PUGI__SKIPWS(); // Eat any whitespace.
3266 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
3268 xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute.
3269 if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
3271 a->name = s; // Save the offset.
3273 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3274 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3276 if (PUGI__IS_CHARTYPE(ch, ct_space))
3278 PUGI__SKIPWS(); // Eat any whitespace.
3284 if (ch == '=') // '<... #=...'
3286 PUGI__SKIPWS(); // Eat any whitespace.
3288 if (*s == '"' || *s == '\'') // '<... #="...'
3290 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
3291 ++s; // Step over the quote.
3292 a->value = s; // Save the offset.
// convert the value in place; a null result means the closing quote was not found
3294 s = strconv_attribute(s, ch);
3296 if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
3298 // After this line the loop continues from the start;
3299 // Whitespaces, / and > are ok, symbols and EOF are wrong,
3300 // everything else will be detected
3301 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
3303 else PUGI__THROW_ERROR(status_bad_attribute, s);
3305 else PUGI__THROW_ERROR(status_bad_attribute, s);
// terminating zero reached while endch is '>': the closing '>' was the overwritten last character
3317 else if (*s == 0 && endch == '>')
3322 else PUGI__THROW_ERROR(status_bad_start_element, s);
3330 else if (*s == 0 && endch == '>')
3334 else PUGI__THROW_ERROR(status_bad_start_element, s);
3339 else if (ch == '/') // '<#.../'
3341 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
3343 PUGI__POPNODE(); // Pop.
3349 // we stepped over null terminator, backtrack & handle closing tag
3352 if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
3354 else PUGI__THROW_ERROR(status_bad_start_element, s);
// closing tag: its name is compared character by character with the name of the current node
3360 char_t* name = cursor->name;
3361 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
3363 while (PUGI__IS_CHARTYPE(*s, ct_symbol))
3365 if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
// buffer ended and the only unmatched name character equals endch: reported as a bad end element, not a mismatch
3370 if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
3371 else PUGI__THROW_ERROR(status_end_element_mismatch, s);
3374 PUGI__POPNODE(); // Pop.
3380 if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3384 if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3388 else if (*s == '?') // '<?...'
3390 s = parse_question(s, cursor, optmsk, endch);
// parse_question leaves the cursor on a declaration node when its attributes still have to be parsed
3394 if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
3396 else if (*s == '!') // '<!...'
3398 s = parse_exclamation(s, cursor, optmsk, endch);
3401 else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
3402 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
3406 mark = s; // Save this offset while searching for a terminator.
3408 PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
3410 if (*s == '<' || !*s)
3412 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
// whitespace-only text: the parse_ws_pcdata, parse_ws_pcdata_single and parse_trim_pcdata options are consulted here
3415 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
3419 else if (PUGI__OPTSET(parse_ws_pcdata_single))
// skip the whitespace unless it is directly followed by "</" and the current node has no children
3421 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
3425 if (!PUGI__OPTSET(parse_trim_pcdata))
// text is processed only below the root, or at root level when parse_fragment is set
3428 if (cursor->parent || PUGI__OPTSET(parse_fragment))
// parse_embed_pcdata: store the text directly in the element while it has no children and no value yet
3430 if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
3432 cursor->value = s; // Save the offset.
3436 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
3438 cursor->value = s; // Save the offset.
3440 PUGI__POPNODE(); // Pop since this is a standalone.
3443 s = strconv_pcdata(s);
// scan forward to the next '<'
3449 PUGI__SCANFOR(*s == '<'); // '...<'
3460 // check that last tag is closed
3461 if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
// Returns s advanced past a leading byte order mark, or s unchanged when there is none.
// Wide-character builds test for the single code unit 0xfeff.
3466 #ifdef PUGIXML_WCHAR_MODE
3467 static char_t* parse_skip_bom(char_t* s)
3469 unsigned int bom = 0xfeff;
3470 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
// Other builds test for the three-byte sequence 0xef 0xbb 0xbf.
3473 static char_t* parse_skip_bom(char_t* s)
3475 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
// Follows the next_sibling chain starting at node and returns true as soon as a node of type
// node_element is found.
3479 static bool has_element_node_siblings(xml_node_struct* node)
3483 if (PUGI__NODETYPE(node) == node_element) return true;
3485 node = node->next_sibling;
// Entry point of the parser: parses 'length' characters of 'buffer' into nodes below 'root'.
//   buffer - mutable input; its last character is saved and overwritten with zero
//   length - number of characters in buffer
//   xmldoc - document object, used as the xml_allocator for the parser
//   root   - node that receives the parsed content
//   optmsk - parse option flags, tested through PUGI__OPTSET
// Returns an xml_parse_result built from the parser's error status and error offset.
3491 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
3493 // early-out for empty documents
// an empty input is acceptable only when parse_fragment is set
3495 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
3497 // get last child of the root before parsing
3498 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
3500 // create parser on stack
3501 xml_parser parser(static_cast<xml_allocator*>(xmldoc));
3503 // save last character and make buffer zero-terminated (speeds up parsing)
3504 char_t endch = buffer[length - 1];
3505 buffer[length - 1] = 0;
3507 // skip BOM to make sure it does not end up as part of parse output
3508 char_t* buffer_data = parse_skip_bom(buffer);
3510 // perform actual parsing
3511 parser.parse_tree(buffer_data, root, optmsk, endch);
// error_offset is a pointer into buffer; convert it to an index (0 when no position was recorded)
3513 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
3514 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
3518 // since we removed last character, we have to handle the only possible false positive (stray <)
3520 return make_parse_result(status_unrecognized_tag, length - 1);
3522 // check if there are any element nodes parsed
// nodes added by this call start after last_root_child, or at root->first_child if root had no children before
3523 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
3525 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
3526 return make_parse_result(status_no_document_element, length - 1);
3530 // roll back offset if it occurs on a null terminator in the source buffer
3531 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
3539 // Output facilities
// Returns the encoding that char_t data is already in: the wchar encoding in PUGIXML_WCHAR_MODE
// builds, encoding_utf8 otherwise.
3540 PUGI__FN xml_encoding get_write_native_encoding()
3542 #ifdef PUGIXML_WCHAR_MODE
3543 return get_wchar_encoding();
3545 return encoding_utf8;
// Resolves a requested output encoding to a concrete one: encoding_wchar and the endian-neutral
// utf16 / utf32 values are replaced by specific encodings, any other explicit value is returned
// unchanged, and encoding_auto becomes encoding_utf8.
3549 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
3551 // replace wchar encoding with utf implementation
3552 if (encoding == encoding_wchar) return get_wchar_encoding();
3554 // replace utf16 encoding with utf16 with specific endianness
3555 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3557 // replace utf32 encoding with utf32 with specific endianness
3558 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3560 // only do autodetection if no explicit encoding is requested
3561 if (encoding != encoding_auto) return encoding;
3563 // assume utf8 encoding
3564 return encoding_utf8;
// Converts 'length' char_t units from 'data' into 'dest' using decoder D and writer T.
// Returns the number of BYTES written to dest (element count times the element size).
// D and T are passed as unnamed tag arguments and only select the template instantiation.
3567 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
// the reinterpret_cast below is only meaningful when the decoder's unit type has the size of char_t
3569 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3571 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3573 return static_cast<size_t>(end - dest) * sizeof(*dest);
// Overload that takes an additional opt_swap flag. Converts 'length' char_t units from 'data' into
// 'dest' using decoder D and writer T, contains a pass that replaces each written element with
// endian_swap of itself, and returns the number of BYTES written to dest.
// NOTE(review): the swap pass is presumably performed only when opt_swap is true - confirm.
3576 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
// the reinterpret_cast below is only meaningful when the decoder's unit type has the size of char_t
3578 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3580 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3584 for (typename T::value_type i = dest; i != end; ++i)
3585 *i = endian_swap(*i);
3588 return static_cast<size_t>(end - dest) * sizeof(*dest);
3591 #ifdef PUGIXML_WCHAR_MODE
// Wide-character build: returns how many of the first 'length' units of data can be converted
// without cutting a UTF-16 surrogate pair in half. Returns 0 for an empty range.
3592 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3594 if (length < 1) return 0;
3596 // discard last character if it's the lead of a surrogate pair
// only applies when wchar_t is 2 bytes wide; lead surrogates occupy 0xD800..0xDBFF
3597 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
// Wide-character build: converts 'length' char_t units from 'data' to 'encoding', writing into the
// destination that matches the target unit size (r_char, r_u8, r_u16 or r_u32).
// Returns the number of bytes produced. An unsupported encoding triggers an assert.
3600 PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3602 // only endian-swapping is required
3603 if (need_endian_swap_utf(encoding, get_wchar_encoding()))
3605 convert_wchar_endian_swap(r_char, data, length);
3607 return length * sizeof(char_t);
3611 if (encoding == encoding_utf8)
3612 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
3615 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3617 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
// request a byte swap when the target byte order differs from the machine's
3619 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
3623 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3625 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
// request a byte swap when the target byte order differs from the machine's
3627 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
3630 // convert to latin1
3631 if (encoding == encoding_latin1)
3632 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
3634 assert(false && "Invalid encoding");
// UTF-8 build: returns a prefix length of data that does not end inside a multi-byte sequence.
// Inputs shorter than 5 units yield 0. Otherwise the last four bytes are inspected from the end and
// the prefix is cut just before the last byte that is not a continuation byte (10xxxxxx); that
// byte itself is excluded, even when it is a complete single-byte character.
3638 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3640 if (length < 5) return 0;
3642 for (size_t i = 1; i <= 4; ++i)
3644 uint8_t ch = static_cast<uint8_t>(data[length - i]);
3646 // either a standalone character or a leading one
3647 if ((ch & 0xc0) != 0x80) return length - i;
3650 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
// UTF-8 build: converts 'length' char_t units from 'data' to 'encoding', writing into r_u8, r_u16
// or r_u32 depending on the target unit size; the r_char parameter is unused (its name is commented out).
// Returns the number of bytes produced. An unsupported encoding triggers an assert.
3654 PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3656 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3658 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
// request a byte swap when the target byte order differs from the machine's
3660 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
3663 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3665 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
// request a byte swap when the target byte order differs from the machine's
3667 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
3670 if (encoding == encoding_latin1)
3671 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
3673 assert(false && "Invalid encoding");
// Buffers char_t output and forwards it to an xml_writer. Data already in the native encoding is
// written as is; otherwise it is converted into a scratch area via convert_buffer_output first.
3678 class xml_buffered_writer
// copy operations are declared here without a body (NOTE(review): presumably to make the class non-copyable - confirm)
3680 xml_buffered_writer(const xml_buffered_writer&);
3681 xml_buffered_writer& operator=(const xml_buffered_writer&);
// Starts with an empty buffer; user_encoding is resolved to a concrete encoding by get_write_encoding.
3684 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
// the largest fixed-arity write() below appends 6 characters, so the buffer must hold at least that many
3686 PUGI__STATIC_ASSERT(bufcapacity >= 8);
// send the pending buffer contents to the writer
3691 flush(buffer, bufsize);
// Writes 'size' char_t units starting at 'data' to the underlying writer, converting if needed.
3696 void flush(const char_t* data, size_t size)
3698 if (size == 0) return;
3700 // fast path, just write data
3701 if (encoding == get_write_native_encoding())
3702 writer.write(data, size * sizeof(char_t));
// slow path: convert into scratch; the result is a byte count and must fit into the scratch area
3706 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3707 assert(result <= sizeof(scratch));
3710 writer.write(scratch.data_u8, result);
// Writes data that does not fit into the remaining buffer space.
3714 void write_direct(const char_t* data, size_t length)
3716 // flush the remaining buffer contents
3719 // handle large chunks
3720 if (length > bufcapacity)
3722 if (encoding == get_write_native_encoding())
3724 // fast path, can just write data chunk
3725 writer.write(data, length * sizeof(char_t));
3729 // need to convert in suitable chunks
3730 while (length > bufcapacity)
3732 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3733 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3734 size_t chunk_size = get_valid_length(data, bufcapacity);
3737 // convert chunk and write
3738 flush(data, chunk_size);
3742 length -= chunk_size;
3745 // small tail is copied below
3749 memcpy(buffer + bufsize, data, length * sizeof(char_t));
// Appends 'length' char_t units: copied into the buffer when they fit, otherwise handed to write_direct.
3753 void write_buffer(const char_t* data, size_t length)
3755 size_t offset = bufsize;
3757 if (offset < bufcapacity && offset + length <= bufcapacity)
3759 memcpy(buffer + offset, data, length * sizeof(char_t));
3760 bufsize = offset + length;
3764 write_direct(data, length);
// Appends a zero-terminated string.
3768 void write_string(const char_t* data)
3770 // write the part of the string that fits in the buffer
3771 size_t offset = bufsize;
3773 while (*data && offset < bufcapacity)
3774 buffer[offset++] = *data++;
// buffer not full: the copy loop can only have stopped at the end of the string
3777 if (offset < bufcapacity)
3783 // backtrack a bit if we have split the codepoint
3784 size_t length = offset - bufsize;
3785 size_t extra = length - get_valid_length(data - length, length);
3787 bufsize = offset - extra;
// the rest of the string, including the units that were backed out, goes through write_direct
3789 write_direct(data - extra, strlength(data) + extra);
// Fixed-arity writes of 1 to 6 characters. Each one calls the parameterless flush() first when
// fewer than the required number of free slots remain, and continues at the offset it returns.
3793 void write(char_t d0)
3795 size_t offset = bufsize;
3796 if (offset > bufcapacity - 1) offset = flush();
3798 buffer[offset + 0] = d0;
3799 bufsize = offset + 1;
3802 void write(char_t d0, char_t d1)
3804 size_t offset = bufsize;
3805 if (offset > bufcapacity - 2) offset = flush();
3807 buffer[offset + 0] = d0;
3808 buffer[offset + 1] = d1;
3809 bufsize = offset + 2;
3812 void write(char_t d0, char_t d1, char_t d2)
3814 size_t offset = bufsize;
3815 if (offset > bufcapacity - 3) offset = flush();
3817 buffer[offset + 0] = d0;
3818 buffer[offset + 1] = d1;
3819 buffer[offset + 2] = d2;
3820 bufsize = offset + 3;
3823 void write(char_t d0, char_t d1, char_t d2, char_t d3)
3825 size_t offset = bufsize;
3826 if (offset > bufcapacity - 4) offset = flush();
3828 buffer[offset + 0] = d0;
3829 buffer[offset + 1] = d1;
3830 buffer[offset + 2] = d2;
3831 buffer[offset + 3] = d3;
3832 bufsize = offset + 4;
3835 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3837 size_t offset = bufsize;
3838 if (offset > bufcapacity - 5) offset = flush();
3840 buffer[offset + 0] = d0;
3841 buffer[offset + 1] = d1;
3842 buffer[offset + 2] = d2;
3843 buffer[offset + 3] = d3;
3844 buffer[offset + 4] = d4;
3845 bufsize = offset + 5;
3848 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3850 size_t offset = bufsize;
3851 if (offset > bufcapacity - 6) offset = flush();
3853 buffer[offset + 0] = d0;
3854 buffer[offset + 1] = d1;
3855 buffer[offset + 2] = d2;
3856 buffer[offset + 3] = d3;
3857 buffer[offset + 4] = d4;
3858 buffer[offset + 5] = d5;
3859 bufsize = offset + 6;
3862 // utf8 maximum expansion: x4 (-> utf32)
3863 // utf16 maximum expansion: x2 (-> utf32)
3864 // utf32 maximum expansion: x1
// PUGIXML_MEMORY_OUTPUT_STACK, when defined, is used as a configuration value here
// (NOTE(review): presumably the byte budget bufcapacitybytes - confirm)
3868 #ifdef PUGIXML_MEMORY_OUTPUT_STACK
3869 PUGIXML_MEMORY_OUTPUT_STACK
// each buffered character costs sizeof(char_t) bytes in 'buffer' plus 4 bytes of scratch space
3874 bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
3877 char_t buffer[bufcapacity];
// conversion targets, one array per output unit type
3881 uint8_t data_u8[4 * bufcapacity];
3882 uint16_t data_u16[2 * bufcapacity];
3883 uint32_t data_u32[bufcapacity];
3884 char_t data_char[bufcapacity];
// concrete output encoding, set in the constructor
3889 xml_encoding encoding;
// Writes s to writer while replacing characters that are flagged with 'type' (tested through
// PUGI__IS_CHARTYPEX). Runs of unflagged characters are written unchanged in one call.
// The replacements emitted are "&amp;", "&lt;", "&gt;", "&quot;" and, for everything else, a
// decimal character reference.
3892 PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
3896 const char_t* prev = s;
3898 // While *s is a usual symbol
3899 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
// flush the run of ordinary characters collected since 'prev'
3901 writer.write_buffer(prev, static_cast<size_t>(s - prev));
3907 writer.write('&', 'a', 'm', 'p', ';');
3911 writer.write('&', 'l', 't', ';');
3915 writer.write('&', 'g', 't', ';');
3919 writer.write('&', 'q', 'u', 'o', 't', ';');
3922 default: // s is not a usual symbol
3924 unsigned int ch = static_cast<unsigned int>(*s++);
// exactly two decimal digits are written, so this form is only correct for ch < 100
// NOTE(review): presumably only control characters reach this branch - confirm
3927 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
// Writes s either verbatim (when format_no_escapes is set in flags) or through text_output_escaped
// using the character class 'type'.
3933 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3935 if (flags & format_no_escapes)
3936 writer.write_string(s);
3938 text_output_escaped(writer, s, type);
// Writes s wrapped in CDATA markup. Because "]]>" cannot appear inside a CDATA section, the text is
// cut at every such sequence so that the '>' ends up in a following section.
3941 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
// "<![CDATA[" is 9 characters, so it is emitted with two fixed-arity writes (5 + 4)
3945 writer.write('<', '!', '[', 'C', 'D');
3946 writer.write('A', 'T', 'A', '[');
3948 const char_t* prev = s;
3950 // look for ]]> sequence - we can't output it as is since it terminates CDATA
3951 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3953 // skip ]] if we stopped at ]]>, > will go to the next CDATA section
// number of characters belonging to the current section
3956 size_t bufLenght = static_cast<size_t>(s - prev);
3958 writer.write_buffer(prev, bufLenght);
3960 writer.write(']', ']', '>');
// Writes the indent string 'depth' times. The switch on indent_length selects loops that pass one
// to four indent characters to the fixed-arity writer.write overloads; the remaining loop uses
// write_buffer with the full indent_length.
3965 PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
3967 switch (indent_length)
3971 for (unsigned int i = 0; i < depth; ++i)
3972 writer.write(indent[0]);
3978 for (unsigned int i = 0; i < depth; ++i)
3979 writer.write(indent[0], indent[1]);
3985 for (unsigned int i = 0; i < depth; ++i)
3986 writer.write(indent[0], indent[1], indent[2]);
3992 for (unsigned int i = 0; i < depth; ++i)
3993 writer.write(indent[0], indent[1], indent[2], indent[3]);
3999 for (unsigned int i = 0; i < depth; ++i)
4000 writer.write_buffer(indent, indent_length);
// Writes s as an XML comment ("<!--" ... "-->"). The text is cut wherever a '-' is followed by
// another '-' or by the end of the string, and "- " is written at such places so the output never
// contains "--" inside the comment body.
4005 PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
4007 writer.write('<', '!', '-', '-');
4011 const char_t* prev = s;
4013 // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
4014 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
// number of characters that can be written unchanged
4016 size_t bufLenght = static_cast<size_t>(s - prev);
4018 writer.write_buffer(prev, bufLenght);
// a space after the '-' separates it from whatever follows
4024 writer.write('-', ' ');
4029 writer.write('-', '-', '>');
// Writes the value of a processing instruction. Every "?>" inside the value is written as "? >"
// so that it cannot end the instruction prematurely.
4032 PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
4036 const char_t* prev = s;
4038 // look for ?> sequence - we can't output it since ?> terminates PI
4039 while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
// write the part that needs no change
4041 writer.write_buffer(prev, static_cast<size_t>(s - prev));
4045 assert(s[0] == '?' && s[1] == '>');
4047 writer.write('?', ' ', '>');
// Writes all attributes of node in list order as name="value".
//   indent, indent_length, depth - used to indent attributes, one level deeper than the node
//   flags - format flags; attribute indentation applies only when format_indent_attributes is set
//           and format_raw is not
// Attributes without a name are written as ":anonymous".
4053 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4055 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4057 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4059 if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
4063 text_output_indent(writer, indent, indent_length, depth + 1);
4070 writer.write_string(a->name ? a->name + 0 : default_name);
4071 writer.write('=', '"');
// the value is written with the attribute-specific set of escaped characters
4074 text_output(writer, a->value, ctx_special_attr, flags);
// Writes the opening part of an element: its name, its attributes and, if present, its embedded
// value. An element without children is closed here, either as "/>" or with an explicit end tag
// after the embedded value. Elements without a name are written as ":anonymous".
// The bool result is tested by node_output before it descends into node->first_child.
4080 PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4082 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4083 const char_t* name = node->name ? node->name + 0 : default_name;
4086 writer.write_string(name);
4088 if (node->first_attribute)
4089 node_output_attributes(writer, node, indent, indent_length, flags, depth);
4091 // element nodes can have value if parse_embed_pcdata was used
4094 if (!node->first_child)
4096 if ((flags & format_raw) == 0)
4099 writer.write('/', '>');
// embedded value, written with PCDATA escaping
4114 text_output(writer, node->value, ctx_special_pcdata, flags);
// no children: the element is closed right after its value
4116 if (!node->first_child)
4118 writer.write('<', '/');
4119 writer.write_string(name);
// Writes the end tag of node: "</" followed by the node name, or ":anonymous" when the node has no name.
4131 PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
4133 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4134 const char_t* name = node->name ? node->name + 0 : default_name;
4136 writer.write('<', '/');
4137 writer.write_string(name);
// Writes a node that has no children to serialize, dispatching on its type: plain text, CDATA,
// comment, processing instruction, declaration or DOCTYPE. Missing values are treated as empty
// strings and missing names as ":anonymous". Any other node type triggers an assert.
4141 PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
4143 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4145 switch (PUGI__NODETYPE(node))
// plain text, escaped as PCDATA
4148 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
4152 text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4156 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
// processing instruction: "<?" name, the value, then "?>"
4160 writer.write('<', '?');
4161 writer.write_string(node->name ? node->name + 0 : default_name);
4166 node_output_pi_value(writer, node->value);
4169 writer.write('?', '>');
4172 case node_declaration:
4173 writer.write('<', '?');
4174 writer.write_string(node->name ? node->name + 0 : default_name);
// declaration attributes are written with format_raw added and an empty indent
4175 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
4176 writer.write('?', '>');
// "<!DOCTYPE" is 9 characters, so it is emitted with two fixed-arity writes (5 + 4)
4180 writer.write('<', '!', 'D', 'O', 'C');
4181 writer.write('T', 'Y', 'P', 'E');
4186 writer.write_string(node->value);
4193 assert(false && "Invalid node type");
// Serializes the subtree rooted at 'root' without recursion: the tree is walked through
// first_child / next_sibling / parent links.
//   indent - indent string; its length is used only when an indent flag is set and format_raw is not
//   flags  - format flags
//   depth  - indentation depth passed to text_output_indent
// indent_flags records whether a newline and/or an indent is due before the next piece of output.
4203 PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
4205 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
4206 unsigned int indent_flags = indent_indent;
4208 xml_node_struct* node = root;
4214 // begin writing current node
// text and CDATA nodes are written without the newline / indent handling used for other nodes
4215 if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
4217 node_output_simple(writer, node, flags);
4223 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4226 if ((indent_flags & indent_indent) && indent_length)
4227 text_output_indent(writer, indent, indent_length, depth);
4229 if (PUGI__NODETYPE(node) == node_element)
4231 indent_flags = indent_newline | indent_indent;
// the result of node_output_start guards the descent into the children
4233 if (node_output_start(writer, node, indent, indent_length, flags, depth))
4235 // element nodes can have value if parse_embed_pcdata was used
4239 node = node->first_child;
4244 else if (PUGI__NODETYPE(node) == node_document)
4246 indent_flags = indent_indent;
4248 if (node->first_child)
4250 node = node->first_child;
4256 node_output_simple(writer, node, flags);
4258 indent_flags = indent_newline | indent_indent;
4262 // continue to the next node
4263 while (node != root)
4265 if (node->next_sibling)
4267 node = node->next_sibling;
// no further sibling: go back up to the parent
4271 node = node->parent;
4273 // write closing node
4274 if (PUGI__NODETYPE(node) == node_element)
4278 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4281 if ((indent_flags & indent_indent) && indent_length)
4282 text_output_indent(writer, indent, indent_length, depth);
4284 node_output_end(writer, node);
4286 indent_flags = indent_newline | indent_indent;
4290 while (node != root);
4292 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4296 PUGI__FN bool has_declaration(xml_node_struct* node)
4298 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
4300 xml_node_type type = PUGI__NODETYPE(child);
4302 if (type == node_declaration) return true;
4303 if (type == node_element) return false;
4309 PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
4311 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4318 PUGI__FN bool allow_insert_attribute(xml_node_type parent)
4320 return parent == node_element || parent == node_declaration;
4323 PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
4325 if (parent != node_document && parent != node_element) return false;
4326 if (child == node_document || child == node_null) return false;
4327 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
4332 PUGI__FN bool allow_move(xml_node parent, xml_node child)
4334 // check that child can be a child of parent
4335 if (!allow_insert_child(parent.type(), child.type()))
4338 // check that node is not moved between documents
4339 if (parent.root() != child.root())
4342 // check that new parent is not in the child subtree
4343 xml_node cur = parent;
4356 template <typename String, typename Header>
4357 PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
4359 assert(!dest && (header & header_mask) == 0);
4363 if (alloc && (source_header & header_mask) == 0)
4367 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
4368 header |= xml_memory_page_contents_shared_mask;
4369 source_header |= xml_memory_page_contents_shared_mask;
4372 strcpy_insitu(dest, header, header_mask, source, strlength(source));
4376 PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
4378 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
4379 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
4381 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
4383 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4387 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4388 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4393 PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
4395 xml_allocator& alloc = get_allocator(dn);
4396 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
4398 node_copy_contents(dn, sn, shared_alloc);
4400 xml_node_struct* dit = dn;
4401 xml_node_struct* sit = sn->first_child;
4403 while (sit && sit != sn)
4407 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
4411 node_copy_contents(copy, sit, shared_alloc);
4413 if (sit->first_child)
4416 sit = sit->first_child;
4422 // continue to the next node
4425 if (sit->next_sibling)
4427 sit = sit->next_sibling;
4438 PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
4440 xml_allocator& alloc = get_allocator(da);
4441 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4443 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4444 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4447 inline bool is_text_node(xml_node_struct* node)
4449 xml_node_type type = PUGI__NODETYPE(node);
4451 return type == node_pcdata || type == node_cdata;
4454 // get value with conversion functions
4455 template <typename U> U string_to_integer(const char_t* value, U minneg, U maxpos)
4458 const char_t* s = value;
4460 while (PUGI__IS_CHARTYPE(*s, ct_space))
4463 bool negative = (*s == '-');
4465 s += (*s == '+' || *s == '-');
4467 bool overflow = false;
4469 if (s[0] == '0' && (s[1] | ' ') == 'x')
4473 // since overflow detection relies on length of the sequence skip leading zeros
4477 const char_t* start = s;
4481 if (static_cast<unsigned>(*s - '0') < 10)
4482 result = result * 16 + (*s - '0');
4483 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
4484 result = result * 16 + ((*s | ' ') - 'a' + 10);
4491 size_t digits = static_cast<size_t>(s - start);
4493 overflow = digits > sizeof(U) * 2;
4497 // since overflow detection relies on length of the sequence skip leading zeros
4501 const char_t* start = s;
4505 if (static_cast<unsigned>(*s - '0') < 10)
4506 result = result * 10 + (*s - '0');
4513 size_t digits = static_cast<size_t>(s - start);
4515 PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
4517 const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
4518 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
4519 const size_t high_bit = sizeof(U) * 8 - 1;
4521 overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
4525 return (overflow || result > minneg) ? 0 - minneg : 0 - result;
4527 return (overflow || result > maxpos) ? maxpos : result;
4530 PUGI__FN int get_value_int(const char_t* value)
4532 return string_to_integer<unsigned int>(value, 0 - static_cast<unsigned int>(INT_MIN), INT_MAX);
4535 PUGI__FN unsigned int get_value_uint(const char_t* value)
4537 return string_to_integer<unsigned int>(value, 0, UINT_MAX);
4540 PUGI__FN double get_value_double(const char_t* value)
4542 #ifdef PUGIXML_WCHAR_MODE
4543 return wcstod(value, 0);
4545 return strtod(value, 0);
4549 PUGI__FN float get_value_float(const char_t* value)
4551 #ifdef PUGIXML_WCHAR_MODE
4552 return static_cast<float>(wcstod(value, 0));
4554 return static_cast<float>(strtod(value, 0));
4558 PUGI__FN bool get_value_bool(const char_t* value)
4560 // only look at first char
4561 char_t first = *value;
4563 // 1*, t* (true), T* (True), y* (yes), Y* (YES)
4564 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
#ifdef PUGIXML_HAS_LONG_LONG
// Parses `value` as long long, saturating at LLONG_MIN / LLONG_MAX.
PUGI__FN long long get_value_llong(const char_t* value)
{
	// magnitude of LLONG_MIN computed in unsigned arithmetic to avoid signed overflow
	const unsigned long long min_magnitude = 0 - static_cast<unsigned long long>(LLONG_MIN);

	return string_to_integer<unsigned long long>(value, min_magnitude, LLONG_MAX);
}

// Parses `value` as unsigned long long, saturating at ULLONG_MAX; the negative bound is 0.
PUGI__FN unsigned long long get_value_ullong(const char_t* value)
{
	const unsigned long long parsed = string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);

	return parsed;
}
#endif
// Maps an integer type to the unsigned type of the same width.
// Only the types used by the integer setters are specialized; any other T
// leaves the primary template undefined and fails to compile.
template <typename T> struct make_unsigned;

template <> struct make_unsigned<int> { typedef unsigned int type; };
template <> struct make_unsigned<unsigned int> { typedef unsigned int type; };
template <> struct make_unsigned<long> { typedef unsigned long type; };
template <> struct make_unsigned<unsigned long> { typedef unsigned long type; };

#ifdef PUGIXML_HAS_LONG_LONG
template <> struct make_unsigned<long long> { typedef unsigned long long type; };
template <> struct make_unsigned<unsigned long long> { typedef unsigned long long type; };
#endif
4591 template <typename T>
4592 PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, T value)
4594 typedef typename make_unsigned<T>::type U;
4596 bool negative = value < 0;
4598 char_t* result = end - 1;
4599 U rest = negative ? 0 - U(value) : U(value);
4603 *result-- = static_cast<char_t>('0' + (rest % 10));
4608 assert(result >= begin);
4613 return result + !negative;
4616 // set value with conversion functions
4617 template <typename String, typename Header>
4618 PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
4620 #ifdef PUGIXML_WCHAR_MODE
4622 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4625 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4627 return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4629 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
4633 template <typename String, typename Header, typename Integer>
4634 PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, Integer value)
4637 char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4638 char_t* begin = integer_to_string(buf, end, value);
4640 return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4643 template <typename String, typename Header>
4644 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value)
4647 sprintf(buf, "%.9g", value);
4649 return set_value_ascii(dest, header, header_mask, buf);
4652 template <typename String, typename Header>
4653 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value)
4656 sprintf(buf, "%.17g", value);
4658 return set_value_ascii(dest, header, header_mask, buf);
4661 template <typename String, typename Header>
4662 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value)
4664 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
// Parses the memory block `contents` of `size` bytes into `root` of document `doc`.
// The visible steps are: reject a null buffer with non-zero size, determine the real
// encoding, convert the data into a parser buffer, optionally release the original
// block, hand the buffer to the caller via `out_buffer`, record it on the document,
// run the parser and store the detected encoding in the result.
// `own` controls whether `contents` is deallocated here once a conversion has produced
// a different buffer; `is_mutable` is forwarded to convert_buffer.
4667 PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4669 // check input buffer
4670 if (!contents && size) return make_parse_result(status_io_error);
4672 // get actual encoding
4673 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4675 // get private buffer
// NOTE(review): a result preset to status_internal_error with offset 0 is built here;
// the statement that returns or otherwise uses it is not visible in this view - confirm.
4680 xml_parse_result failed;
4681 failed.status = xml_parse_status::status_internal_error;
4682 failed.offset = (ptrdiff_t)0;
// a failed conversion is reported as status_out_of_memory
4686 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4688 // delete original buffer if we performed a conversion
4689 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4691 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4692 if (own || buffer != contents) *out_buffer = buffer;
4694 // store buffer for offset_debug
4695 doc->buffer = buffer;
4698 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4700 // remember encoding
4701 res.encoding = buffer_encoding;
4706 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
4707 PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
4709 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
4710 // there are 64-bit versions of fseek/ftell, let's use them
4711 typedef __int64 length_type;
4713 _fseeki64(file, 0, SEEK_END);
4714 length_type length = _ftelli64(file);
4715 _fseeki64(file, 0, SEEK_SET);
4716 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
4717 // there are 64-bit versions of fseek/ftell, let's use them
4718 typedef off64_t length_type;
4720 fseeko64(file, 0, SEEK_END);
4721 length_type length = ftello64(file);
4722 fseeko64(file, 0, SEEK_SET);
4724 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
4725 typedef long length_type;
4727 fseek(file, 0, SEEK_END);
4728 length_type length = ftell(file);
4729 fseek(file, 0, SEEK_SET);
4732 // check for I/O errors
4733 if (length < 0) return status_io_error;
4735 // check for overflow
4736 size_t result = static_cast<size_t>(length);
4738 if (static_cast<length_type>(result) != length) return status_out_of_memory;
4741 out_result = result;
4746 // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
4747 PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
4749 // We only need to zero-terminate if encoding conversion does not do it for us
4750 #ifdef PUGIXML_WCHAR_MODE
4751 xml_encoding wchar_encoding = get_wchar_encoding();
4753 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
4755 size_t length = size / sizeof(char_t);
4757 static_cast<char_t*>(buffer)[length] = 0;
4758 return (length + 1) * sizeof(char_t);
4761 if (encoding == encoding_utf8)
4763 static_cast<char*>(buffer)[size] = 0;
4771 PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4773 if (!file) return make_parse_result(status_file_not_found);
4775 // get file size (can result in I/O errors)
4777 xml_parse_status size_status = get_file_size(file, size);
4778 if (size_status != status_ok) return make_parse_result(size_status);
4780 size_t max_suffix_size = sizeof(char_t);
4782 // allocate buffer for the whole file
4783 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4784 if (!contents) return make_parse_result(status_out_of_memory);
4786 // read file in memory
4787 size_t read_size = fread(contents, 1, size, file);
4789 if (read_size != size)
4791 xml_memory::deallocate(contents);
4792 return make_parse_result(status_io_error);
4795 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4797 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
4800 PUGI__FN void close_file(FILE* file)
4805 #ifndef PUGIXML_NO_STL
4806 template <typename T> struct xml_stream_chunk
4808 static xml_stream_chunk* create()
4810 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4811 if (!memory) return 0;
4813 return new (memory) xml_stream_chunk();
4816 static void destroy(xml_stream_chunk* chunk)
4821 xml_stream_chunk* next_ = chunk->next;
4823 xml_memory::deallocate(chunk);
4829 xml_stream_chunk(): next(0), size(0)
4833 xml_stream_chunk* next;
4836 T data[xml_memory_page_size / sizeof(T)];
4839 template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4841 auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
4843 // read file to a chunk list
4845 xml_stream_chunk<T>* last = 0;
4847 while (!stream.eof())
4849 // allocate new chunk
4850 xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
4851 if (!chunk) return status_out_of_memory;
4853 // append chunk to list
4854 if (last) last = last->next = chunk;
4855 else chunks.data = last = chunk;
4857 // read data to chunk
4858 stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
4859 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
4861 // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
4862 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4864 // guard against huge files (chunk size is small enough to make this overflow check work)
4865 if (total + chunk->size < total) return status_out_of_memory;
4866 total += chunk->size;
4869 size_t max_suffix_size = sizeof(char_t);
4871 // copy chunk list to a contiguous buffer
4872 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
4873 if (!buffer) return status_out_of_memory;
4875 char* write = buffer;
4877 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
4879 assert(write + chunk->size <= buffer + total);
4880 memcpy(write, chunk->data, chunk->size);
4881 write += chunk->size;
4884 assert(write == buffer + total);
4887 *out_buffer = buffer;
4893 template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4895 // get length of remaining data in stream
4896 typename std::basic_istream<T>::pos_type pos = stream.tellg();
4897 stream.seekg(0, std::ios::end);
4898 std::streamoff length = stream.tellg() - pos;
4901 if (stream.fail() || pos < 0) return status_io_error;
4903 // guard against huge files
4904 size_t read_length = static_cast<size_t>(length);
4906 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
4908 size_t max_suffix_size = sizeof(char_t);
4910 // read stream data into memory (guard against stream exceptions with buffer holder)
4911 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
4912 if (!buffer.data) return status_out_of_memory;
4914 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
4916 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
4917 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4920 size_t actual_length = static_cast<size_t>(stream.gcount());
4921 assert(actual_length <= read_length);
4923 *out_buffer = buffer.release();
4924 *out_size = actual_length * sizeof(T);
4929 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4933 xml_parse_status status = status_ok;
4935 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
4936 if (stream.fail()) return make_parse_result(status_io_error);
4938 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
4939 if (stream.tellg() < 0)
4941 stream.clear(); // clear error flags that could be set by a failing tellg
4942 status = load_stream_data_noseek(stream, &buffer, &size);
4945 status = load_stream_data_seek(stream, &buffer, &size);
4947 if (status != status_ok) return make_parse_result(status);
4949 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
4951 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
4955 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
4956 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4958 return _wfopen(path, mode);
4961 PUGI__FN char* convert_path_heap(const wchar_t* str)
4965 // first pass: get length in utf8 characters
4966 size_t length = strlength_wide(str);
4967 size_t size = as_utf8_begin(str, length);
4969 // allocate resulting string
4970 char* result = static_cast<char*>(xml_memory::allocate(size + 1));
4971 if (!result) return 0;
4973 // second pass: convert to utf8
4974 as_utf8_end(result, size, str, length);
4982 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4984 // there is no standard function to open wide paths, so our best bet is to try utf8 path
4985 char* path_utf8 = convert_path_heap(path);
4986 if (!path_utf8) return 0;
4988 // convert mode to ASCII (we mirror _wfopen interface)
4989 char mode_ascii[4] = {0};
4990 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
4992 // try to open the utf8 path
4993 FILE* result = fopen(path_utf8, mode_ascii);
4995 // free dummy buffer
4996 xml_memory::deallocate(path_utf8);
5002 PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
5004 if (!file) return false;
5006 xml_writer_file writer(file);
5007 doc.save(writer, indent, flags, encoding);
5009 return ferror(file) == 0;
5012 struct name_null_sentry
5014 xml_node_struct* node;
5017 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
5031 PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
5035 PUGI__FN void xml_writer_file::write(const void* data, size_t size)
5037 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
5038 (void)!result; // unfortunately we can't do proper error handling here
5041 #ifndef PUGIXML_NO_STL
5042 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
5046 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
5050 PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
5054 assert(!wide_stream);
5055 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
5059 assert(wide_stream);
5060 assert(size % sizeof(wchar_t) == 0);
5062 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
5067 PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
5071 PUGI__FN xml_tree_walker::~xml_tree_walker()
5075 PUGI__FN int xml_tree_walker::depth() const
5080 PUGI__FN bool xml_tree_walker::begin(xml_node&)
5085 PUGI__FN bool xml_tree_walker::end(xml_node&)
5090 PUGI__FN xml_attribute::xml_attribute(): _attr(0)
5094 PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
5098 PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
5102 PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
5104 return _attr ? unspecified_bool_xml_attribute : 0;
5107 PUGI__FN bool xml_attribute::operator!() const
5112 PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
5114 return (_attr == r._attr);
5117 PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
5119 return (_attr != r._attr);
5122 PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
5124 return (_attr < r._attr);
5127 PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
5129 return (_attr > r._attr);
5132 PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
5134 return (_attr <= r._attr);
5137 PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
5139 return (_attr >= r._attr);
5142 PUGI__FN xml_attribute xml_attribute::next_attribute() const
5144 return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
5147 PUGI__FN xml_attribute xml_attribute::previous_attribute() const
5149 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
5152 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
5154 return (_attr && _attr->value) ? _attr->value + 0 : def;
5157 PUGI__FN int xml_attribute::as_int(int def) const
5159 return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
5162 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
5164 return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
5167 PUGI__FN double xml_attribute::as_double(double def) const
5169 return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
5172 PUGI__FN float xml_attribute::as_float(float def) const
5174 return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
5177 PUGI__FN bool xml_attribute::as_bool(bool def) const
5179 return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
#ifdef PUGIXML_HAS_LONG_LONG
// Returns the value parsed as long long, or `def` if the handle is empty or has no value.
PUGI__FN long long xml_attribute::as_llong(long long def) const
{
	if (_attr && _attr->value) return impl::get_value_llong(_attr->value);

	return def;
}

// Returns the value parsed as unsigned long long, or `def` if the handle is empty or has no value.
PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
{
	if (_attr && _attr->value) return impl::get_value_ullong(_attr->value);

	return def;
}
#endif
5194 PUGI__FN bool xml_attribute::empty() const
5199 PUGI__FN const char_t* xml_attribute::name() const
5201 return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
5204 PUGI__FN const char_t* xml_attribute::value() const
5206 return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
5209 PUGI__FN size_t xml_attribute::hash_value() const
5211 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
5214 PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
5219 PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
5225 PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
5231 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
5237 PUGI__FN xml_attribute& xml_attribute::operator=(long rhs)
5243 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
5249 PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
5255 PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
5261 PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
#ifdef PUGIXML_HAS_LONG_LONG
// Assigns a long long value via set_value; the success flag is discarded.
PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
{
	set_value(rhs);

	return *this;
}

// Assigns an unsigned long long value via set_value; the success flag is discarded.
PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
{
	set_value(rhs);

	return *this;
}
#endif
5281 PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
5283 if (!_attr) return false;
5285 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5288 PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
5290 if (!_attr) return false;
5292 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5295 PUGI__FN bool xml_attribute::set_value(int rhs)
5297 if (!_attr) return false;
5299 return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5302 PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
5304 if (!_attr) return false;
5306 return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5309 PUGI__FN bool xml_attribute::set_value(long rhs)
5311 if (!_attr) return false;
5313 return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5316 PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
5318 if (!_attr) return false;
5320 return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5323 PUGI__FN bool xml_attribute::set_value(double rhs)
5325 if (!_attr) return false;
5327 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5330 PUGI__FN bool xml_attribute::set_value(float rhs)
5332 if (!_attr) return false;
5334 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5337 PUGI__FN bool xml_attribute::set_value(bool rhs)
5339 if (!_attr) return false;
5341 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
#ifdef PUGIXML_HAS_LONG_LONG
// Stores a long long as the attribute value; returns false for an empty handle.
PUGI__FN bool xml_attribute::set_value(long long rhs)
{
	return _attr ? impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
}

// Stores an unsigned long long as the attribute value; returns false for an empty handle.
PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
{
	return _attr ? impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
}
#endif
5361 PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
5363 return (bool)lhs && rhs;
5366 PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
5368 return (bool)lhs || rhs;
5372 PUGI__FN xml_node::xml_node(): _root(0)
5376 PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
5380 PUGI__FN static void unspecified_bool_xml_node(xml_node***)
5384 PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
5386 return _root ? unspecified_bool_xml_node : 0;
5389 PUGI__FN bool xml_node::operator!() const
5394 PUGI__FN xml_node::iterator xml_node::begin() const
5396 return iterator(_root ? _root->first_child + 0 : 0, _root);
5399 PUGI__FN xml_node::iterator xml_node::end() const
5401 return iterator(0, _root);
5404 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
5406 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
5409 PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
5411 return attribute_iterator(0, _root);
5414 PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
5416 return xml_object_range<xml_node_iterator>(begin(), end());
5419 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
5421 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
5424 PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
5426 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
5429 PUGI__FN bool xml_node::operator==(const xml_node& r) const
5431 return (_root == r._root);
5434 PUGI__FN bool xml_node::operator!=(const xml_node& r) const
5436 return (_root != r._root);
5439 PUGI__FN bool xml_node::operator<(const xml_node& r) const
5441 return (_root < r._root);
5444 PUGI__FN bool xml_node::operator>(const xml_node& r) const
5446 return (_root > r._root);
5449 PUGI__FN bool xml_node::operator<=(const xml_node& r) const
5451 return (_root <= r._root);
5454 PUGI__FN bool xml_node::operator>=(const xml_node& r) const
5456 return (_root >= r._root);
5459 PUGI__FN bool xml_node::empty() const
5464 PUGI__FN const char_t* xml_node::name() const
5466 return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
5469 PUGI__FN xml_node_type xml_node::type() const
5471 return _root ? PUGI__NODETYPE(_root) : node_null;
5474 PUGI__FN const char_t* xml_node::value() const
5476 return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
5479 PUGI__FN xml_node xml_node::child(const char_t* name_) const
5481 if (!_root) return xml_node();
5483 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5484 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5489 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
5491 if (!_root) return xml_attribute();
5493 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
5494 if (i->name && impl::strequal(name_, i->name))
5495 return xml_attribute(i);
5497 return xml_attribute();
5500 PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
5502 if (!_root) return xml_node();
5504 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
5505 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5510 PUGI__FN xml_node xml_node::next_sibling() const
5512 return _root ? xml_node(_root->next_sibling) : xml_node();
5515 PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
5517 if (!_root) return xml_node();
5519 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
5520 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5525 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
5527 xml_attribute_struct* hint = hint_._attr;
5529 // if hint is not an attribute of node, behavior is not defined
5530 assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
5532 if (!_root) return xml_attribute();
5534 // optimistically search from hint up until the end
5535 for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
5536 if (i->name && impl::strequal(name_, i->name))
5538 // update hint to maximize efficiency of searching for consecutive attributes
5539 hint_._attr = i->next_attribute;
5541 return xml_attribute(i);
5544 // wrap around and search from the first attribute until the hint
5545 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
5546 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
5547 if (j->name && impl::strequal(name_, j->name))
5549 // update hint to maximize efficiency of searching for consecutive attributes
5550 hint_._attr = j->next_attribute;
5552 return xml_attribute(j);
5555 return xml_attribute();
5558 PUGI__FN xml_node xml_node::previous_sibling() const
5560 if (!_root) return xml_node();
5562 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
5563 else return xml_node();
5566 PUGI__FN xml_node xml_node::parent() const
5568 return _root ? xml_node(_root->parent) : xml_node();
5571 PUGI__FN xml_node xml_node::root() const
5573 return _root ? xml_node(&impl::get_document(_root)) : xml_node();
5576 PUGI__FN xml_text xml_node::text() const
5578 return xml_text(_root);
5581 PUGI__FN const char_t* xml_node::child_value() const
5583 if (!_root) return PUGIXML_TEXT("");
5585 // element nodes can have value if parse_embed_pcdata was used
5586 if (PUGI__NODETYPE(_root) == node_element && _root->value)
5587 return _root->value;
5589 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5590 if (impl::is_text_node(i) && i->value)
5593 return PUGIXML_TEXT("");
5596 PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
5598 return child(name_).child_value();
5601 PUGI__FN xml_attribute xml_node::first_attribute() const
5603 return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
5606 PUGI__FN xml_attribute xml_node::last_attribute() const
5608 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
5611 PUGI__FN xml_node xml_node::first_child() const
5613 return _root ? xml_node(_root->first_child) : xml_node();
5616 PUGI__FN xml_node xml_node::last_child() const
5618 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
5621 PUGI__FN bool xml_node::set_name(const char_t* rhs)
5623 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5625 if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
5628 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5631 PUGI__FN bool xml_node::set_value(const char_t* rhs)
5633 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5635 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
5638 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5641 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
5643 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5645 impl::xml_allocator& alloc = impl::get_allocator(_root);
5646 if (!alloc.reserve()) return xml_attribute();
5648 xml_attribute a(impl::allocate_attribute(alloc));
5649 if (!a) return xml_attribute();
5651 impl::append_attribute(a._attr, _root);
5658 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
5660 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5662 impl::xml_allocator& alloc = impl::get_allocator(_root);
5663 if (!alloc.reserve()) return xml_attribute();
5665 xml_attribute a(impl::allocate_attribute(alloc));
5666 if (!a) return xml_attribute();
5668 impl::prepend_attribute(a._attr, _root);
5675 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
5677 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5678 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5680 impl::xml_allocator& alloc = impl::get_allocator(_root);
5681 if (!alloc.reserve()) return xml_attribute();
5683 xml_attribute a(impl::allocate_attribute(alloc));
5684 if (!a) return xml_attribute();
5686 impl::insert_attribute_after(a._attr, attr._attr, _root);
5693 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
5695 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5696 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5698 impl::xml_allocator& alloc = impl::get_allocator(_root);
5699 if (!alloc.reserve()) return xml_attribute();
5701 xml_attribute a(impl::allocate_attribute(alloc));
5702 if (!a) return xml_attribute();
5704 impl::insert_attribute_before(a._attr, attr._attr, _root);
5711 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
5713 if (!proto) return xml_attribute();
5714 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5716 impl::xml_allocator& alloc = impl::get_allocator(_root);
5717 if (!alloc.reserve()) return xml_attribute();
5719 xml_attribute a(impl::allocate_attribute(alloc));
5720 if (!a) return xml_attribute();
5722 impl::append_attribute(a._attr, _root);
5723 impl::node_copy_attribute(a._attr, proto._attr);
5728 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
5730 if (!proto) return xml_attribute();
5731 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5733 impl::xml_allocator& alloc = impl::get_allocator(_root);
5734 if (!alloc.reserve()) return xml_attribute();
5736 xml_attribute a(impl::allocate_attribute(alloc));
5737 if (!a) return xml_attribute();
5739 impl::prepend_attribute(a._attr, _root);
5740 impl::node_copy_attribute(a._attr, proto._attr);
5745 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
5747 if (!proto) return xml_attribute();
5748 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5749 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5751 impl::xml_allocator& alloc = impl::get_allocator(_root);
5752 if (!alloc.reserve()) return xml_attribute();
5754 xml_attribute a(impl::allocate_attribute(alloc));
5755 if (!a) return xml_attribute();
5757 impl::insert_attribute_after(a._attr, attr._attr, _root);
5758 impl::node_copy_attribute(a._attr, proto._attr);
5763 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
5765 if (!proto) return xml_attribute();
5766 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5767 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5769 impl::xml_allocator& alloc = impl::get_allocator(_root);
5770 if (!alloc.reserve()) return xml_attribute();
5772 xml_attribute a(impl::allocate_attribute(alloc));
5773 if (!a) return xml_attribute();
5775 impl::insert_attribute_before(a._attr, attr._attr, _root);
5776 impl::node_copy_attribute(a._attr, proto._attr);
5781 PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
5783 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5785 impl::xml_allocator& alloc = impl::get_allocator(_root);
5786 if (!alloc.reserve()) return xml_node();
5788 xml_node n(impl::allocate_node(alloc, type_));
5789 if (!n) return xml_node();
5791 impl::append_node(n._root, _root);
5793 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5798 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
5800 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5802 impl::xml_allocator& alloc = impl::get_allocator(_root);
5803 if (!alloc.reserve()) return xml_node();
5805 xml_node n(impl::allocate_node(alloc, type_));
5806 if (!n) return xml_node();
5808 impl::prepend_node(n._root, _root);
5810 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5815 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
5817 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5818 if (!node._root || node._root->parent != _root) return xml_node();
5820 impl::xml_allocator& alloc = impl::get_allocator(_root);
5821 if (!alloc.reserve()) return xml_node();
5823 xml_node n(impl::allocate_node(alloc, type_));
5824 if (!n) return xml_node();
5826 impl::insert_node_before(n._root, node._root);
5828 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5833 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
5835 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5836 if (!node._root || node._root->parent != _root) return xml_node();
5838 impl::xml_allocator& alloc = impl::get_allocator(_root);
5839 if (!alloc.reserve()) return xml_node();
5841 xml_node n(impl::allocate_node(alloc, type_));
5842 if (!n) return xml_node();
5844 impl::insert_node_after(n._root, node._root);
5846 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5851 PUGI__FN xml_node xml_node::append_child(const char_t* name_)
5853 xml_node result = append_child(node_element);
5855 result.set_name(name_);
5860 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
5862 xml_node result = prepend_child(node_element);
5864 result.set_name(name_);
5869 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
5871 xml_node result = insert_child_after(node_element, node);
5873 result.set_name(name_);
5878 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
5880 xml_node result = insert_child_before(node_element, node);
5882 result.set_name(name_);
5887 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
5889 xml_node_type type_ = proto.type();
5890 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5892 impl::xml_allocator& alloc = impl::get_allocator(_root);
5893 if (!alloc.reserve()) return xml_node();
5895 xml_node n(impl::allocate_node(alloc, type_));
5896 if (!n) return xml_node();
5898 impl::append_node(n._root, _root);
5899 impl::node_copy_tree(n._root, proto._root);
5904 PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
5906 xml_node_type type_ = proto.type();
5907 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5909 impl::xml_allocator& alloc = impl::get_allocator(_root);
5910 if (!alloc.reserve()) return xml_node();
5912 xml_node n(impl::allocate_node(alloc, type_));
5913 if (!n) return xml_node();
5915 impl::prepend_node(n._root, _root);
5916 impl::node_copy_tree(n._root, proto._root);
5921 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
5923 xml_node_type type_ = proto.type();
5924 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5925 if (!node._root || node._root->parent != _root) return xml_node();
5927 impl::xml_allocator& alloc = impl::get_allocator(_root);
5928 if (!alloc.reserve()) return xml_node();
5930 xml_node n(impl::allocate_node(alloc, type_));
5931 if (!n) return xml_node();
5933 impl::insert_node_after(n._root, node._root);
5934 impl::node_copy_tree(n._root, proto._root);
5939 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
5941 xml_node_type type_ = proto.type();
5942 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5943 if (!node._root || node._root->parent != _root) return xml_node();
5945 impl::xml_allocator& alloc = impl::get_allocator(_root);
5946 if (!alloc.reserve()) return xml_node();
5948 xml_node n(impl::allocate_node(alloc, type_));
5949 if (!n) return xml_node();
5951 impl::insert_node_before(n._root, node._root);
5952 impl::node_copy_tree(n._root, proto._root);
5957 PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
5959 if (!impl::allow_move(*this, moved)) return xml_node();
5961 impl::xml_allocator& alloc = impl::get_allocator(_root);
5962 if (!alloc.reserve()) return xml_node();
5964 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5965 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5967 impl::remove_node(moved._root);
5968 impl::append_node(moved._root, _root);
5973 PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
5975 if (!impl::allow_move(*this, moved)) return xml_node();
5977 impl::xml_allocator& alloc = impl::get_allocator(_root);
5978 if (!alloc.reserve()) return xml_node();
5980 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5981 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5983 impl::remove_node(moved._root);
5984 impl::prepend_node(moved._root, _root);
5989 PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
5991 if (!impl::allow_move(*this, moved)) return xml_node();
5992 if (!node._root || node._root->parent != _root) return xml_node();
5993 if (moved._root == node._root) return xml_node();
5995 impl::xml_allocator& alloc = impl::get_allocator(_root);
5996 if (!alloc.reserve()) return xml_node();
5998 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5999 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6001 impl::remove_node(moved._root);
6002 impl::insert_node_after(moved._root, node._root);
6007 PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
6009 if (!impl::allow_move(*this, moved)) return xml_node();
6010 if (!node._root || node._root->parent != _root) return xml_node();
6011 if (moved._root == node._root) return xml_node();
6013 impl::xml_allocator& alloc = impl::get_allocator(_root);
6014 if (!alloc.reserve()) return xml_node();
6016 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6017 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6019 impl::remove_node(moved._root);
6020 impl::insert_node_before(moved._root, node._root);
6025 PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
6027 return remove_attribute(attribute(name_));
6030 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
6032 if (!_root || !a._attr) return false;
6033 if (!impl::is_attribute_of(a._attr, _root)) return false;
6035 impl::xml_allocator& alloc = impl::get_allocator(_root);
6036 if (!alloc.reserve()) return false;
6038 impl::remove_attribute(a._attr, _root);
6039 impl::destroy_attribute(a._attr, alloc);
6044 PUGI__FN bool xml_node::remove_child(const char_t* name_)
6046 return remove_child(child(name_));
6049 PUGI__FN bool xml_node::remove_child(const xml_node& n)
6051 if (!_root || !n._root || n._root->parent != _root) return false;
6053 impl::xml_allocator& alloc = impl::get_allocator(_root);
6054 if (!alloc.reserve()) return false;
6056 impl::remove_node(n._root);
6057 impl::destroy_node(n._root, alloc);
6062 PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
6064 // append_buffer is only valid for elements/documents
6065 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
6067 // get document node
6068 impl::xml_document_struct* doc = &impl::get_document(_root);
6070 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
6071 doc->header |= impl::xml_memory_page_contents_shared_mask;
6073 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
6074 impl::xml_memory_page* page = 0;
6075 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page));
6078 if (!extra) return impl::make_parse_result(status_out_of_memory);
6080 // add extra buffer to the list
6082 extra->next = doc->extra_buffers;
6083 doc->extra_buffers = extra;
6085 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
6086 impl::name_null_sentry sentry(_root);
6088 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
6091 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
6093 if (!_root) return xml_node();
6095 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6096 if (i->name && impl::strequal(name_, i->name))
6098 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6099 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6106 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
6108 if (!_root) return xml_node();
6110 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6111 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6112 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6118 #ifndef PUGIXML_NO_STL
6119 PUGI__FN string_t xml_node::path(char_t delimiter) const
6121 if (!_root) return string_t();
6125 for (xml_node_struct* i = _root; i; i = i->parent)
6127 offset += (i != _root);
6128 offset += i->name ? impl::strlength(i->name) : 0;
6132 result.resize(offset);
6134 for (xml_node_struct* j = _root; j; j = j->parent)
6137 result[--offset] = delimiter;
6139 if (j->name && *j->name)
6141 size_t length = impl::strlength(j->name);
6144 memcpy(&result[offset], j->name, length * sizeof(char_t));
6148 assert(offset == 0);
6154 PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
6156 xml_node found = *this; // Current search context.
6158 if (!_root || !path_ || !path_[0]) return found;
6160 if (path_[0] == delimiter)
6162 // Absolute path; e.g. '/foo/bar'
6163 found = found.root();
6167 const char_t* path_segment = path_;
6169 while (*path_segment == delimiter) ++path_segment;
6171 const char_t* path_segment_end = path_segment;
6173 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
6175 if (path_segment == path_segment_end) return found;
6177 const char_t* next_segment = path_segment_end;
6179 while (*next_segment == delimiter) ++next_segment;
6181 if (*path_segment == '.' && path_segment + 1 == path_segment_end)
6182 return found.first_element_by_path(next_segment, delimiter);
6183 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
6184 return found.parent().first_element_by_path(next_segment, delimiter);
6188 for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
6190 if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
6192 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
6194 if (subsearch) return subsearch;
6203 PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
6207 xml_node arg_begin = *this;
6208 if (!walker.begin(arg_begin)) return false;
6210 xml_node cur = first_child();
6218 xml_node arg_for_each = cur;
6219 if (!walker.for_each(arg_for_each))
6222 if (cur.first_child())
6225 cur = cur.first_child();
6227 else if (cur.next_sibling())
6228 cur = cur.next_sibling();
6231 // Borland C++ workaround
6232 while (!cur.next_sibling() && cur != *this && !cur.parent().empty())
6239 cur = cur.next_sibling();
6242 while (cur && cur != *this);
6245 assert(walker._depth == -1);
6247 xml_node arg_end = *this;
6248 return walker.end(arg_end);
6251 PUGI__FN size_t xml_node::hash_value() const
6253 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
6256 PUGI__FN xml_node_struct* xml_node::internal_object() const
6261 PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6265 impl::xml_buffered_writer buffered_writer(writer, encoding);
6267 impl::node_output(buffered_writer, _root, indent, flags, depth);
6269 buffered_writer.flush();
6272 #ifndef PUGIXML_NO_STL
6273 PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6275 xml_writer_stream writer(stream);
6277 print(writer, indent, flags, encoding, depth);
6280 PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
6282 xml_writer_stream writer(stream);
6284 print(writer, indent, flags, encoding_wchar, depth);
6288 PUGI__FN ptrdiff_t xml_node::offset_debug() const
6290 if (!_root) return -1;
6292 impl::xml_document_struct& doc = impl::get_document(_root);
6294 // we can determine the offset reliably only if there is exactly once parse buffer
6295 if (!doc.buffer || doc.extra_buffers) return -1;
6303 case node_declaration:
6305 return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
6311 return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
6319 PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
6321 return (bool)lhs && rhs;
6324 PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
6326 return (bool)lhs || rhs;
6330 PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
6334 PUGI__FN xml_node_struct* xml_text::_data() const
6336 if (!_root || impl::is_text_node(_root)) return _root;
6338 // element nodes can have value if parse_embed_pcdata was used
6339 if (PUGI__NODETYPE(_root) == node_element && _root->value)
6342 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
6343 if (impl::is_text_node(node))
6349 PUGI__FN xml_node_struct* xml_text::_data_new()
6351 xml_node_struct* d = _data();
6354 return xml_node(_root).append_child(node_pcdata).internal_object();
6357 PUGI__FN xml_text::xml_text(): _root(0)
6361 PUGI__FN static void unspecified_bool_xml_text(xml_text***)
6365 PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
6367 return _data() ? unspecified_bool_xml_text : 0;
6370 PUGI__FN bool xml_text::operator!() const
6375 PUGI__FN bool xml_text::empty() const
6377 return _data() == 0;
6380 PUGI__FN const char_t* xml_text::get() const
6382 xml_node_struct* d = _data();
6384 return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
6387 PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
6389 xml_node_struct* d = _data();
6391 return (d && d->value) ? d->value + 0 : def;
6394 PUGI__FN int xml_text::as_int(int def) const
6396 xml_node_struct* d = _data();
6398 return (d && d->value) ? impl::get_value_int(d->value) : def;
6401 PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
6403 xml_node_struct* d = _data();
6405 return (d && d->value) ? impl::get_value_uint(d->value) : def;
6408 PUGI__FN double xml_text::as_double(double def) const
6410 xml_node_struct* d = _data();
6412 return (d && d->value) ? impl::get_value_double(d->value) : def;
6415 PUGI__FN float xml_text::as_float(float def) const
6417 xml_node_struct* d = _data();
6419 return (d && d->value) ? impl::get_value_float(d->value) : def;
6422 PUGI__FN bool xml_text::as_bool(bool def) const
6424 xml_node_struct* d = _data();
6426 return (d && d->value) ? impl::get_value_bool(d->value) : def;
6429 #ifdef PUGIXML_HAS_LONG_LONG
6430 PUGI__FN long long xml_text::as_llong(long long def) const
6432 xml_node_struct* d = _data();
6434 return (d && d->value) ? impl::get_value_llong(d->value) : def;
6437 PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
6439 xml_node_struct* d = _data();
6441 return (d && d->value) ? impl::get_value_ullong(d->value) : def;
6445 PUGI__FN bool xml_text::set(const char_t* rhs)
6447 xml_node_struct* dn = _data_new();
6449 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
6452 PUGI__FN bool xml_text::set(int rhs)
6454 xml_node_struct* dn = _data_new();
6456 return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6459 PUGI__FN bool xml_text::set(unsigned int rhs)
6461 xml_node_struct* dn = _data_new();
6463 return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6466 PUGI__FN bool xml_text::set(long rhs)
6468 xml_node_struct* dn = _data_new();
6470 return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6473 PUGI__FN bool xml_text::set(unsigned long rhs)
6475 xml_node_struct* dn = _data_new();
6477 return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6480 PUGI__FN bool xml_text::set(float rhs)
6482 xml_node_struct* dn = _data_new();
6484 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6487 PUGI__FN bool xml_text::set(double rhs)
6489 xml_node_struct* dn = _data_new();
6491 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6494 PUGI__FN bool xml_text::set(bool rhs)
6496 xml_node_struct* dn = _data_new();
6498 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6501 #ifdef PUGIXML_HAS_LONG_LONG
6502 PUGI__FN bool xml_text::set(long long rhs)
6504 xml_node_struct* dn = _data_new();
6506 return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6509 PUGI__FN bool xml_text::set(unsigned long long rhs)
6511 xml_node_struct* dn = _data_new();
6513 return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6517 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
6523 PUGI__FN xml_text& xml_text::operator=(int rhs)
6529 PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
6535 PUGI__FN xml_text& xml_text::operator=(long rhs)
6541 PUGI__FN xml_text& xml_text::operator=(unsigned long rhs)
6547 PUGI__FN xml_text& xml_text::operator=(double rhs)
6553 PUGI__FN xml_text& xml_text::operator=(float rhs)
6559 PUGI__FN xml_text& xml_text::operator=(bool rhs)
6565 #ifdef PUGIXML_HAS_LONG_LONG
6566 PUGI__FN xml_text& xml_text::operator=(long long rhs)
6572 PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
6579 PUGI__FN xml_node xml_text::data() const
6581 return xml_node(_data());
6585 PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
6587 return (bool)lhs && rhs;
6590 PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
6592 return (bool)lhs || rhs;
6596 PUGI__FN xml_node_iterator::xml_node_iterator()
6600 PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
6604 PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6608 PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
6610 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6613 PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
6615 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6618 PUGI__FN xml_node& xml_node_iterator::operator*() const
6620 assert(_wrap._root);
6624 PUGI__FN xml_node* xml_node_iterator::operator->() const
6626 assert(_wrap._root);
6627 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6630 PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
6632 assert(_wrap._root);
6633 _wrap._root = _wrap._root->next_sibling;
6637 PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
6639 xml_node_iterator temp = *this;
6644 PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
6646 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
6650 PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
6652 xml_node_iterator temp = *this;
6657 PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
6661 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
6665 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6669 PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
6671 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
6674 PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
6676 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
6679 PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
6681 assert(_wrap._attr);
6685 PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
6687 assert(_wrap._attr);
6688 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
6691 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
6693 assert(_wrap._attr);
6694 _wrap._attr = _wrap._attr->next_attribute;
6698 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
6700 xml_attribute_iterator temp = *this;
6705 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
6707 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
6711 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
6713 xml_attribute_iterator temp = *this;
6718 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
6722 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
6726 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
6730 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
6732 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6735 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
6737 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6740 PUGI__FN xml_node& xml_named_node_iterator::operator*() const
6742 assert(_wrap._root);
6746 PUGI__FN xml_node* xml_named_node_iterator::operator->() const
6748 assert(_wrap._root);
6749 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
// Pre-increment: asserts a non-null node, then moves to the result of next_sibling(_name).
// NOTE(review): the return statement is not visible in this excerpt.
6752 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
6754 assert(_wrap._root);
6755 _wrap = _wrap.next_sibling(_name);
// Post-increment: takes a copy of the current iterator state.
// NOTE(review): the advance and return statements are not visible here - presumably advances *this and returns the copy; confirm.
6759 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
6761 xml_named_node_iterator temp = *this;
// Pre-decrement. Two paths are visible: step to previous_sibling(_name), or start from the
// parent's last child and, if its name does not equal _name, step back to previous_sibling(_name).
// NOTE(review): the condition selecting between the two paths and the return statement are not visible
// in this excerpt - presumably the second path handles a null (past-the-end) iterator; confirm.
6766 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
6769 _wrap = _wrap.previous_sibling(_name);
6772 _wrap = _parent.last_child();
6774 if (!impl::strequal(_wrap.name(), _name))
6775 _wrap = _wrap.previous_sibling(_name);
// Post-decrement: takes a copy of the current iterator state.
// NOTE(review): the step-back and return statements are not visible here - presumably decrements *this and returns the copy; confirm.
6781 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
6783 xml_named_node_iterator temp = *this;
// Default result: status_internal_error, offset 0, encoding_auto - so a default-constructed result tests false.
6788 PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
// Converts to true only when the status is status_ok.
6792 PUGI__FN xml_parse_result::operator bool() const
6794 return status == status_ok;
// Returns a static, human-readable English message for a status code; any value without
// a dedicated case yields "Unknown error".
// NOTE(review): the switch header is not visible in this excerpt - presumably switches on `status`; confirm.
6797 PUGI__FN const char* xml_parse_result::description() const
6801 case status_ok: return "No error";
// I/O, memory and internal failures
6803 case status_file_not_found: return "File was not found";
6804 case status_io_error: return "Error reading from file/stream";
6805 case status_out_of_memory: return "Could not allocate memory";
6806 case status_internal_error: return "Internal error occurred";
// parsing failures
6808 case status_unrecognized_tag: return "Could not determine tag type";
6810 case status_bad_pi: return "Error parsing document declaration/processing instruction";
6811 case status_bad_comment: return "Error parsing comment";
6812 case status_bad_cdata: return "Error parsing CDATA section";
6813 case status_bad_doctype: return "Error parsing document type declaration";
6814 case status_bad_pcdata: return "Error parsing PCDATA section";
6815 case status_bad_start_element: return "Error parsing start element tag";
6816 case status_bad_attribute: return "Error parsing element attribute";
6817 case status_bad_end_element: return "Error parsing end element tag";
6818 case status_end_element_mismatch: return "Start-end tags mismatch";
6820 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
6822 case status_no_document_element: return "No document element found";
6824 default: return "Unknown error";
// Constructor: starts with no owned parse buffer (_buffer is null).
// NOTE(review): the constructor body is not visible in this excerpt.
6828 PUGI__FN xml_document::xml_document(): _buffer(0)
// Destructor. NOTE(review): the body is not visible in this excerpt - presumably releases document storage; confirm.
6833 PUGI__FN xml_document::~xml_document()
// Resets the document. NOTE(review): the body is not visible in this excerpt - presumably tears down and re-creates the document state; confirm.
6838 PUGI__FN void xml_document::reset()
// Resets the document from a prototype: walks every top-level child of `proto` in sibling order.
// NOTE(review): the statements before the loop and the loop body are not visible in this excerpt -
// presumably the document is cleared first and each child is copied in; confirm.
6844 PUGI__FN void xml_document::reset(const xml_document& proto)
6848 for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
// Builds the initial document state inside the document's own _memory storage:
// a memory page header placed at the start of _memory, followed by the document root structure.
6852 PUGI__FN void xml_document::create()
// in compact mode a uint32_t page marker sits between the page header and the root, so the root is offset by its size
6856 #ifdef PUGIXML_COMPACT
6857 const size_t page_offset = sizeof(uint32_t);
6859 const size_t page_offset = 0;
6862 // initialize sentinel page
// compile-time check that page header + document struct (+ marker) fit into _memory
6863 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));
6865 // prepare page structure
6866 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
// the page is recorded as fully busy (busy_size equals the page size constant)
6869 page->busy_size = impl::xml_memory_page_size;
6871 // setup first page marker
6872 #ifdef PUGIXML_COMPACT
6873 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
6874 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
// the marker stores the size of the page header, i.e. the byte distance from the page start to the marker
6875 *page->compact_page_marker = sizeof(impl::xml_memory_page);
6878 // allocate new root
// placement-new the document struct right after the page header (and marker, if any)
6879 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
// the root's prev_sibling_c link refers back to the root itself
6880 _root->prev_sibling_c = _root;
6882 // setup sentinel page
6883 page->allocator = static_cast<impl::xml_document_struct*>(_root);
6885 // verify the document allocation
6886 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
// Releases everything the document owns: the parse buffer, extra buffers, and every
// dynamically allocated memory page after the root page (which lives inside _memory).
// NOTE(review): several lines (guards, loop tail, trailing cleanup) are not visible in this excerpt.
6889 PUGI__FN void xml_document::destroy()
6893 // destroy static storage
6896 impl::xml_memory::deallocate(_buffer);
6900 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
6901 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
6903 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
6906 // destroy dynamic storage, leave sentinel page (it's in static memory)
6907 impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
// the root page must be the first page in the list and must be located inside _memory
6908 assert(root_page && !root_page->prev);
6909 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
// `next` is read before the page is deallocated so the walk never touches freed memory
6911 for (impl::xml_memory_page* page = root_page->next; page; )
6913 impl::xml_memory_page* next = page->next;
6915 impl::xml_allocator::deallocate_page(page);
6920 #ifdef PUGIXML_COMPACT
6921 // destroy hash table
6922 static_cast<impl::xml_document_struct*>(_root)->hash.clear();
6928 #ifndef PUGIXML_NO_STL
// Loads the document from a narrow-character stream, forwarding options and encoding to load_stream_impl.
// NOTE(review): any statement preceding the return (e.g. a reset) is not visible in this excerpt.
6929 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
6933 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
// Loads the document from a wide-character stream; the encoding is fixed to encoding_wchar.
// NOTE(review): any statement preceding the return (e.g. a reset) is not visible in this excerpt.
6936 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
6940 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
// Parses a zero-terminated string of char_t. The encoding is pinned to the native one
// (wchar in PUGIXML_WCHAR_MODE, UTF-8 otherwise) and the work is delegated to load_buffer
// with the string's size expressed in bytes.
6944 PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
6946 // Force native encoding (skip autodetection)
6947 #ifdef PUGIXML_WCHAR_MODE
6948 xml_encoding encoding = encoding_wchar;
6950 xml_encoding encoding = encoding_utf8;
6953 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
// Thin forwarder to load_string with the same arguments.
6956 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
6958 return load_string(contents, options);
// Loads the document from a file named by a narrow path. The file is opened in binary read mode
// and held by an auto_deleter constructed with impl::close_file as its deleter.
// NOTE(review): any statement preceding the open (e.g. a reset) is not visible in this excerpt.
6961 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
6965 using impl::auto_deleter; // MSVC7 workaround
6966 auto_deleter<FILE> file(fopen(path_, "rb"), impl::close_file);
// a failed fopen yields a null file.data, which is handed to load_file_impl as-is
6968 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
// Loads the document from a file named by a wide path. The file is opened through
// impl::open_file_wide in binary read mode and held by an auto_deleter with impl::close_file.
// NOTE(review): any statement preceding the open (e.g. a reset) is not visible in this excerpt.
6971 PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
6975 using impl::auto_deleter; // MSVC7 workaround
6976 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
6978 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
// Parses a caller-owned, read-only buffer. Both boolean flags passed to load_buffer_impl are false;
// the const_cast only satisfies the shared void* parameter type.
// NOTE(review): presumably the flags mean (is_mutable, own) given the sibling overloads - confirm against load_buffer_impl.
6981 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
6985 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
// Parses a mutable caller-owned buffer: passes (true, false) as the two boolean flags to load_buffer_impl.
// NOTE(review): presumably the flags mean (is_mutable, own) - confirm against load_buffer_impl.
6988 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
6992 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
// Parses a mutable buffer and passes (true, true) as the two boolean flags to load_buffer_impl.
// NOTE(review): presumably the second flag transfers buffer ownership to the document - confirm against load_buffer_impl.
6995 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
6999 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
// Serializes the whole document through `writer`:
//   1. optional byte order mark (format_write_bom, skipped for latin1),
//   2. an XML declaration unless suppressed by format_no_declaration or already present in the document,
//   3. the node tree, then a final flush of the buffered writer.
7002 PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7004 impl::xml_buffered_writer buffered_writer(writer, encoding);
7006 if ((flags & format_write_bom) && encoding != encoding_latin1)
7008 // BOM always represents the codepoint U+FEFF, so just write it in native encoding
7009 #ifdef PUGIXML_WCHAR_MODE
7010 unsigned int bom = 0xfeff;
7011 buffered_writer.write(static_cast<wchar_t>(bom));
// non-wchar build: U+FEFF spelled as its three UTF-8 bytes
7013 buffered_writer.write('\xef', '\xbb', '\xbf');
7017 if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
7019 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
// the encoding attribute is only emitted for latin1 output
7020 if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
7021 buffered_writer.write('?', '>');
// raw formatting suppresses the newline after the declaration
7022 if (!(flags & format_raw)) buffered_writer.write('\n');
7025 impl::node_output(buffered_writer, _root, indent, flags, 0);
7027 buffered_writer.flush();
7030 #ifndef PUGIXML_NO_STL
// Saves to a narrow-character output stream by wrapping it in an xml_writer_stream and delegating to save(xml_writer&, ...).
7031 PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7033 xml_writer_stream writer(stream);
7035 save(writer, indent, flags, encoding);
// Saves to a wide-character output stream; the encoding is fixed to encoding_wchar.
7038 PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
7040 xml_writer_stream writer(stream);
7042 save(writer, indent, flags, encoding_wchar);
// Saves the document to a file named by a narrow path. The file is opened in text mode ("w")
// when format_save_file_text is set and in binary mode ("wb") otherwise; the handle is held by
// an auto_deleter with impl::close_file. Returns the result of save_file_impl.
7046 PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7048 using impl::auto_deleter; // MSVC7 workaround
7049 auto_deleter<FILE> file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
7051 return impl::save_file_impl(*this, file.data, indent, flags, encoding);
// Saves the document to a file named by a wide path. The open mode is assembled in a small
// zero-initialized wchar_t buffer depending on format_save_file_text, then passed to impl::open_file_wide.
// NOTE(review): the lines that fill openMode are not visible in this excerpt - presumably L"w" for
// text mode and L"wb" otherwise, mirroring the narrow-path overload; confirm.
7054 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7056 using impl::auto_deleter; // MSVC7 workaround
7057 wchar_t openMode[3] = {L'\0'};
7058 if (flags & format_save_file_text) {
7064 auto_deleter<FILE> file(impl::open_file_wide(path_, openMode), impl::close_file);
7066 return impl::save_file_impl(*this, file.data, indent, flags, encoding);
// Scans the root's direct children in sibling order looking for a node of type node_element.
// NOTE(review): the return statements are not visible in this excerpt - presumably the first
// element child is returned, or an empty node when none exists; confirm.
7069 PUGI__FN xml_node xml_document::document_element() const
7073 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
7074 if (PUGI__NODETYPE(i) == node_element)
7080 #ifndef PUGIXML_NO_STL
// Converts a zero-terminated wide string to a std::string via as_utf8_impl; the length is measured with strlength_wide.
// NOTE(review): any precondition check on `str` is not visible in this excerpt.
7081 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
7085 return impl::as_utf8_impl(str, impl::strlength_wide(str));
// Converts a wide std::basic_string to a std::string via as_utf8_impl, using the string's own size.
7088 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
7090 return impl::as_utf8_impl(str.c_str(), str.size());
// Converts a zero-terminated narrow string to a wide string via as_wide_impl; the length is measured with strlen.
// NOTE(review): any precondition check on `str` is not visible in this excerpt.
7093 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
7097 return impl::as_wide_impl(str, strlen(str));
// Converts a std::string to a wide string via as_wide_impl, using the string's own size.
7100 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
7102 return impl::as_wide_impl(str.c_str(), str.size());
// Installs the allocation/deallocation function pair used through impl::xml_memory.
// The pointers are stored as given, without validation.
7106 PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
7108 impl::xml_memory::allocate = allocate;
7109 impl::xml_memory::deallocate = deallocate;
// Returns the currently installed allocation function.
7112 PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
7114 return impl::xml_memory::allocate;
// Returns the currently installed deallocation function.
7117 PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
7119 return impl::xml_memory::deallocate;
7123 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
7126 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
// Each overload reports the corresponding pugixml iterator type as bidirectional.
7127 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
7129 return std::bidirectional_iterator_tag();
7132 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
7134 return std::bidirectional_iterator_tag();
7137 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
7139 return std::bidirectional_iterator_tag();
7144 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
7147 // Workarounds for (non-standard) iterator category detection
// Compiled only when __SUNPRO_CC is defined (see the enclosing #if); each overload reports
// the corresponding pugixml iterator type as bidirectional.
7148 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
7150 return std::bidirectional_iterator_tag();
7153 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
7155 return std::bidirectional_iterator_tag();
7158 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
7160 return std::bidirectional_iterator_tag();
7165 #ifndef PUGIXML_NO_XPATH
// Four binary predicate call operators with identical signatures (two values of the same type in, bool out).
// NOTE(review): the enclosing functor declarations and every body are missing from this excerpt,
// so which comparison each operator performs cannot be confirmed from here.
7170 template <typename T> bool operator()(const T& lhs, const T& rhs) const
7178 template <typename T> bool operator()(const T& lhs, const T& rhs) const
7186 template <typename T> bool operator()(const T& lhs, const T& rhs) const
7194 template <typename T> bool operator()(const T& lhs, const T& rhs) const
// Generic two-argument swap helper used by the sorting routines in this file.
// NOTE(review): the body is not visible in this excerpt - presumably exchanges lhs and rhs through a temporary; confirm.
7200 template <typename T> void swap(T& lhs, T& rhs)
// Returns an iterator to the first element of [begin, end) that no other element orders before,
// where pred(a, b) is true when a orders before b.
//   begin, end - range to scan; iterators must support `it + 1` (e.g. raw pointers)
//   pred       - strict ordering predicate
// An empty range returns end.
template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
{
	// guard: with an empty range the scan below would start at begin + 1, i.e. already past the end
	if (begin == end) return end;

	I result = begin;

	// result only moves on a strictly smaller element, so ties keep the earliest position
	for (I it = begin + 1; it != end; ++it)
		if (pred(*it, *result))
			result = it;

	return result;
}
// Reverses [begin, end) in place by exchanging elements pairwise from both ends
// until the two cursors meet or cross. Requires iterators that support subtraction.
template <typename I> void reverse(I begin, I end)
{
	for (; end - begin > 1; ++begin)
	{
		--end;
		swap(*begin, *end);
	}
}
// Collapses runs of adjacent equal elements in [begin, end), compacting the survivors to the front.
// NOTE(review): several statements (the write cursor declaration, the else branch and the return)
// are not visible in this excerpt; the description follows the visible lines and comments only.
7223 template <typename I> I unique(I begin, I end)
// skip the leading part where neighbours already differ - nothing has to move there
7226 while (end - begin > 1 && *begin != *(begin + 1)) begin++;
// empty input: nothing to do
7228 if (begin == end) return begin;
7230 // last written element
7233 // merge unique elements
7234 while (begin != end)
// an element different from the last written one is appended right after it
7236 if (*begin != *write)
7237 *++write = *begin++;
7242 // past-the-end (write points to live element)
// Copies [begin, end) so that the copy ends immediately before target, transferring
// elements from the last one to the first.
template <typename I> void copy_backwards(I begin, I end, I target)
{
	I src = end;
	I dst = target;

	while (src != begin)
	{
		--src;
		--dst;
		*dst = *src;
	}
}
// Insertion sort of the non-empty range [begin, end) ordered by pred.
// The trailing unnamed T* parameter carries no value at run time (it has no name to be read through);
// presumably it exists so T can be deduced as the element type - confirm.
// NOTE(review): several statements (value capture, hole bookkeeping, stores) are not visible in this excerpt.
7251 template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
// precondition: the range is not empty (begin + 1 below would otherwise step past end)
7253 assert(begin != end);
7255 for (I it = begin + 1; it != end; ++it)
// element orders before the current front: shift [begin, it) one slot to the right
7259 if (pred(val, *begin))
7262 copy_backwards(begin, it, it + 1);
7269 // move hole backwards
// no lower-bound check is made here; the pred(val, *begin) test above has already failed on this path
7270 while (pred(val, *(hole - 1)))
7272 *hole = *(hole - 1);
7276 // fill hole with element
7282 // std variant for elements with ==
// Partitions [begin, end) around the pivot at `middle`, tracking a range [eqbeg, eqend) of elements
// equal to the pivot; the results are reported through out_eqbeg / out_eqend.
// NOTE(review): the enclosing loop header, break statements and the final stores to the out
// parameters are not visible in this excerpt.
7283 template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
7285 I eqbeg = middle, eqend = middle + 1;
7287 // expand equal range
7288 while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
7289 while (eqend != end && *eqend == *eqbeg) ++eqend;
7291 // process outer elements
7292 I ltend = eqbeg, gtbeg = eqend;
7296 // find the element from the right side that belongs to the left one
7297 for (; gtbeg != end; ++gtbeg)
7298 if (!pred(*eqbeg, *gtbeg))
// an element equal to the pivot is absorbed into the equal range
7300 if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);
7304 // find the element from the left side that belongs to the right one
7305 for (; ltend != begin; --ltend)
7306 if (!pred(*(ltend - 1), *eqbeg))
// an element equal to the pivot is absorbed into the equal range
7308 if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);
7312 // scanned all elements
7313 if (gtbeg == end && ltend == begin)
7320 // make room for elements by moving equal area
7323 if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
7324 swap(*eqbeg, *--eqend);
7326 else if (ltend == begin)
7328 if (eqend != gtbeg) swap(*eqbeg, *eqend);
7330 swap(*gtbeg++, *eqbeg++);
// misplaced elements exist on both sides: exchange them directly
7332 else swap(*gtbeg++, *--ltend);
// Orders the three referenced elements with three conditional swaps so that, by pred,
// *first <= *middle <= *last afterwards; the median ends up at `middle`.
7336 template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
7338 if (pred(*middle, *first)) swap(*middle, *first);
7339 if (pred(*last, *middle)) swap(*last, *middle);
// the second swap may have moved a smaller element into middle, so the first pair is re-checked
7340 if (pred(*middle, *first)) swap(*middle, *first);
// Moves an approximate median of [first, last] (last inclusive) to `middle`.
// Ranges of at most 40 elements use a single median-of-three; larger ranges take the median
// of three medians sampled near the start, the middle and the end.
7343 template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
7345 if (last - first <= 40)
7347 // median of three for small chunks
7348 median3(first, middle, last, pred);
// sampling distance: one eighth of the element count
7353 size_t step = (last - first + 1) / 8;
7355 median3(first, first + step, first + 2 * step, pred);
7356 median3(middle - step, middle, middle + step, pred);
7357 median3(last - 2 * step, last - step, last, pred);
// combine the three local medians
7358 median3(first + step, middle, last - step, pred);
// Sorts [begin, end) by pred: ranges longer than 32 elements are repeatedly split with a
// three-way partition around a median pivot; the remainder is finished with insertion sort.
// NOTE(review): the declarations of eqbeg/eqend and the range updates after each recursive call
// are not visible in this excerpt.
7362 template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
7364 // sort large chunks
7365 while (end - begin > 32)
7367 // find median element
7368 I middle = begin + (end - begin) / 2;
7369 median(begin, middle, end - 1, pred);
7371 // partition in three chunks (< = >)
7373 partition(begin, middle, end, pred, &eqbeg, &eqend);
7375 // loop on larger half
// the smaller side is sorted recursively in both branches
7376 if (eqbeg - begin > end - eqend)
7378 sort(eqend, end, pred);
7383 sort(begin, eqbeg, pred);
7388 // insertion sort small chunk
// insertion_sort requires a non-empty range, hence the guard; &*begin supplies the element type
7389 if (begin != end) insertion_sort(begin, end, pred, &*begin);
7393 // Allocator used for AST and evaluation stacks
// Size in bytes of the data area of one XPath memory block; PUGIXML_MEMORY_XPATH_PAGE_SIZE overrides it when defined.
// NOTE(review): the default value used when the macro is absent is not visible in this excerpt.
7395 static const size_t xpath_memory_page_size =
7396 #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
7397 PUGIXML_MEMORY_XPATH_PAGE_SIZE
// Allocation granularity: the larger of sizeof(double) and sizeof(void*)
7403 static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*);
// One block in the XPath allocator's singly linked list of memory blocks.
// NOTE(review): further members (e.g. the capacity field used by xpath_allocator) are not visible in this excerpt.
7405 struct xpath_memory_block
// link to the next block in the list
7407 xpath_memory_block* next;
// payload storage; blocks allocated dynamically may be given more room than this nominal size
7412 char data[xpath_memory_page_size];
// Bump allocator over a linked list of xpath_memory_block. Memory is carved from the front
// block (_root); when it runs out, a new block is allocated and pushed in front of the list.
// State can be rolled back to an earlier snapshot with revert().
// NOTE(review): a number of short lines (member declarations, returns, braces) are missing from this excerpt.
7417 class xpath_allocator
// current (front) block of the list
7419 xpath_memory_block* _root;
7423 #ifdef PUGIXML_NO_EXCEPTIONS
// jump target used to report out-of-memory when exceptions are disabled
7424 jmp_buf* error_handler;
// Starts allocating from `root` with `root_size` bytes already considered in use.
7427 xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
7429 #ifdef PUGIXML_NO_EXCEPTIONS
// Allocates `size` bytes (rounded up to the block alignment); returns 0 when a new block cannot be obtained.
7434 void* allocate_nothrow(size_t size)
7436 // round size up to block alignment boundary
7437 size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
// fast path: the request fits into the remaining space of the current block
7439 if (_root_size + size <= _root->capacity)
7441 void* buf = &_root->data[0] + _root_size;
7447 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
7448 size_t block_capacity_base = sizeof(_root->data);
7449 size_t block_capacity_req = size + block_capacity_base / 4;
7450 size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
// header bytes preceding the data member plus the chosen data capacity
7452 size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
7454 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
7455 if (!block) return 0;
// the new block is linked in front of the current one
7457 block->next = _root;
7458 block->capacity = block_capacity;
// Allocating wrapper around allocate_nothrow that reports failure via longjmp
// (PUGIXML_NO_EXCEPTIONS) or std::bad_alloc.
7467 void* allocate(size_t size)
7469 void* result = allocate_nothrow(size);
7473 #ifdef PUGIXML_NO_EXCEPTIONS
7474 assert(error_handler);
7475 longjmp(*error_handler, 1);
7477 throw std::bad_alloc();
// Grows the most recently allocated object from old_size to new_size bytes, moving it to a
// new block when it no longer fits. `ptr` may be null, in which case this is a plain allocation.
7484 void* reallocate(void* ptr, size_t old_size, size_t new_size)
7486 // round size up to block alignment boundary
7487 old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7488 new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7490 // we can only reallocate the last object
7491 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
7493 // adjust root size so that we have not allocated the object at all
// remembered before the adjustment: true when the object was the only one in its block
7494 bool only_object = (_root_size == old_size);
7496 if (ptr) _root_size -= old_size;
7498 // allocate a new version (this will obviously reuse the memory if possible)
7499 void* result = allocate(new_size);
7502 // we have a new block
7503 if (result != ptr && ptr)
// only the old payload is copied; the request must not shrink when the object moves
7506 assert(new_size >= old_size);
7507 memcpy(result, ptr, old_size);
7509 // free the previous page if it had no other objects
// the object must now sit at the start of the new front block, with the old block right behind it
7512 assert(_root->data == result);
7513 assert(_root->next);
7515 xpath_memory_block* next = _root->next->next;
7519 // deallocate the whole page, unless it was the first one
7520 xml_memory::deallocate(_root->next);
// Rolls the allocator back to the snapshot `state`, deallocating every block added since.
7529 void revert(const xpath_allocator& state)
7531 // free all new pages
7532 xpath_memory_block* cur = _root;
7534 while (cur != state._root)
// the link is read before the block is released
7536 xpath_memory_block* next = cur->next;
7538 xml_memory::deallocate(cur);
7544 _root = state._root;
7545 _root_size = state._root_size;
// NOTE(review): the signature and loop header of the following member are not visible in this
// excerpt - the visible lines walk the block list from _root and deallocate blocks; confirm scope.
7550 xpath_memory_block* cur = _root;
7555 xpath_memory_block* next = cur->next;
7557 xml_memory::deallocate(cur);
// Scope guard for an xpath_allocator: copies the allocator's state on construction and
// reverts the allocator to that state on destruction.
7564 struct xpath_allocator_capture
7566 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
7570 ~xpath_allocator_capture()
7572 _target->revert(_state);
// allocator to restore
7575 xpath_allocator* _target;
// snapshot taken at construction
7576 xpath_allocator _state;
// Pair of allocator pointers, named result and temp.
// NOTE(review): the enclosing struct header is not visible in this excerpt - presumably this is the
// type of the `stack` member set up by xpath_stack_data; confirm.
7581 xpath_allocator* result;
7582 xpath_allocator* temp;
// Bundles the storage for an XPath evaluation: two embedded memory blocks, one allocator
// on top of each, and (without exceptions) the jump buffer both allocators report errors to.
// NOTE(review): some members and statements (e.g. the `stack` member and its temp assignment) are
// not visible in this excerpt.
7585 struct xpath_stack_data
7587 xpath_memory_block blocks[2];
7588 xpath_allocator result;
7589 xpath_allocator temp;
7592 #ifdef PUGIXML_NO_EXCEPTIONS
7593 jmp_buf error_handler;
// result allocates from blocks[0], temp from blocks[1]
7596 xpath_stack_data(): result(blocks + 0), temp(blocks + 1)
// both embedded blocks terminate their lists and expose their full data array as capacity
7598 blocks[0].next = blocks[1].next = 0;
7599 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
7601 stack.result = &result;
7604 #ifdef PUGIXML_NO_EXCEPTIONS
7605 result.error_handler = temp.error_handler = &error_handler;
// string contents (zero-terminated)
7621 const char_t* _buffer;
// cached length, consulted by length() only when the string uses heap storage
// NOTE(review): the _uses_heap member declaration is not visible in this excerpt.
7623 size_t _length_heap;
// Allocates length + 1 characters from `alloc` and copies `length` characters of `string` into them.
// NOTE(review): the terminator store and the return statement are not visible in this excerpt.
7625 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
7627 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
7630 memcpy(result, string, length * sizeof(char_t));
// Member-wise constructor used by the from_* factories; stores the arguments without copying the buffer.
7636 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
// Wraps an existing zero-terminated string without copying it; the result is marked as not using the heap.
7641 static xpath_string from_const(const char_t* str)
7643 return xpath_string(str, false, 0);
// Adopts an already allocated, zero-terminated range [begin, end] (terminator at *end) as a heap string
// of length end - begin; nothing is copied.
7646 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
7648 assert(begin <= end && *end == 0);
7650 return xpath_string(begin, true, static_cast<size_t>(end - begin));
// Creates a heap string holding a copy of [begin, end); an empty range yields the default (empty) string
// without allocating.
7653 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
7655 assert(begin <= end);
7657 size_t length = static_cast<size_t>(end - begin);
7659 return length == 0 ? xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length);
// Default constructor: an empty string backed by a literal, not using the heap.
7662 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
// Appends `o` to this string. Empty sources are ignored; an empty non-heap target simply aliases a
// non-heap source; every other case builds the concatenation in memory obtained from `alloc`.
// NOTE(review): a few statements (early return after aliasing, final member updates) are not visible in this excerpt.
7666 void append(const xpath_string& o, xpath_allocator* alloc)
7668 // skip empty sources
7669 if (!*o._buffer) return;
7671 // fast append for constant empty target and constant source
7672 if (!*_buffer && !_uses_heap && !o._uses_heap)
7674 _buffer = o._buffer;
7678 // need to make heap copy
7679 size_t target_length = length();
7680 size_t source_length = o.length();
7681 size_t result_length = target_length + source_length;
7683 // allocate new buffer
// a heap-backed target is grown through reallocate; otherwise a null pointer is passed, making it a fresh allocation
7684 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
7687 // append first string to the new buffer in case there was no reallocation
7688 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
7690 // append second string to the new buffer
7691 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
7692 result[result_length] = 0;
7697 _length_heap = result_length;
// Read-only access to the string as a C string.
// NOTE(review): the body is not visible in this excerpt - presumably returns _buffer; confirm.
7701 const char_t* c_str() const
// Length in characters: the cached value for heap strings, otherwise measured with strlength on each call.
7706 size_t length() const
7708 return _uses_heap ? _length_heap : strlength(_buffer);
// Returns a writable pointer to the contents, first duplicating the text into `alloc`-owned memory.
// NOTE(review): the condition guarding the copy and the _uses_heap update are not visible in this
// excerpt - presumably the copy is made only when the string is not already heap-backed; confirm.
7711 char_t* data(xpath_allocator* alloc)
7713 // make private heap copy
7716 size_t length_ = strlength(_buffer);
7718 _buffer = duplicate_string(_buffer, length_, alloc);
7720 _length_heap = length_;
7723 return const_cast<char_t*>(_buffer);
// True when the first character of the buffer is the terminator, i.e. the string is empty.
// NOTE(review): the signature line of this member is missing from this excerpt.
7728 return *_buffer == 0;
// Content equality, decided by strequal on the two buffers.
7731 bool operator==(const xpath_string& o) const
7733 return strequal(_buffer, o._buffer);
// Content inequality: negation of strequal on the two buffers.
7736 bool operator!=(const xpath_string& o) const
7738 return !strequal(_buffer, o._buffer);
// Reports whether the string is heap-backed.
// NOTE(review): the body is not visible in this excerpt - presumably returns _uses_heap; confirm.
7741 bool uses_heap() const
// Returns true when `string` begins with `pattern`: the loop runs while pattern characters remain
// and match, and the pattern counts as matched once its terminator is reached.
// NOTE(review): the loop body (advancing both pointers) is not visible in this excerpt.
7749 PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
7751 while (*pattern && *string == *pattern)
7757 return *pattern == 0;
// Finds the first occurrence of `c` in `s` using the C library search matching the build's
// character type (wcschr in PUGIXML_WCHAR_MODE, strchr otherwise); null when absent.
7760 PUGI__FN const char_t* find_char(const char_t* s, char_t c)
7762 #ifdef PUGIXML_WCHAR_MODE
7763 return wcschr(s, c);
7765 return strchr(s, c);
// Finds the first occurrence of `p` in `s` using wcsstr/strstr depending on the character mode.
// In wide mode an empty pattern short-circuits to `s` to sidestep the wcsstr issue noted below.
7769 PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
7771 #ifdef PUGIXML_WCHAR_MODE
7772 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
7773 return (*p == 0) ? s : wcsstr(s, p);
7775 return strstr(s, p);
7779 // Converts symbol to lower case, if it is an ASCII one
7780 PUGI__FN char_t tolower_ascii(char_t ch)
// the unsigned subtraction folds the 'A'..'Z' range test into one comparison; OR-ing with ' ' (0x20) sets the lower-case bit
7782 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
// Computes the XPath string value of an XPath node (attribute or tree node).
// Visible paths: an attribute yields its value; some node kinds yield n.value() directly; another
// path concatenates the values of all pcdata/cdata descendants in a depth-first walk; otherwise
// an empty string is returned.
// NOTE(review): the attribute test, the node-type switch and several loop statements are not
// visible in this excerpt.
7785 PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
7788 return xpath_string::from_const(na.attribute().value());
7791 xml_node n = na.node();
7799 return xpath_string::from_const(n.value());
7804 xpath_string result;
7806 // element nodes can have value if parse_embed_pcdata was used
7808 result.append(xpath_string::from_const(n.value()), alloc);
7810 xml_node cur = n.first_child();
// iterative walk of the subtree below n; it stops once the walk climbs back to n
7812 while (cur && cur != n)
7814 if (cur.type() == node_pcdata || cur.type() == node_cdata)
7815 result.append(xpath_string::from_const(cur.value()), alloc);
// descend first, then move sideways, otherwise climb until a sibling exists
7817 if (cur.first_child())
7818 cur = cur.first_child();
7819 else if (cur.next_sibling())
7820 cur = cur.next_sibling();
7823 while (!cur.next_sibling() && cur != n)
7826 if (cur != n) cur = cur.next_sibling();
7834 return xpath_string();
// Decides whether `ln` precedes `rn`, given that both have the same parent (asserted).
// Two cursors advance along the sibling chains in lock step; the one that reaches the other node first decides the order.
// NOTE(review): the loop header and the final return are not visible in this excerpt.
7839 PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
7841 assert(ln->parent == rn->parent);
7843 // there is no common ancestor (the shared parent is null), nodes are from different documents
// in that case the raw pointer values provide the (arbitrary but consistent) order
7844 if (!ln->parent) return ln < rn;
7846 // determine sibling order
7847 xml_node_struct* ls = ln;
7848 xml_node_struct* rs = rn;
// reaching rn while walking forward from ln means ln comes first, and vice versa
7852 if (ls == rn) return true;
7853 if (rs == ln) return false;
7855 ls = ls->next_sibling;
7856 rs = rs->next_sibling;
7859 // if rn sibling chain ended ln must be before rn
// Decides whether node `ln` precedes node `rn`, by locating ancestors of both that share a parent
// and comparing those as siblings.
// NOTE(review): the bodies of both ancestor-walking loops and the height normalization block are
// not visible in this excerpt.
7863 PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
7865 // find common ancestor at the same depth, if any
7866 xml_node_struct* lp = ln;
7867 xml_node_struct* rp = rn;
7869 while (lp && rp && lp->parent != rp->parent)
7875 // parents are the same!
7876 if (lp && rp) return node_is_before_sibling(lp, rp);
7878 // nodes are at different depths, need to normalize heights
// the left walk running out first (lp null) marks the left node as the one closer to the root
7879 bool left_higher = !lp;
7893 // one node is the ancestor of the other
7894 if (ln == rn) return left_higher;
7896 // find common ancestor... again
7897 while (ln->parent != rn->parent)
7903 return node_is_before_sibling(ln, rn);
// Returns true when `parent` is non-null and is `node` itself or one of its ancestors,
// found by following parent links upwards from `node`.
7906 PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
7908 while (node && node != parent) node = node->parent;
7910 return parent && node == parent;
// Returns a pointer usable as an ordering key for the XPath node: the address of the node's or
// attribute's name or value string, but only when the owning document's header does not have the
// contents-shared bit set and the string's allocated-or-shared bit is clear.
// NOTE(review): the null checks on node/attr and the fall-through returns are not visible in this
// excerpt - presumably 0 is returned when no such pointer is available; confirm.
7913 PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
7915 xml_node_struct* node = xnode.node().internal_object();
7919 if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
7921 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
7922 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
7928 xml_attribute_struct* attr = xnode.attribute().internal_object();
7932 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
7934 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
7935 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
// Strict ordering predicate that places XPath nodes in document order.
// Fast path: when both nodes provide a buffer-order pointer, those pointers are compared.
// Slow path: attributes of the same parent are ordered by walking the attribute list, an attribute
// is ordered after its own parent element, and everything else is decided by node_is_before.
// NOTE(review): several return statements and the reassignments of ln/rn are not visible in this excerpt.
7944 struct document_order_comparator
7946 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
7948 // optimized document order based check
7949 const void* lo = document_buffer_order(lhs);
7950 const void* ro = document_buffer_order(rhs);
7952 if (lo && ro) return lo < ro;
7955 xml_node ln = lhs.node(), rn = rhs.node();
7957 // compare attributes
7958 if (lhs.attribute() && rhs.attribute())
7961 if (lhs.parent() == rhs.parent())
7963 // determine sibling order
7964 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
7965 if (a == rhs.attribute())
7971 // compare attribute parents
7975 else if (lhs.attribute())
7977 // attributes go after the parent element
7978 if (lhs.parent() == rhs.node()) return false;
7982 else if (rhs.attribute())
7984 // attributes go after the parent element
7985 if (rhs.parent() == lhs.node()) return true;
// identical nodes are never "before" each other
7990 if (ln == rn) return false;
// a null node on either side falls back to comparing the handles directly
7992 if (!ln || !rn) return ln < rn;
7994 return node_is_before(ln.internal_object(), rn.internal_object());
// Strict ordering predicate that groups identical XPath nodes together: every attribute node sorts
// before every non-attribute node; within each group the handles themselves are compared with operator<.
7998 struct duplicate_comparator
8000 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
8002 if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
8003 else return rhs.attribute() ? false : lhs.node() < rhs.node();
// Produces a NaN value of type double.
// When float is known to be a 32-bit IEEE-style type (see the #if), a float/uint32_t union is used;
// the other visible path starts from a volatile zero.
// NOTE(review): the statements that fill the union and both return statements are not visible in
// this excerpt - presumably a NaN bit pattern is stored, and the fallback returns zero / zero; confirm.
8007 PUGI__FN double gen_nan()
8009 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
8010 PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
8011 typedef uint32_t UI; // BCC5 workaround
8012 union { float f; UI i; } u;
8017 const volatile double zero = 0.0;
// Tests whether `value` is NaN, using _isnan on MSVC CRT / Borland, fpclassify where the macros
// are available, and a volatile copy of the value otherwise.
// NOTE(review): the fallback's return statement is not visible in this excerpt - presumably it
// returns v != v; confirm.
8022 PUGI__FN bool is_nan(double value)
8024 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8025 return !!_isnan(value);
8026 #elif defined(fpclassify) && defined(FP_NAN)
8027 return fpclassify(value) == FP_NAN;
8030 const volatile double v = value;
// Returns the fixed text for numbers with a special representation - "0", "NaN", "Infinity",
// "-Infinity" - or a null pointer (visible on the MSVC path) for ordinary finite non-zero values.
// Three implementations are selected at compile time: CRT helpers, fpclassify, or plain arithmetic tests.
// NOTE(review): the case labels of the switch and the final returns of the later branches are not
// visible in this excerpt.
8035 PUGI__FN const char_t* convert_number_to_string_special(double value)
8037 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8038 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
8039 if (_isnan(value)) return PUGIXML_TEXT("NaN");
8040 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8041 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
8042 switch (fpclassify(value))
8045 return PUGIXML_TEXT("NaN");
8048 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8051 return PUGIXML_TEXT("0");
// generic fallback working on a volatile copy of the value
8058 const volatile double v = value;
8060 if (v == 0) return PUGIXML_TEXT("0");
// NaN is the only value that compares unequal to itself
8061 if (v != v) return PUGIXML_TEXT("NaN");
// zero was handled above, so a value equal to its own double is an infinity
8062 if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
// Number-to-boolean conversion: true for every value except zero and NaN.
8067 PUGI__FN bool convert_number_to_boolean(double value)
8069 return (value != 0 && !is_nan(value));
// Moves `end` backwards over trailing '0' characters of [begin, end).
// NOTE(review): the statement following the loop is not visible in this excerpt - presumably the
// string is terminated at the new end; confirm.
8072 PUGI__FN void truncate_zeros(char* begin, char* end)
8074 while (begin != end && end[-1] == '0') end--;
8079 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
// Variant for MSVC CRT 1400+ (excluding Windows CE), built on _ecvt_s.
//   value           - number to convert
//   buffer, size    - scratch storage receiving the digit string
//   out_mantissa    - receives a pointer to the digits (the start of buffer here)
//   out_exponent    - receives the decimal exponent reported by _ecvt_s
// NOTE(review): the declarations of exponent/sign are not visible in this excerpt.
8080 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
8081 PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
// request DBL_DIG + 1 significant digits
8085 _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
8087 // truncate redundant zeros
8088 truncate_zeros(buffer, buffer + strlen(buffer));
8091 *out_mantissa = buffer;
8092 *out_exponent = exponent;
8095 PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
8097 // get a scientific notation value with IEEE DBL_DIG decimals
8098 sprintf(buffer, "%.*e", DBL_DIG, value);
8099 assert(strlen(buffer) < buffer_size);
8102 // get the exponent (possibly negative)
8103 char* exponent_string = strchr(buffer, 'e');
8104 assert(exponent_string);
8106 int exponent = atoi(exponent_string + 1);
8108 // extract mantissa string: skip sign
8109 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
8110 assert(mantissa[0] != '0' && mantissa[1] == '.');
8112 // divide mantissa by 10 to eliminate integer part
8113 mantissa[1] = mantissa[0];
8117 // remove extra mantissa digits and zero-terminate mantissa
8118 truncate_zeros(mantissa, exponent_string);
8121 *out_mantissa = mantissa;
8122 *out_exponent = exponent;
// Formats a number as a plain decimal string without exponent notation.
// Special values are handled first; everything else is decomposed into mantissa digits and a
// decimal exponent, from which the integer part, the decimal point and the fractional digits are emitted
// into memory obtained from `alloc`.
// NOTE(review): several statements (exponent declaration, cursor setup, branch headers, loop
// counters, terminator store) are not visible in this excerpt.
8126 PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
8128 // try special number conversion
8129 const char_t* special = convert_number_to_string_special(value);
8130 if (special) return xpath_string::from_const(special);
8132 // get mantissa + exponent form
8133 char mantissa_buffer[32] = {};
8135 char* mantissa = nullptr;
8137 convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
8139 // allocate a buffer of suitable length for the number
// digits + |exponent| padding zeros + 4 spare characters
8140 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
8141 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
8148 if (value < 0) *s++ = '-';
// integer part: one digit per unit of positive exponent, padded with '0' once the mantissa runs out
8157 while (exponent > 0)
8159 assert(*mantissa == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*mantissa) - '0') <= 9);
8160 *s++ = *mantissa ? *mantissa++ : '0';
8171 // extra zeroes from negative exponent
8172 while (exponent < 0)
8178 // extra mantissa digits
8181 assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
// the write cursor must still be inside the allocation
8187 assert(s < result + result_size);
8190 return xpath_string::from_heap_preallocated(result, s);
// Validates that `string` has the shape of a number before it is parsed:
// optional leading whitespace, an optional '-', an integer part and/or a decimal part
// with at least one digit, optional trailing whitespace, and nothing else.
// Returns false for an empty string (after the sign) or on any unexpected character.
8193 PUGI__FN bool check_string_to_number_format(const char_t* string)
8195 // parse leading whitespace
8196 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8199 if (*string == '-') ++string;
8201 if (!*string) return false;
8203 // if there is no integer part, there should be a decimal part with at least one digit
8204 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
8206 // parse integer part
8207 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8209 // parse decimal part
8214 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8217 // parse trailing whitespace
8218 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
// valid only if the whole string has been consumed
8220 return *string == 0;
// Converts a zero-terminated string to a double.
// Returns gen_nan() when check_string_to_number_format rejects the string; otherwise
// the text is parsed by the C library (wcstod under PUGIXML_WCHAR_MODE; strtod is
// presumably the branch for the narrow-character build).
8223 PUGI__FN double convert_string_to_number(const char_t* string)
8225 // check string format
8226 if (!check_string_to_number_format(string)) return gen_nan();
8229 #ifdef PUGIXML_WCHAR_MODE
8230 return wcstod(string, 0);
8232 return strtod(string, 0);
// Converts the character range [begin, end), which need not be zero-terminated, to a number.
// The range is copied into a zero-terminated scratch buffer: the caller-provided 32-element
// `buffer` when the text plus terminator fits, otherwise a temporary xml_memory allocation
// that is released before returning.
// The converted value is stored in *out_result. Returns false if the temporary allocation fails.
8236 PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
8238 size_t length = static_cast<size_t>(end - begin);
8239 char_t* scratch = buffer;
// ">=" leaves room for the terminating zero in the fixed buffer
8241 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8243 // need to make dummy on-heap copy
8244 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8245 if (!scratch) return false;
8248 // copy string to zero-terminated buffer and perform conversion
8249 memcpy(scratch, begin, length * sizeof(char_t));
8250 scratch[length] = 0;
8252 *out_result = convert_string_to_number(scratch);
8254 // free dummy buffer
8255 if (scratch != buffer) xml_memory::deallocate(scratch);
// Rounds to the nearest integer by computing floor(value + 0.5), so halves round upwards.
8260 PUGI__FN double round_nearest(double value)
8262 return floor(value + 0.5);
// Variant of round_nearest that keeps the sign of zero for small negative inputs:
// values in [-0.5, 0] go through ceil, everything else through floor(value + 0.5).
8265 PUGI__FN double round_nearest_nzero(double value)
8267 // same as round_nearest, but returns -0 for [-0.5, -0]
8268 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
8269 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
// Returns the full name of an XPath node: the attribute's name when the xpath_node
// holds an attribute, otherwise the name of its xml_node.
8272 PUGI__FN const char_t* qualified_name(const xpath_node& node)
8274 return node.attribute() ? node.attribute().name() : node.node().name();
// Returns the local part of the node's qualified name: the text after the ':' located by
// find_char, or the whole name when it contains no ':'.
// The result points into the name returned by qualified_name (no copy is made).
8277 PUGI__FN const char_t* local_name(const xpath_node& node)
8279 const char_t* name = qualified_name(node);
8280 const char_t* p = find_char(name, ':');
8282 return p ? p + 1 : name;
// Predicate used with find_attribute to locate the xmlns declaration that applies to a name.
// Constructed from a (possibly prefixed) name; it matches attributes called
// "xmlns:<prefix>" when the name has a prefix, or exactly "xmlns" when it has none.
8285 struct namespace_uri_predicate
// start of the name when it contains ':', otherwise null
8287 const char_t* prefix;
// number of characters before the ':' (0 when there is no prefix)
8288 size_t prefix_length;
8290 namespace_uri_predicate(const char_t* name)
8292 const char_t* pos = find_char(name, ':');
8294 prefix = pos ? name : 0;
8295 prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
8298 bool operator()(xml_attribute a) const
8300 const char_t* name = a.name();
8302 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
// name[5] is the character right after "xmlns": ':' introduces a prefixed declaration,
// a terminator means the default namespace declaration
8304 return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
// Returns the namespace URI for an element: the value of the xmlns attribute matching the
// element name's prefix, found through find_attribute with a namespace_uri_predicate.
// Returns an empty string when no matching declaration is found.
// NOTE(review): the definition and stepping of `p` are outside this excerpt - presumably
// the search starts at `node` and walks up through its ancestors; confirm.
8308 PUGI__FN const char_t* namespace_uri(xml_node node)
8310 namespace_uri_predicate pred = node.name();
8316 xml_attribute a = p.find_attribute(pred);
8318 if (a) return a.value();
8323 return PUGIXML_TEXT("");
// Returns the namespace URI for an attribute whose owning element is `parent`.
// Unprefixed attributes have no namespace, so an empty string is returned for them at once.
// Otherwise the matching "xmlns:<prefix>" declaration is searched starting at `parent`
// (presumably continuing through its ancestors - the loop header is outside this excerpt),
// and an empty string is returned when none is found.
8326 PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
8328 namespace_uri_predicate pred = attr.name();
8330 // Default namespace does not apply to attributes
8331 if (!pred.prefix) return PUGIXML_TEXT("");
8333 xml_node p = parent;
8337 xml_attribute a = p.find_attribute(pred);
8339 if (a) return a.value();
8344 return PUGIXML_TEXT("");
// Returns the namespace URI of an XPath node by dispatching to the attribute overload
// (with the node's parent) when it holds an attribute, and to the element overload otherwise.
8347 PUGI__FN const char_t* namespace_uri(const xpath_node& node)
8349 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
// Normalizes whitespace in `buffer` in place: each run of whitespace characters becomes a
// single ' ', leading whitespace is not emitted, and a trailing space is removed.
// `write` trails the read cursor, so the result never grows beyond the input.
// NOTE(review): the return statement is outside this excerpt - presumably the end of the
// normalized text is returned; confirm.
8352 PUGI__FN char_t* normalize_space(char_t* buffer)
8354 char_t* write = buffer;
8356 for (char_t* it = buffer; *it; )
8360 if (PUGI__IS_CHARTYPE(ch, ct_space))
8362 // replace whitespace sequence with single space
8363 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
8365 // avoid leading spaces
8366 if (write != buffer) *write++ = ' ';
8371 // remove trailing space
8372 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
// Character-by-character replacement of `buffer` in place.
// Each character is looked up in `from`: characters not present are copied unchanged,
// characters found at an index below `to_length` are replaced by the character at the
// same index in `to`.
// NOTE(review): no write is visible for characters found at an index >= to_length, so they
// are presumably dropped; the termination and return statements are outside this excerpt.
8380 PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
8382 char_t* write = buffer;
8386 PUGI__DMC_VOLATILE char_t ch = *buffer++;
8388 const char_t* pos = find_char(from, ch);
8391 *write++ = ch; // do not process
8392 else if (static_cast<size_t>(pos - from) < to_length)
8393 *write++ = to[pos - from]; // replace
// Builds a 128-entry lookup table for the table-driven translate_table from the `from`/`to`
// strings and returns a copy placed in memory obtained from alloc->allocate_nothrow.
// Entry fc holds the replacement code for character fc, or 128 to mean "skip character".
// NOTE(review): the actions taken when a character code is >= 128 and when the allocation
// fails are outside this excerpt - presumably the function gives up and returns null; confirm.
8402 PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
8404 unsigned char table[128] = {0};
8408 unsigned int fc = static_cast<unsigned int>(*from);
8409 unsigned int tc = static_cast<unsigned int>(*to);
// only codes below 128 can be represented in the table
8411 if (fc >= 128 || tc >= 128)
8414 // code=128 means "skip character"
8416 table[fc] = static_cast<unsigned char>(tc ? tc : 128);
// identity mapping (the guarding condition for this assignment is outside this excerpt -
// presumably only entries not set above are filled)
8422 for (int i = 0; i < 128; ++i)
8424 table[i] = static_cast<unsigned char>(i);
8426 void* result = alloc->allocate_nothrow(sizeof(table));
8430 memcpy(result, table, sizeof(table));
8433 return static_cast<unsigned char*>(result);
// Applies a table produced by translate_table_generate to `buffer` in place.
// For a character that goes through the table, the mapped code is always stored at the
// write position, but the position only advances when the code is not 128; a skipped
// character is therefore overwritten by the next one.
// NOTE(review): handling of characters whose index is outside the table, and the
// termination/return statements, are outside this excerpt.
8436 PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
8438 char_t* write = buffer;
8442 char_t ch = *buffer++;
8443 unsigned int index = static_cast<unsigned int>(ch);
8447 unsigned char code = table[index];
8449 // code=128 means "skip character" (table size is 128 so 128 can be a special value)
8450 // this code skips these characters without extra branches
8451 *write = static_cast<char_t>(code);
// code >> 7 is 1 only for code 128, so the cursor stays put for skipped characters
8452 write += 1 - (code >> 7);
// Returns false for namespace declarations - names that are exactly "xmlns" or start
// with "xmlns:" - and true for every other attribute name.
8466 inline bool is_xpath_attribute(const char_t* name)
8468 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
// XPath variable holding a boolean; tagged xpath_type_boolean and initialised to false.
8471 struct xpath_variable_boolean: xpath_variable
8473 xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
// XPath variable holding a number; tagged xpath_type_number and initialised to 0.
8481 struct xpath_variable_number: xpath_variable
8483 xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
// XPath variable holding a string; tagged xpath_type_string and initialised to a null value.
// The destructor releases a non-null value through xml_memory::deallocate, so the
// variable owns the string it stores.
8491 struct xpath_variable_string: xpath_variable
8493 xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
8497 ~xpath_variable_string()
8499 if (value) xml_memory::deallocate(value);
// XPath variable holding a node set; tagged xpath_type_node_set, value default-constructed.
8506 struct xpath_variable_node_set: xpath_variable
8508 xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
8512 xpath_node_set value;
// Shared default-constructed node set.
// NOTE(review): its users are outside this excerpt - presumably it is returned where a
// node-set reference is needed but no real value exists; confirm.
8516 static const xpath_node_set dummy_node_set;
// Hashes a string with the Jenkins one-at-a-time function: every character is added to
// the accumulator and mixed (<< 10, >> 6), followed by a final avalanche (<< 3, >> 11, << 15).
// NOTE(review): the loop header and return statement are outside this excerpt - presumably
// the loop runs up to the terminating zero and the accumulator is returned.
8518 PUGI__FN unsigned int hash_string(const char_t* str)
8520 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
8521 unsigned int result = 0;
8525 result += static_cast<unsigned int>(*str++);
8526 result += result << 10;
8527 result ^= result >> 6;
8530 result += result << 3;
8531 result ^= result >> 11;
8532 result += result << 15;
// Allocates and constructs a variable of concrete type T with the given name stored inline.
// Returns 0 for an empty name or when xml_memory::allocate fails.
// The object is placement-constructed in a block of sizeof(T) plus `length` extra characters,
// and the name is then copied into result->name including its terminator.
// NOTE(review): copying length + 1 characters relies on T already providing storage for at
// least one name character - the declaration of `name` is outside this excerpt; confirm.
8537 template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
8539 size_t length = strlength(name);
8540 if (length == 0) return 0; // empty variable names are invalid
8542 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
8543 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
8544 if (!memory) return 0;
8546 T* result = new (memory) T();
8548 memcpy(result->name, name, (length + 1) * sizeof(char_t));
// Creates a variable of the requested XPath value type by forwarding to the
// new_xpath_variable<T> template with the matching concrete variable struct.
// (Handling of other type values is outside this excerpt.)
8553 PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
8557 case xpath_type_node_set:
8558 return new_xpath_variable<xpath_variable_node_set>(name);
8560 case xpath_type_number:
8561 return new_xpath_variable<xpath_variable_number>(name);
8563 case xpath_type_string:
8564 return new_xpath_variable<xpath_variable_string>(name);
8566 case xpath_type_boolean:
8567 return new_xpath_variable<xpath_variable_boolean>(name);
// Releases the storage of a variable of concrete type T through xml_memory::deallocate.
// NOTE(review): a statement before the deallocation is outside this excerpt - presumably
// the explicit destructor call that pairs with the placement new used at creation; confirm.
8574 template <typename T> PUGI__FN void delete_xpath_variable(T* var)
8577 xml_memory::deallocate(var);
// Destroys a variable given its XPath value type: the base pointer is cast to the matching
// concrete struct so that the typed delete_xpath_variable<T> overload runs.
// An unrecognised type trips the "Invalid variable type" assert.
8580 PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
8584 case xpath_type_node_set:
8585 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
8588 case xpath_type_number:
8589 delete_xpath_variable(static_cast<xpath_variable_number*>(var));
8592 case xpath_type_string:
8593 delete_xpath_variable(static_cast<xpath_variable_string*>(var));
8596 case xpath_type_boolean:
8597 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
8601 assert(false && "Invalid variable type");
// Copies the value of `rhs` into `lhs` by calling lhs->set with the value read from the
// concrete struct selected by rhs->type(). Returns the result of set().
// An unrecognised type trips the "Invalid variable type" assert.
8605 PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
8607 switch (rhs->type())
8609 case xpath_type_node_set:
8610 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
8612 case xpath_type_number:
8613 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
8615 case xpath_type_string:
8616 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
8618 case xpath_type_boolean:
8619 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
8622 assert(false && "Invalid variable type");
// Looks up the variable named by the character range [begin, end) in `set`.
// The name is copied into a zero-terminated scratch buffer: the caller-provided 32-element
// `buffer` when the name plus terminator fits, otherwise a temporary xml_memory allocation
// that is released before returning.
// The result of set->get is stored in *out_result. Returns false if the temporary allocation fails.
8627 PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
8629 size_t length = static_cast<size_t>(end - begin);
8630 char_t* scratch = buffer;
// ">=" leaves room for the terminating zero in the fixed buffer
8632 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8634 // need to make dummy on-heap copy
8635 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8636 if (!scratch) return false;
8639 // copy string to zero-terminated buffer and perform lookup
8640 memcpy(scratch, begin, length * sizeof(char_t));
8641 scratch[length] = 0;
8643 *out_result = set->get(scratch);
8645 // free dummy buffer
8646 if (scratch != buffer) xml_memory::deallocate(scratch);
8652 // Internal node set class
// Detects whether [begin, end) is already ordered with respect to document_order_comparator.
// Ranges with fewer than two elements count as sorted. Otherwise every adjacent pair must
// compare the same way as the first pair: all "less" gives type_sorted, all "not less"
// gives type_sorted_reverse, and any disagreement gives type_unsorted.
8654 PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
8656 if (end - begin < 2)
8657 return xpath_node_set::type_sorted;
8659 document_order_comparator cmp;
8661 bool first = cmp(begin[0], begin[1]);
8663 for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
8664 if (cmp(it[0], it[1]) != first)
8665 return xpath_node_set::type_unsorted;
8667 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
// Brings [begin, end) into document order (rev == false) or reverse document order (rev == true).
// `type` is the caller's current knowledge of the ordering. An unsorted range is first probed
// with xpath_get_order and only sorted when the probe also reports it as unsorted; the range
// is reversed at the end if its order is the opposite of the requested one.
// NOTE(review): the return statement is outside this excerpt - presumably the requested
// order is returned; confirm.
8670 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
8672 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
8674 if (type == xpath_node_set::type_unsorted)
8676 xpath_node_set::type_t sorted = xpath_get_order(begin, end);
8678 if (sorted == xpath_node_set::type_unsorted)
8680 sort(begin, end, document_order_comparator());
8682 type = xpath_node_set::type_sorted;
8688 if (type != order) reverse(begin, end);
// Returns the first node of [begin, end) in document order, given the range's ordering type.
// An empty range yields a default xpath_node. For an unsorted range the minimum under
// document_order_comparator is searched. An invalid type trips an assert and yields a
// default xpath_node.
// NOTE(review): the statements for the two sorted cases are outside this excerpt -
// presumably they pick the front or back element directly; confirm.
8693 PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
8695 if (begin == end) return xpath_node();
8699 case xpath_node_set::type_sorted:
8702 case xpath_node_set::type_sorted_reverse:
8705 case xpath_node_set::type_unsorted:
8706 return *min_element(begin, end, document_order_comparator());
8709 assert(false && "Invalid node set type");
8710 return xpath_node();
// Growable array of xpath_node with a tracked ordering type, used while evaluating queries.
// The range is described by _begin/_end/_eos; storage is obtained from the xpath_allocator
// passed to each growing call.
8714 class xpath_node_set_raw
8716 xpath_node_set::type_t _type;
// starts empty and marked unsorted
8723 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
8727 xpath_node* begin() const
8732 xpath_node* end() const
8739 return _begin == _end;
8744 return static_cast<size_t>(_end - _begin);
// first node in document order, according to the tracked type
8747 xpath_node first() const
8749 return xpath_first(_begin, _end, _type);
// out-of-line growth path used by push_back
8752 void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
8754 void push_back(const xpath_node& node, xpath_allocator* alloc)
8759 push_back_grow(node, alloc);
// appends [begin_, end_); when capacity is insufficient the storage is reallocated to
// exactly size + count elements before the new nodes are copied to the end
8762 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
8764 if (begin_ == end_) return;
8766 size_t size_ = static_cast<size_t>(_end - _begin);
8767 size_t capacity = static_cast<size_t>(_eos - _begin);
8768 size_t count = static_cast<size_t>(end_ - begin_);
8770 if (size_ + count > capacity)
8772 // reallocate the old array or allocate a new one
8773 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
8778 _end = data + size_;
8779 _eos = data + size_ + count;
8782 memcpy(_end, begin_, count * sizeof(xpath_node));
// sorts the stored range through xpath_sort (rev = false) and records the returned type
8788 _type = xpath_sort(_begin, _end, _type, false);
// `pos` must lie inside the current range
8791 void truncate(xpath_node* pos)
8793 assert(_begin <= pos && pos <= _end);
// an unsorted set is first sorted with duplicate_comparator so that equal nodes become
// adjacent; unique() then drops the repeats
8798 void remove_duplicates()
8800 if (_type == xpath_node_set::type_unsorted)
8801 sort(_begin, _end, duplicate_comparator());
8803 _end = unique(_begin, _end);
8806 xpath_node_set::type_t type() const
8811 void set_type(xpath_node_set::type_t value)
// Growth path of push_back: enlarges the storage to capacity + capacity / 2 + 1 elements
// through alloc->reallocate and repoints _end/_eos into the new block.
// _end is placed at data + capacity, i.e. the old array is treated as full.
// NOTE(review): the null check on `data`, the update of _begin and the store of `node` are
// outside this excerpt.
8817 PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
8819 size_t capacity = static_cast<size_t>(_eos - _begin);
8821 // get new capacity (1.5x rule)
8822 size_t new_capacity = capacity + capacity / 2 + 1;
8824 // reallocate the old array or allocate a new one
8825 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
8830 _end = data + capacity;
8831 _eos = data + new_capacity;
// Evaluation context passed to expression evaluation: the context node `n` together with
// its position and the size of the set it belongs to (the predicate code passes positions
// starting from 1).
8839 struct xpath_context
8842 size_t position, size;
8844 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
// Token kinds assigned to _cur_lexeme by the lexer (only some enumerators are visible in
// this excerpt).
8857 lex_greater_or_equal,
8869 lex_open_square_brace,
8870 lex_close_square_brace,
// Non-owning view of a lexeme's text inside the query, delimited by `begin` and `end`.
// Both pointers start out null. operator== compares a zero-terminated string against the
// (end - begin) characters of the view through strequalrange.
8880 struct xpath_lexer_string
8882 const char_t* begin;
8885 xpath_lexer_string(): begin(0), end(0)
8889 bool operator==(const char_t* other) const
8891 size_t length = static_cast<size_t>(end - begin);
8893 return strequalrange(other, begin, length);
// Lexer state: where the current lexeme starts (kept for error reporting), the text of the
// current lexeme when it carries any, and the kind of the current lexeme.
8900 const char_t* _cur_lexeme_pos;
8901 xpath_lexer_string _cur_lexeme_contents;
8903 lexeme_t _cur_lexeme;
// starts reading at the beginning of `query` (the rest of the constructor body is outside
// this excerpt)
8906 explicit xpath_lexer(const char_t* query): _cur(query)
// NOTE(review): body outside this excerpt - presumably exposes the current read position; confirm.
8911 const char_t* state() const
// Body of the lexer's advance routine (its signature line is outside this excerpt).
// Skips whitespace, records the lexeme start in _cur_lexeme_pos and classifies the next
// token into _cur_lexeme. For variable references, numbers, names and quoted strings the
// token text is delimited in _cur_lexeme_contents. Input that does not form a valid token
// is reported as lex_none, and end of input as lex_eof.
// Two-character tokens are recognised by peeking at *(cur+1).
8918 const char_t* cur = _cur;
8920 while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
8922 // save lexeme position for error reporting
8923 _cur_lexeme_pos = cur;
8928 _cur_lexeme = lex_eof;
8932 if (*(cur+1) == '=')
8935 _cur_lexeme = lex_greater_or_equal;
8940 _cur_lexeme = lex_greater;
8945 if (*(cur+1) == '=')
8948 _cur_lexeme = lex_less_or_equal;
8953 _cur_lexeme = lex_less;
8958 if (*(cur+1) == '=')
8961 _cur_lexeme = lex_not_equal;
8965 _cur_lexeme = lex_none;
8971 _cur_lexeme = lex_equal;
8977 _cur_lexeme = lex_plus;
8983 _cur_lexeme = lex_minus;
8989 _cur_lexeme = lex_multiply;
8995 _cur_lexeme = lex_union;
// variable reference: a name, optionally followed by ':' and a second name (qname)
9002 if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
9004 _cur_lexeme_contents.begin = cur;
9006 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9008 if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
9012 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9015 _cur_lexeme_contents.end = cur;
9017 _cur_lexeme = lex_var_ref;
9021 _cur_lexeme = lex_none;
9028 _cur_lexeme = lex_open_brace;
9034 _cur_lexeme = lex_close_brace;
9040 _cur_lexeme = lex_open_square_brace;
9046 _cur_lexeme = lex_close_square_brace;
9052 _cur_lexeme = lex_comma;
9057 if (*(cur+1) == '/')
9060 _cur_lexeme = lex_double_slash;
9065 _cur_lexeme = lex_slash;
// '.' may start "..", a number such as ".5", or be a lone dot
9070 if (*(cur+1) == '.')
9073 _cur_lexeme = lex_double_dot;
9075 else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
9077 _cur_lexeme_contents.begin = cur; // .
9081 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9083 _cur_lexeme_contents.end = cur;
9085 _cur_lexeme = lex_number;
9090 _cur_lexeme = lex_dot;
9096 _cur_lexeme = lex_axis_attribute;
// quoted string: the contents run up to the matching terminator character; a lex_none
// assignment is visible for the failure case (presumably a missing closing quote)
9103 char_t terminator = *cur;
9107 _cur_lexeme_contents.begin = cur;
9108 while (*cur && *cur != terminator) cur++;
9109 _cur_lexeme_contents.end = cur;
9112 _cur_lexeme = lex_none;
9116 _cur_lexeme = lex_quoted_string;
9123 if (*(cur+1) == ':')
9126 _cur_lexeme = lex_double_colon;
9130 _cur_lexeme = lex_none;
// remaining input: numbers (digits with an optional fractional digit run) and names
9135 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
9137 _cur_lexeme_contents.begin = cur;
9139 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9145 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9148 _cur_lexeme_contents.end = cur;
9150 _cur_lexeme = lex_number;
9152 else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
9154 _cur_lexeme_contents.begin = cur;
9156 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9160 if (cur[1] == '*') // namespace test ncname:*
9164 else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
9168 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9172 _cur_lexeme_contents.end = cur;
9174 _cur_lexeme = lex_string;
9178 _cur_lexeme = lex_none;
// NOTE(review): body outside this excerpt - presumably returns the kind of the current lexeme.
9185 lexeme_t current() const
// position in the query where the current lexeme starts (saved for error reporting)
9190 const char_t* current_pos() const
9192 return _cur_lexeme_pos;
// text of the current lexeme; only valid for the lexeme kinds that carry text
// (variable reference, number, name, quoted string), as enforced by the assert
9195 const xpath_lexer_string& contents() const
9197 assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
9199 return _cur_lexeme_contents;
// Node kinds of the XPath expression tree; each trailing comment shows the construct the
// kind stands for, with left/right/third naming the operand sub-expressions.
9206 ast_op_or, // left or right
9207 ast_op_and, // left and right
9208 ast_op_equal, // left = right
9209 ast_op_not_equal, // left != right
9210 ast_op_less, // left < right
9211 ast_op_greater, // left > right
9212 ast_op_less_or_equal, // left <= right
9213 ast_op_greater_or_equal, // left >= right
9214 ast_op_add, // left + right
9215 ast_op_subtract, // left - right
9216 ast_op_multiply, // left * right
9217 ast_op_divide, // left / right
9218 ast_op_mod, // left % right
9219 ast_op_negate, // -left (unary minus; NOTE(review): comment previously read "left - right", duplicating ast_op_subtract - confirm)
9220 ast_op_union, // left | right
9221 ast_predicate, // apply predicate to set; next points to next predicate
9222 ast_filter, // select * from left where right
9223 ast_string_constant, // string constant
9224 ast_number_constant, // number constant
9225 ast_variable, // variable
9226 ast_func_last, // last()
9227 ast_func_position, // position()
9228 ast_func_count, // count(left)
9229 ast_func_id, // id(left)
9230 ast_func_local_name_0, // local-name()
9231 ast_func_local_name_1, // local-name(left)
9232 ast_func_namespace_uri_0, // namespace-uri()
9233 ast_func_namespace_uri_1, // namespace-uri(left)
9234 ast_func_name_0, // name()
9235 ast_func_name_1, // name(left)
9236 ast_func_string_0, // string()
9237 ast_func_string_1, // string(left)
9238 ast_func_concat, // concat(left, right, siblings)
9239 ast_func_starts_with, // starts-with(left, right)
9240 ast_func_contains, // contains(left, right)
9241 ast_func_substring_before, // substring-before(left, right)
9242 ast_func_substring_after, // substring-after(left, right)
9243 ast_func_substring_2, // substring(left, right)
9244 ast_func_substring_3, // substring(left, right, third)
9245 ast_func_string_length_0, // string-length()
9246 ast_func_string_length_1, // string-length(left)
9247 ast_func_normalize_space_0, // normalize-space()
9248 ast_func_normalize_space_1, // normalize-space(left)
9249 ast_func_translate, // translate(left, right, third)
9250 ast_func_boolean, // boolean(left)
9251 ast_func_not, // not(left)
9252 ast_func_true, // true()
9253 ast_func_false, // false()
9254 ast_func_lang, // lang(left)
9255 ast_func_number_0, // number()
9256 ast_func_number_1, // number(left)
9257 ast_func_sum, // sum(left)
9258 ast_func_floor, // floor(left)
9259 ast_func_ceiling, // ceiling(left)
9260 ast_func_round, // round(left)
9261 ast_step, // process set left with step
9262 ast_step_root, // select root node
9264 ast_opt_translate_table, // translate(left, right, third) where right/third are constants
9265 ast_opt_compare_attribute // @name = 'string'
// XPath axis identifiers (only some enumerators are visible in this excerpt); they select
// the traversal performed by step_fill.
9271 axis_ancestor_or_self,
9275 axis_descendant_or_self,
9277 axis_following_sibling,
9281 axis_preceding_sibling,
// Node test kinds checked by step_push (only some enumerators are visible in this excerpt):
// nodetest_type_comment accepts comment nodes, nodetest_all_in_namespace accepts names that
// start with the stored node test string.
9290 nodetest_type_comment,
9295 nodetest_all_in_namespace
// Predicate kind that lets step_do stop after the first match when it is the only predicate
// (presumably a predicate whose constant value is 1 - TODO confirm where it is assigned).
9303 predicate_constant_one
// Maps an axis value to a distinct type so that step_fill/step_do can receive the axis as
// a compile-time constant (read back as T::axis) through an ordinary function argument.
9313 template <axis_t N> struct axis_to_type
9315 static const axis_t axis;
9318 template <axis_t N> const axis_t axis_to_type<N>::axis = N;
// Node of the XPath expression tree. (The class continues beyond this excerpt; only the
// data members and the copy declarations are annotated here.)
9320 class xpath_ast_node
9330 // for ast_step/ast_predicate/ast_filter
9333 // tree node structure
9334 xpath_ast_node* _left;
9335 xpath_ast_node* _right;
9336 xpath_ast_node* _next;
// per-kind payload; which member is meaningful depends on the node kind, as noted below
9340 // value for ast_string_constant
9341 const char_t* string;
9342 // value for ast_number_constant
9344 // variable for ast_variable
9345 xpath_variable* variable;
9346 // node test for ast_step (node name/namespace/node type/pi target)
9347 const char_t* nodetest;
9348 // table for ast_opt_translate_table
9349 const unsigned char* table;
// copy operations are declared here; no definition is visible in this excerpt -
// presumably they are left undefined to forbid copying (TODO confirm)
9352 xpath_ast_node(const xpath_ast_node&);
9353 xpath_ast_node& operator=(const xpath_ast_node&);
// Evaluates an equality-style comparison between `lhs` and `rhs` using `comp`.
// The comparison domain is chosen from the operands' static return types:
//  - neither is a node set: compare as booleans if either is boolean, else as numbers if
//    either is a number, else as strings;
//  - both are node sets: compare the string values of every pair of nodes;
//  - exactly one is a node set: compare the other operand (boolean, number or string)
//    against the node set; for number and string each node's string value is used.
// Temporary strings and node sets live in stack.result and are released by the
// xpath_allocator_capture objects at scope exit.
// NOTE(review): the return statements of the loops are outside this excerpt - presumably
// the first pair satisfying `comp` yields true and exhaustion yields false; confirm.
9355 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9357 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9359 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9361 if (lt == xpath_type_boolean || rt == xpath_type_boolean)
9362 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9363 else if (lt == xpath_type_number || rt == xpath_type_number)
9364 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9365 else if (lt == xpath_type_string || rt == xpath_type_string)
9367 xpath_allocator_capture cr(stack.result);
9369 xpath_string ls = lhs->eval_string(c, stack);
9370 xpath_string rs = rhs->eval_string(c, stack);
9372 return comp(ls, rs);
9375 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9377 xpath_allocator_capture cr(stack.result);
9379 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9380 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9382 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9383 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
// inner capture releases the two string values after each pair
9385 xpath_allocator_capture cri(stack.result);
9387 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
// mixed case: the code below expects the node set on the right-hand side (the statements
// that follow this test are outside this excerpt - presumably the operands are swapped)
9395 if (lt == xpath_type_node_set)
9401 if (lt == xpath_type_boolean)
9402 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9403 else if (lt == xpath_type_number)
9405 xpath_allocator_capture cr(stack.result);
9407 double l = lhs->eval_number(c, stack);
9408 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9410 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9412 xpath_allocator_capture cri(stack.result);
9414 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9420 else if (lt == xpath_type_string)
9422 xpath_allocator_capture cr(stack.result);
9424 xpath_string l = lhs->eval_string(c, stack);
9425 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9427 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9429 xpath_allocator_capture cri(stack.result);
9431 if (comp(l, string_value(*ri, stack.result)))
9439 assert(false && "Wrong types");
// Decides whether evaluation may stop after the first matching node.
// For a set in document order (type_sorted) this holds for every mode except
// nodeset_eval_all; for any other ordering it holds only for nodeset_eval_any.
9443 static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
9445 return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
// Evaluates a relational comparison between `lhs` and `rhs` using `comp`.
// All comparisons are numeric: non-node-set operands are evaluated with eval_number, and
// each node of a node-set operand contributes the number converted from its string value.
// The four branches cover scalar/scalar, set/set, scalar/set and set/scalar.
// Temporary strings and node sets live in stack.result and are released by the
// xpath_allocator_capture objects at scope exit.
// NOTE(review): the return statements of the loops are outside this excerpt - presumably
// the first pair satisfying `comp` yields true and exhaustion yields false; confirm.
9448 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9450 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9452 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9453 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9454 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9456 xpath_allocator_capture cr(stack.result);
9458 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9459 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9461 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9463 xpath_allocator_capture cri(stack.result);
// the left number is converted once per left node and reused across the inner loop
9465 double l = convert_string_to_number(string_value(*li, stack.result).c_str());
9467 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9469 xpath_allocator_capture crii(stack.result);
9471 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9478 else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
9480 xpath_allocator_capture cr(stack.result);
9482 double l = lhs->eval_number(c, stack);
9483 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9485 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9487 xpath_allocator_capture cri(stack.result);
9489 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9495 else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
9497 xpath_allocator_capture cr(stack.result);
9499 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9500 double r = rhs->eval_number(c, stack);
9502 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9504 xpath_allocator_capture cri(stack.result);
9506 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
9514 assert(false && "Wrong types");
// Filters the nodes of `ns` from index `first` onwards with a predicate that is not of
// number type: `expr` is evaluated as a boolean for each node, with a context made of the
// node, the running position `i` and the number of nodes being filtered.
// NOTE(review): the statements that keep matching nodes, honour `once` and truncate the
// set, as well as the initial value of `i`, are outside this excerpt.
9519 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9521 assert(ns.size() >= first);
9522 assert(expr->rettype() != xpath_type_number);
9525 size_t size = ns.size() - first;
9527 xpath_node* last = ns.begin() + first;
9529 // remove_if... or well, sort of
9530 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9532 xpath_context c(*it, i, size);
9534 if (expr->eval_boolean(c, stack))
// Filters the nodes of `ns` from index `first` onwards with a predicate of number type:
// `expr` is evaluated as a number for each node and compared for equality with the node's
// running position `i`.
// NOTE(review): the statements that keep matching nodes, honour `once` and truncate the
// set, as well as the initial value of `i`, are outside this excerpt.
9545 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9547 assert(ns.size() >= first);
9548 assert(expr->rettype() == xpath_type_number);
9551 size_t size = ns.size() - first;
9553 xpath_node* last = ns.begin() + first;
9555 // remove_if... or well, sort of
9556 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9558 xpath_context c(*it, i, size);
9560 if (expr->eval_number(c, stack) == i)
// Applies a number predicate that is evaluated only once instead of once per node.
// `expr` is evaluated with a placeholder context (default node, position 1); when the
// result lies in [1, size] it is converted to an index and the node at that 1-based
// position, counted from `first`, is read.
// NOTE(review): the statements that store the selected node and truncate the set (and any
// check that the number is integral) are outside this excerpt.
9571 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
9573 assert(ns.size() >= first);
9574 assert(expr->rettype() == xpath_type_number);
9576 size_t size = ns.size() - first;
9578 xpath_node* last = ns.begin() + first;
9580 xpath_context c(xpath_node(), 1, size);
9582 double er = expr->eval_number(c, stack);
9584 if (er >= 1.0 && er <= size)
9586 size_t eri = static_cast<size_t>(er);
9590 xpath_node r = last[eri - 1];
// Applies this predicate/filter node to the nodes of `ns` from index `first` onwards.
// Does nothing when there are no such nodes. Constant predicates go to
// apply_predicate_number_const, other predicates of number type to apply_predicate_number,
// and the boolean variant handles the rest; the predicate expression is `_right`.
9599 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
9601 if (ns.size() == first) return;
9603 assert(_type == ast_filter || _type == ast_predicate);
9605 if (_test == predicate_constant || _test == predicate_constant_one)
9606 apply_predicate_number_const(ns, first, _right, stack);
9607 else if (_right->rettype() == xpath_type_number)
9608 apply_predicate_number(ns, first, _right, stack, once);
9610 apply_predicate_boolean(ns, first, _right, stack, once);
// Applies the chain of predicates starting at `_right` (linked through `_next`) to the
// nodes of `ns` from index `first` onwards. Does nothing when there are no such nodes.
// The early-exit flag from eval_once is passed only to the final predicate in the chain;
// every earlier predicate receives false.
9613 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
9615 if (ns.size() == first) return;
9617 bool last_once = eval_once(ns.type(), eval);
9619 for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
9620 pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
// Tests attribute `a` (owned by `parent`) against this step's node test and appends it to
// `ns` when it passes. Namespace declarations (see is_xpath_attribute) are never pushed.
// Visible tests: an exact name match against _data.nodetest, the "any node" test, and the
// namespace test that accepts names starting with _data.nodetest.
// NOTE(review): the switch header, remaining cases and return statements are outside this
// excerpt - callers combine the bool result with `once`, so it presumably reports whether
// the attribute was pushed; confirm.
9623 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
// a missing name is treated as an empty string
9627 const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
9632 if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
9634 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9639 case nodetest_type_node:
9641 if (is_xpath_attribute(name))
9643 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9648 case nodetest_all_in_namespace:
9649 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
9651 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
// Tests node `n` against this step's node test and appends it to `ns` when it passes.
// Visible tests: element with a given name, any node, comment, text (pcdata or cdata),
// processing instruction (any, or with a given target name), any element, and elements
// whose name starts with _data.nodetest (namespace test).
// NOTE(review): the switch header, some case labels and the return statements are outside
// this excerpt - callers combine the bool result with `once`, so it presumably reports
// whether the node was pushed; confirm.
9663 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
9667 xml_node_type type = PUGI__NODETYPE(n);
9672 if (type == node_element && n->name && strequal(n->name, _data.nodetest))
9674 ns.push_back(xml_node(n), alloc);
9679 case nodetest_type_node:
9680 ns.push_back(xml_node(n), alloc);
9683 case nodetest_type_comment:
9684 if (type == node_comment)
9686 ns.push_back(xml_node(n), alloc);
9691 case nodetest_type_text:
9692 if (type == node_pcdata || type == node_cdata)
9694 ns.push_back(xml_node(n), alloc);
9699 case nodetest_type_pi:
9700 if (type == node_pi)
9702 ns.push_back(xml_node(n), alloc);
9708 if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
9710 ns.push_back(xml_node(n), alloc);
9716 if (type == node_element)
9718 ns.push_back(xml_node(n), alloc);
9723 case nodetest_all_in_namespace:
9724 if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
9726 ns.push_back(xml_node(n), alloc);
// NOTE(review): the message says "axis" although this switch is over node tests
9732 assert(false && "Unknown axis");
// Collects into `ns` the nodes reachable from element/node `n` along the axis given at
// compile time by T::axis, pushing each candidate through step_push (which applies the
// node test). Traversals are iterative and follow the first_child / next_sibling /
// prev_sibling_c / parent links.
// `step_push(...) & once` combines two bools with bitwise AND: it is true only when a node
// was pushed and `once` is set (the statement it guards is outside this excerpt -
// presumably an early return after the first match; confirm).
// Several case labels and loop headers are outside this excerpt.
9738 template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
9740 const axis_t axis = T::axis;
9744 case axis_attribute:
9746 for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
9747 if (step_push(ns, a, n, alloc) & once)
// direct children of n
9755 for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
9756 if (step_push(ns, c, alloc) & once)
9762 case axis_descendant:
9763 case axis_descendant_or_self:
9765 if (axis == axis_descendant_or_self)
9766 if (step_push(ns, n, alloc) & once)
// depth-first walk of the subtree below n: descend into children first, otherwise climb
// until a node with a next sibling is found, stopping when the climb reaches n
9769 xml_node_struct* cur = n->first_child;
9773 if (step_push(ns, cur, alloc) & once)
9776 if (cur->first_child)
9777 cur = cur->first_child;
9780 while (!cur->next_sibling)
9784 if (cur == n) return;
9787 cur = cur->next_sibling;
9794 case axis_following_sibling:
9796 for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
9797 if (step_push(ns, c, alloc) & once)
9803 case axis_preceding_sibling:
// the loop condition c->next_sibling stops the backwards walk; presumably prev_sibling_c is
// a cyclic link, so stepping back from the first sibling lands on the last one, whose
// next_sibling is null - TODO confirm
9805 for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
9806 if (step_push(ns, c, alloc) & once)
9812 case axis_following:
9814 xml_node_struct* cur = n;
9816 // exit from this node so that we don't include descendants
9817 while (!cur->next_sibling)
9824 cur = cur->next_sibling;
9828 if (step_push(ns, cur, alloc) & once)
9831 if (cur->first_child)
9832 cur = cur->first_child;
9835 while (!cur->next_sibling)
9842 cur = cur->next_sibling;
9849 case axis_preceding:
9851 xml_node_struct* cur = n;
9853 // exit from this node so that we don't include descendants
9854 while (!cur->prev_sibling_c->next_sibling)
9861 cur = cur->prev_sibling_c;
// descend to the last child first: first_child->prev_sibling_c
9865 if (cur->first_child)
9866 cur = cur->first_child->prev_sibling_c;
9869 // leaf node, can't be ancestor
9870 if (step_push(ns, cur, alloc) & once)
9873 while (!cur->prev_sibling_c->next_sibling)
// ancestors of n are not part of the preceding axis, so they are filtered out here
9879 if (!node_is_ancestor(cur, n))
9880 if (step_push(ns, cur, alloc) & once)
9884 cur = cur->prev_sibling_c;
9892 case axis_ancestor_or_self:
9894 if (axis == axis_ancestor_or_self)
9895 if (step_push(ns, n, alloc) & once)
9898 xml_node_struct* cur = n->parent;
9902 if (step_push(ns, cur, alloc) & once)
// the two single pushes below test n itself and n->parent (their case labels are outside
// this excerpt - presumably the self and parent axes)
9913 step_push(ns, n, alloc);
9921 step_push(ns, n->parent, alloc);
9927 assert(false && "Unimplemented axis");
// Collects into `ns` the nodes reachable from attribute `a` (owned by element `p`) along
// the axis given at compile time by T::axis.
// The attribute itself is only pushed for the "any node" test, because the principal node
// type of these axes excludes attributes. The ancestor walks start at `p`, the following
// walk starts from `p`, and the preceding axis is delegated to the node overload for `p`.
// `step_push(...) & once` is true only when a node was pushed and `once` is set (the
// guarded statement is outside this excerpt - presumably an early return; confirm).
// Several case labels and loop headers are outside this excerpt.
9931 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
9933 const axis_t axis = T::axis;
9938 case axis_ancestor_or_self:
9940 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
9941 if (step_push(ns, a, p, alloc) & once)
9944 xml_node_struct* cur = p;
9948 if (step_push(ns, cur, alloc) & once)
9957 case axis_descendant_or_self:
9960 if (_test == nodetest_type_node) // reject attributes based on principal node type test
9961 step_push(ns, a, p, alloc);
9966 case axis_following:
9968 xml_node_struct* cur = p;
9972 if (cur->first_child)
9973 cur = cur->first_child;
9976 while (!cur->next_sibling)
9983 cur = cur->next_sibling;
9986 if (step_push(ns, cur, alloc) & once)
// single push of the owning element (its case label is outside this excerpt - presumably
// the parent axis)
9995 step_push(ns, p, alloc);
10000 case axis_preceding:
10002 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
10003 step_fill(ns, p, alloc, once, v);
10008 assert(false && "Unimplemented axis");
// Dispatches an xpath_node context to the node overload or the attribute overload of step_fill.
// The attribute overload is used only when T::axis is one of the axes listed in axis_has_attributes
// and `xn` has both an attribute and a parent.
// NOTE(review): the condition guarding the first call is not visible in this excerpt (presumably a test on xn.node()).
10012 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
10014 const axis_t axis = T::axis;
10015 const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
10018 step_fill(ns, xn.node().internal_object(), alloc, once, v);
10019 else if (axis_has_attributes && xn.attribute() && xn.parent())
10020 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
// Evaluates one location step for axis T::axis and returns the resulting raw node set.
// The set is filled either from every node of the left-hand node set or from the context node c.n,
// predicates (_right) are applied after each fill, and duplicates are removed when the set ended up unsorted.
// NOTE(review): braces, the start of the expression ending at the three `||`-joined conditions, the branch
// conditions and the final return are not visible in this excerpt; `once` is presumably initialized by that expression.
10023 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
10025 const axis_t axis = T::axis;
// ancestor / preceding style axes produce their nodes in reverse order, so the set is tagged accordingly
10026 const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
10027 const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
10030 (axis == axis_attribute && _test == nodetest_name) ||
10031 (!_right && eval_once(axis_type, eval)) ||
10032 (_right && !_right->_next && _right->_test == predicate_constant_one);
10034 xpath_node_set_raw ns;
10035 ns.set_type(axis_type);
// the left-hand expression is always evaluated in full (nodeset_eval_all), regardless of `eval`
10039 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
10041 // self axis preserves the original order
10042 if (axis == axis_self) ns.set_type(s.type());
10044 for (const xpath_node* it = s.begin(); it != s.end(); ++it)
// remember where this context node's results start so predicates only see the newly added range
10046 size_t size = ns.size();
10048 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
10049 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
10051 step_fill(ns, *it, stack.result, once, v);
10052 if (_right) apply_predicates(ns, size, stack, eval);
// variant that uses the context node c.n directly; predicates are applied from index 0
10057 step_fill(ns, c.n, stack.result, once, v);
10058 if (_right) apply_predicates(ns, 0, stack, eval);
10061 // child, attribute and self axes always generate unique set of nodes
10062 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
10063 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
10064 ns.remove_duplicates();
// Constructs a string-constant node: `type` must be ast_string_constant (asserted) and `value` is stored
// in _data.string as a pointer (no copy is made here). Axis/test are 0 and all links start null.
10070 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
10071 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10073 assert(type == ast_string_constant);
10074 _data.string = value;
// Constructs a number-constant node: `type` must be ast_number_constant (asserted) and `value` is stored
// in _data.number. Axis/test are 0 and all links start null.
10077 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
10078 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10080 assert(type == ast_number_constant);
10081 _data.number = value;
// Constructs a variable-reference node: `type` must be ast_variable (asserted) and the variable pointer
// is stored in _data.variable. Axis/test are 0 and all links start null.
10084 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
10085 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10087 assert(type == ast_variable);
10088 _data.variable = value;
// Constructs a generic node with an explicit return type and optional left/right children
// (both default to null). Axis/test are 0 and _next starts null.
10091 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
10092 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
// Constructs a location-step node: `type` must be ast_step (asserted). The return type is always
// xpath_type_node_set; `axis` and `test` are narrowed to char, and `contents` is stored in _data.nodetest.
10096 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
10097 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
10099 assert(type == ast_step);
10100 _data.nodetest = contents;
// Constructs a filter or predicate node: `type` must be ast_filter or ast_predicate (asserted).
// The return type is always xpath_type_node_set and the predicate kind `test` is stored in _test.
10103 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
10104 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
10106 assert(type == ast_filter || type == ast_predicate);
// Setter taking the node to link as `value`.
// NOTE(review): the body is not visible in this excerpt; presumably it assigns _next - confirm.
10109 void set_next(xpath_ast_node* value)
// Setter taking the node to attach as `value`.
// NOTE(review): the body is not visible in this excerpt; presumably it assigns _right - confirm.
10114 void set_right(xpath_ast_node* value)
// Evaluates this AST node as a boolean for context `c`, using `stack` for temporary allocations.
// NOTE(review): the embedded line numbers skip values, so the switch header, braces and several case labels
// (and the bodies of the true/false cases) are not visible in this excerpt.
10119 bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
// logical or / and of both operands; C++ short-circuiting means the right side may not be evaluated
10124 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
10127 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
10130 return compare_eq(_left, _right, c, stack, equal_to());
10132 case ast_op_not_equal:
10133 return compare_eq(_left, _right, c, stack, not_equal_to());
10136 return compare_rel(_left, _right, c, stack, less());
// "greater" comparisons reuse the "less" comparators with the operands swapped
10138 case ast_op_greater:
10139 return compare_rel(_right, _left, c, stack, less());
10141 case ast_op_less_or_equal:
10142 return compare_rel(_left, _right, c, stack, less_equal());
10144 case ast_op_greater_or_equal:
10145 return compare_rel(_right, _left, c, stack, less_equal());
10147 case ast_func_starts_with:
// both argument strings are evaluated under an allocator capture on stack.result
// (presumably the capture releases them when it goes out of scope - TODO confirm)
10149 xpath_allocator_capture cr(stack.result);
10151 xpath_string lr = _left->eval_string(c, stack);
10152 xpath_string rr = _right->eval_string(c, stack);
10154 return starts_with(lr.c_str(), rr.c_str());
10157 case ast_func_contains:
10159 xpath_allocator_capture cr(stack.result);
10161 xpath_string lr = _left->eval_string(c, stack);
10162 xpath_string rr = _right->eval_string(c, stack);
10164 return find_substring(lr.c_str(), rr.c_str()) != 0;
10167 case ast_func_boolean:
10168 return _left->eval_boolean(c, stack);
10171 return !_left->eval_boolean(c, stack);
10173 case ast_func_true:
10176 case ast_func_false:
10179 case ast_func_lang:
// lang() is false when the context is an attribute
10181 if (c.n.attribute()) return false;
10183 xpath_allocator_capture cr(stack.result);
10185 xpath_string lang = _left->eval_string(c, stack);
// walk from the context node up through its parents, looking at the xml:lang attribute
10187 for (xml_node n = c.n.node(); n; n = n.parent())
10189 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
10193 const char_t* value = a.value();
10195 // strnicmp / strncasecmp is not portable
// ASCII case-insensitive comparison of the requested language against the attribute value
10196 for (const char_t* lit = lang.c_str(); *lit; ++lit)
10198 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
// accept when the character at `value` is the terminator or a '-' separator
10202 return *value == 0 || *value == '-';
10209 case ast_opt_compare_attribute:
// the right operand is either a string constant or a variable providing a string
10211 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
// the attribute is looked up by the name stored in the left step's node test
10213 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
10215 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
// variable reference: its declared type must match this node's return type
10220 assert(_rettype == _data.variable->type());
10222 if (_rettype == xpath_type_boolean)
10223 return _data.variable->get_boolean();
10225 // fallthrough to type conversion
// conversion to boolean by return type: via convert_number_to_boolean for numbers,
// non-empty test for strings and node sets
10232 case xpath_type_number:
10233 return convert_number_to_boolean(eval_number(c, stack));
10235 case xpath_type_string:
10237 xpath_allocator_capture cr(stack.result);
10239 return !eval_string(c, stack).empty();
10242 case xpath_type_node_set:
10244 xpath_allocator_capture cr(stack.result);
// nodeset_eval_any is requested because only emptiness of the set is tested
10246 return !eval_node_set(c, stack, nodeset_eval_any).empty();
10250 assert(false && "Wrong expression for return type boolean");
// Evaluates this AST node as a double for context `c`, using `stack` for temporary allocations.
// NOTE(review): the embedded line numbers skip values, so the switch header, braces, several case labels
// and the declaration/return of the accumulator `r` used by the summing loop are not visible in this excerpt.
10257 double eval_number(const xpath_context& c, const xpath_stack& stack)
10262 return _left->eval_number(c, stack) + _right->eval_number(c, stack);
10264 case ast_op_subtract:
10265 return _left->eval_number(c, stack) - _right->eval_number(c, stack);
10267 case ast_op_multiply:
10268 return _left->eval_number(c, stack) * _right->eval_number(c, stack);
10270 case ast_op_divide:
10271 return _left->eval_number(c, stack) / _right->eval_number(c, stack);
// remainder computed with fmod on the two operand values
10274 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
10276 case ast_op_negate:
10277 return -_left->eval_number(c, stack);
10279 case ast_number_constant:
10280 return _data.number;
// last() and position() read the size/position stored in the evaluation context
10282 case ast_func_last:
10283 return static_cast<double>(c.size);
10285 case ast_func_position:
10286 return static_cast<double>(c.position);
10288 case ast_func_count:
10290 xpath_allocator_capture cr(stack.result);
10292 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
10295 case ast_func_string_length_0:
10297 xpath_allocator_capture cr(stack.result);
// zero-argument form operates on the string value of the context node
10299 return static_cast<double>(string_value(c.n, stack.result).length());
10302 case ast_func_string_length_1:
10304 xpath_allocator_capture cr(stack.result);
10306 return static_cast<double>(_left->eval_string(c, stack).length());
10309 case ast_func_number_0:
10311 xpath_allocator_capture cr(stack.result);
10313 return convert_string_to_number(string_value(c.n, stack.result).c_str());
10316 case ast_func_number_1:
10317 return _left->eval_number(c, stack);
10321 xpath_allocator_capture cr(stack.result);
10325 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
// adds up the numeric value of each node's string value; a nested capture scopes each iteration's string
10327 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
10329 xpath_allocator_capture cri(stack.result);
10331 r += convert_string_to_number(string_value(*it, stack.result).c_str());
10337 case ast_func_floor:
10339 double r = _left->eval_number(c, stack);
// `r == r` is false only for NaN, which is returned unchanged instead of being passed to floor
10341 return r == r ? floor(r) : r;
10344 case ast_func_ceiling:
10346 double r = _left->eval_number(c, stack);
// same NaN pass-through as for floor
10348 return r == r ? ceil(r) : r;
10351 case ast_func_round:
10352 return round_nearest_nzero(_left->eval_number(c, stack));
// variable reference: its declared type must match this node's return type
10356 assert(_rettype == _data.variable->type());
10358 if (_rettype == xpath_type_number)
10359 return _data.variable->get_number();
10361 // fallthrough to type conversion
// conversion to number by return type: booleans map to 1/0, strings and node sets go through eval_string
10368 case xpath_type_boolean:
10369 return eval_boolean(c, stack) ? 1 : 0;
10371 case xpath_type_string:
10373 xpath_allocator_capture cr(stack.result);
10375 return convert_string_to_number(eval_string(c, stack).c_str());
10378 case xpath_type_node_set:
10380 xpath_allocator_capture cr(stack.result);
10382 return convert_string_to_number(eval_string(c, stack).c_str());
10386 assert(false && "Wrong expression for return type number");
// Evaluates a concat() node: evaluates _left and every node in the _right/_next chain to strings,
// then copies them back to back into one buffer allocated from stack.result.
// NOTE(review): the declarations of `count`, `pos` and `length`, the copy statement inside the inner loop
// and the terminator write are not visible in this excerpt.
10394 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
10396 assert(_type == ast_func_concat);
10398 xpath_allocator_capture ct(stack.temp);
10400 // count the string number
10402 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
10404 // gather all strings
// up to four strings are held in this local array without a separate allocation
10405 xpath_string static_buffer[4];
10406 xpath_string* buffer = static_buffer;
10408 // allocate on-heap for large concats
10409 if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
10411 buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
10415 // evaluate all strings to temporary stack
10416 xpath_stack swapped_stack = {stack.temp, stack.result};
10418 buffer[0] = _left->eval_string(c, swapped_stack);
10421 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
10422 assert(pos == count);
10424 // get total length
10426 for (size_t i = 0; i < count; ++i) length += buffer[i].length();
10428 // create final string
// one extra char_t is reserved beyond the summed lengths
10429 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
10432 char_t* ri = result;
10434 for (size_t j = 0; j < count; ++j)
10435 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
// the returned string wraps the buffer [result, ri) without another copy being made here
10440 return xpath_string::from_heap_preallocated(result, ri);
// Evaluates this AST node as an xpath_string for context `c`.
// Results meant to outlive the call are built on stack.result; several cases evaluate their inputs with
// swapped_stack ({stack.temp, stack.result}) so that intermediate strings land on the other allocator.
// NOTE(review): the embedded line numbers skip values, so the switch header, braces and some case labels
// are not visible in this excerpt.
10443 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
10447 case ast_string_constant:
10448 return xpath_string::from_const(_data.string);
// *_0 variants use the context node c.n; *_1 variants use the first node of the argument node set
10450 case ast_func_local_name_0:
10452 xpath_node na = c.n;
10454 return xpath_string::from_const(local_name(na));
10457 case ast_func_local_name_1:
10459 xpath_allocator_capture cr(stack.result);
10461 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10462 xpath_node na = ns.first();
10464 return xpath_string::from_const(local_name(na));
10467 case ast_func_name_0:
10469 xpath_node na = c.n;
10471 return xpath_string::from_const(qualified_name(na));
10474 case ast_func_name_1:
10476 xpath_allocator_capture cr(stack.result);
10478 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10479 xpath_node na = ns.first();
10481 return xpath_string::from_const(qualified_name(na));
10484 case ast_func_namespace_uri_0:
10486 xpath_node na = c.n;
10488 return xpath_string::from_const(namespace_uri(na));
10491 case ast_func_namespace_uri_1:
10493 xpath_allocator_capture cr(stack.result);
10495 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10496 xpath_node na = ns.first();
10498 return xpath_string::from_const(namespace_uri(na));
10501 case ast_func_string_0:
10502 return string_value(c.n, stack.result);
10504 case ast_func_string_1:
10505 return _left->eval_string(c, stack);
10507 case ast_func_concat:
10508 return eval_string_concat(c, stack);
10510 case ast_func_substring_before:
10512 xpath_allocator_capture cr(stack.temp);
10514 xpath_stack swapped_stack = {stack.temp, stack.result};
10516 xpath_string s = _left->eval_string(c, swapped_stack);
10517 xpath_string p = _right->eval_string(c, swapped_stack);
10519 const char_t* pos = find_substring(s.c_str(), p.c_str());
// the prefix [s, pos) is copied onto stack.result; no match yields an empty string
10521 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
10524 case ast_func_substring_after:
10526 xpath_allocator_capture cr(stack.temp);
10528 xpath_stack swapped_stack = {stack.temp, stack.result};
10530 xpath_string s = _left->eval_string(c, swapped_stack);
10531 xpath_string p = _right->eval_string(c, swapped_stack);
10533 const char_t* pos = find_substring(s.c_str(), p.c_str());
10534 if (!pos) return xpath_string();
// the result starts right after the matched pattern and runs to the end of s
10536 const char_t* rbegin = pos + p.length();
10537 const char_t* rend = s.c_str() + s.length();
// a suffix of a non-heap string is returned by pointer; a heap string is copied onto stack.result
10539 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10542 case ast_func_substring_2:
10544 xpath_allocator_capture cr(stack.temp);
10546 xpath_stack swapped_stack = {stack.temp, stack.result};
10548 xpath_string s = _left->eval_string(c, swapped_stack);
10549 size_t s_length = s.length();
10551 double first = round_nearest(_right->eval_number(c, stack));
10553 if (is_nan(first)) return xpath_string(); // NaN
10554 else if (first >= s_length + 1) return xpath_string();
// positions are 1-based here; values below 1 are clamped to 1
10556 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10557 assert(1 <= pos && pos <= s_length + 1);
10559 const char_t* rbegin = s.c_str() + (pos - 1);
10560 const char_t* rend = s.c_str() + s.length();
10562 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10565 case ast_func_substring_3:
10567 xpath_allocator_capture cr(stack.temp);
10569 xpath_stack swapped_stack = {stack.temp, stack.result};
10571 xpath_string s = _left->eval_string(c, swapped_stack);
10572 size_t s_length = s.length();
// `last` is the exclusive 1-based end: rounded start plus rounded length argument
10574 double first = round_nearest(_right->eval_number(c, stack));
10575 double last = first + round_nearest(_right->_next->eval_number(c, stack));
10577 if (is_nan(first) || is_nan(last)) return xpath_string();
10578 else if (first >= s_length + 1) return xpath_string();
10579 else if (first >= last) return xpath_string();
10580 else if (last < 1) return xpath_string();
// clamp the range to [1, s_length + 1]
10582 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10583 size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
10585 assert(1 <= pos && pos <= end && end <= s_length + 1);
10586 const char_t* rbegin = s.c_str() + (pos - 1);
10587 const char_t* rend = s.c_str() + (end - 1);
// only a range that reaches the end of a non-heap string can be returned by pointer; everything else is copied
10589 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
10592 case ast_func_normalize_space_0:
10594 xpath_string s = string_value(c.n, stack.result);
// s.data(stack.result) yields a mutable buffer that normalize_space rewrites in place, returning the new end
10596 char_t* begin = s.data(stack.result);
10597 char_t* end = normalize_space(begin);
10599 return xpath_string::from_heap_preallocated(begin, end);
10602 case ast_func_normalize_space_1:
10604 xpath_string s = _left->eval_string(c, stack);
10606 char_t* begin = s.data(stack.result);
10607 char_t* end = normalize_space(begin);
10609 return xpath_string::from_heap_preallocated(begin, end);
10612 case ast_func_translate:
10614 xpath_allocator_capture cr(stack.temp);
10616 xpath_stack swapped_stack = {stack.temp, stack.result};
// the subject string uses the normal stack because it becomes the result; the two mapping strings use the swapped stack
10618 xpath_string s = _left->eval_string(c, stack);
10619 xpath_string from = _right->eval_string(c, swapped_stack);
10620 xpath_string to = _right->_next->eval_string(c, swapped_stack);
10622 char_t* begin = s.data(stack.result);
10623 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
10625 return xpath_string::from_heap_preallocated(begin, end);
10628 case ast_opt_translate_table:
10630 xpath_string s = _left->eval_string(c, stack);
// variant that applies the table stored in _data.table instead of evaluating mapping strings
10632 char_t* begin = s.data(stack.result);
10633 char_t* end = translate_table(begin, _data.table);
10635 return xpath_string::from_heap_preallocated(begin, end);
// variable reference: its declared type must match this node's return type
10640 assert(_rettype == _data.variable->type());
10642 if (_rettype == xpath_type_string)
10643 return xpath_string::from_const(_data.variable->get_string());
10645 // fallthrough to type conversion
// conversion to string by return type
10652 case xpath_type_boolean:
10653 return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
10655 case xpath_type_number:
10656 return convert_number_to_string(eval_number(c, stack), stack.result);
10658 case xpath_type_node_set:
10660 xpath_allocator_capture cr(stack.temp);
10662 xpath_stack swapped_stack = {stack.temp, stack.result};
// only the first node is needed; an empty set converts to an empty string
10664 xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
10665 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
10669 assert(false && "Wrong expression for return type string");
10670 return xpath_string();
// Evaluates this AST node as a raw node set for context `c`; `eval` is the evaluation mode passed on to
// sub-expressions and axis steps.
// NOTE(review): the embedded line numbers skip values, so the switch headers, braces, several case labels
// and some return statements are not visible in this excerpt.
10676 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
10682 xpath_allocator_capture cr(stack.temp);
10684 xpath_stack swapped_stack = {stack.temp, stack.result};
// the left set is evaluated with the swapped stack, the right set with the normal one; left is then appended into right
10686 xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval);
10687 xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval);
10689 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
10690 rs.set_type(xpath_node_set::type_unsorted);
10692 rs.append(ls.begin(), ls.end(), stack.result);
10693 rs.remove_duplicates();
// filter/predicate evaluation: a constant [1] predicate only needs the first node of the left set
10700 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
10702 // either expression is a number or it contains position() call; sort by document order
10703 if (_test != predicate_posinv) set.sort_do();
10705 bool once = eval_once(set.type(), eval);
10707 apply_predicate(set, 0, stack, once);
10713 return xpath_node_set_raw();
// each axis is dispatched to the step_do instantiation for its compile-time axis tag
10719 case axis_ancestor:
10720 return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
10722 case axis_ancestor_or_self:
10723 return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
10725 case axis_attribute:
10726 return step_do(c, stack, eval, axis_to_type<axis_attribute>());
10729 return step_do(c, stack, eval, axis_to_type<axis_child>());
10731 case axis_descendant:
10732 return step_do(c, stack, eval, axis_to_type<axis_descendant>());
10734 case axis_descendant_or_self:
10735 return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
10737 case axis_following:
10738 return step_do(c, stack, eval, axis_to_type<axis_following>());
10740 case axis_following_sibling:
10741 return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
10743 case axis_namespace:
10744 // namespaced axis is not supported
10745 return xpath_node_set_raw();
10748 return step_do(c, stack, eval, axis_to_type<axis_parent>());
10750 case axis_preceding:
10751 return step_do(c, stack, eval, axis_to_type<axis_preceding>());
10753 case axis_preceding_sibling:
10754 return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
10757 return step_do(c, stack, eval, axis_to_type<axis_self>());
10760 assert(false && "Unknown axis");
10761 return xpath_node_set_raw();
10765 case ast_step_root:
10767 assert(!_right); // root step can't have any predicates
10769 xpath_node_set_raw ns;
10771 ns.set_type(xpath_node_set::type_sorted);
// the root is taken from the context node, or from the attribute's parent when the context is an attribute
10773 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
10774 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
// variable reference: its declared type must match this node's return type
10781 assert(_rettype == _data.variable->type());
10783 if (_rettype == xpath_type_node_set)
10785 const xpath_node_set& s = _data.variable->get_node_set();
// the variable's nodes are copied into a fresh raw set allocated from stack.result, keeping its ordering type
10787 xpath_node_set_raw ns;
10789 ns.set_type(s.type());
10790 ns.append(s.begin(), s.end(), stack.result);
10795 // fallthrough to type conversion
10799 assert(false && "Wrong expression for return type node set");
10800 return xpath_node_set_raw();
// Recursively optimizes the tree: the left, right and next links are optimized first (when non-null),
// then optimize_self is applied to this node, so children are rewritten before their parent.
10804 void optimize(xpath_allocator* alloc)
10806 if (_left) _left->optimize(alloc);
10807 if (_right) _right->optimize(alloc);
10808 if (_next) _next->optimize(alloc);
10810 optimize_self(alloc);
// Applies local rewrites to this node only (children are handled by optimize): predicate simplification and
// classification, collapsing of descendant-or-self::node() steps, table-based translate() and a fast path
// for attribute-to-string comparison. `alloc` is used to build the translate table.
// NOTE(review): braces and a few lines (the last condition of the step rewrite, an `else`, and the check on
// `table`) are not visible in this excerpt.
10813 void optimize_self(xpath_allocator* alloc)
10815 // Rewrite [position()=expr] with [expr]
10816 // Note that this step has to go before classification to recognize [position()=1]
10817 if ((_type == ast_filter || _type == ast_predicate) &&
10818 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
10820 _right = _right->_right;
10823 // Classify filter/predicate ops to perform various optimizations during evaluation
10824 if (_type == ast_filter || _type == ast_predicate)
10826 assert(_test == predicate_default);
// the literal constant 1 gets its own class; other number-typed constants, variables and last() are predicate_constant
10828 if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
10829 _test = predicate_constant_one;
10830 else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
10831 _test = predicate_constant;
10832 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
10833 _test = predicate_posinv;
10836 // Rewrite descendant-or-self::node()/child::foo with descendant::foo
10837 // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
10838 // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
10839 // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
10840 if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left &&
10841 _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
10844 if (_axis == axis_child || _axis == axis_descendant)
10845 _axis = axis_descendant;
10847 _axis = axis_descendant_or_self;
// the absorbed descendant-or-self step is dropped from the chain
10849 _left = _left->_left;
10852 // Use optimized lookup table implementation for translate() with constant arguments
10853 if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
10855 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
10859 _type = ast_opt_translate_table;
10860 _data.table = table;
10864 // Use optimized path for @attr = 'value' or @attr = $value
10865 if (_type == ast_op_equal &&
10866 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
10867 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
10869 _type = ast_opt_compare_attribute;
// Reports whether this expression is positionally invariant; optimize_self uses the result to classify
// a predicate as predicate_posinv.
// NOTE(review): the switch header, the return statements attached to the case labels and some labels are
// not visible in this excerpt, so the per-case results cannot be confirmed from here.
10873 bool is_posinv_expr() const
10877 case ast_func_position:
10878 case ast_func_last:
10881 case ast_string_constant:
10882 case ast_number_constant:
10887 case ast_step_root:
10890 case ast_predicate:
// generic handling: the left operand and every node in the _right/_next chain must be invariant as well
10895 if (_left && !_left->is_posinv_expr()) return false;
10897 for (xpath_ast_node* n = _right; n; n = n->_next)
10898 if (!n->is_posinv_expr()) return false;
// Checks the predicates attached to a step node (asserted to be ast_step): walks the _right/_next chain,
// asserts each entry is an ast_predicate and tests whether its class is predicate_posinv.
// NOTE(review): the return statements are not visible in this excerpt; presumably a predicate that is not
// predicate_posinv makes the result false - confirm.
10904 bool is_posinv_step() const
10906 assert(_type == ast_step);
10908 for (xpath_ast_node* n = _right; n; n = n->_next)
10910 assert(n->_type == ast_predicate);
10912 if (n->_test != predicate_posinv)
// Returns the node's return type, converting the stored _rettype back to xpath_value_type.
10919 xpath_value_type rettype() const
10921 return static_cast<xpath_value_type>(_rettype);
// Parser state for building an XPath AST.
// NOTE(review): only the member declarations at the start of the struct are covered here; braces and the
// closing #endif are not visible in this excerpt.
10925 struct xpath_parser
// allocator used for AST nodes and copied strings (allocate_nothrow is called on it below)
10927 xpath_allocator* _alloc;
10928 xpath_lexer _lexer;
// start of the query text; throw_error reports the error offset relative to it
10930 const char_t* _query;
10931 xpath_variable_set* _variables;
// receives the error message and offset in throw_error
10933 xpath_parse_result* _result;
// fixed-size scratch buffer handed to get_variable_scratch / convert_string_to_number_scratch
10935 char_t _scratch[32];
// with exceptions disabled, throw_error longjmps to this buffer
10937 #ifdef PUGIXML_NO_EXCEPTIONS
10938 jmp_buf _error_handler;
// Records a parse error and aborts parsing: stores `message` and the current lexer offset (relative to the
// start of the query) in *_result, then either longjmps to _error_handler (PUGIXML_NO_EXCEPTIONS) or throws
// xpath_exception built from *_result.
// NOTE(review): the #else/#endif lines separating the two variants are not visible in this excerpt.
10941 void throw_error(const char* message)
10943 _result->error = message;
10944 _result->offset = _lexer.current_pos() - _query;
10946 #ifdef PUGIXML_NO_EXCEPTIONS
10947 longjmp(_error_handler, 1);
10949 throw xpath_exception(*_result);
// Reports an out-of-memory condition: via throw_error("Out of memory") when PUGIXML_NO_EXCEPTIONS is
// defined, otherwise by throwing std::bad_alloc.
// NOTE(review): the #else/#endif lines separating the two variants are not visible in this excerpt.
10953 void throw_error_oom()
10955 #ifdef PUGIXML_NO_EXCEPTIONS
10956 throw_error("Out of memory");
10958 throw std::bad_alloc();
// Allocates raw storage for one xpath_ast_node from _alloc without throwing; a null result is turned into
// an out-of-memory error via throw_error_oom.
// NOTE(review): the enclosing signature and the return statement are not visible in this excerpt; this is
// presumably the body of the alloc_node() helper used by the placement-new expressions below - confirm.
10964 void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
10966 if (!result) throw_error_oom();
// Copies the lexer string [value.begin, value.end) into storage obtained from _alloc, reserving one extra
// char_t beyond the copied characters; allocation failure is reported through throw_error_oom.
// NOTE(review): the lines after the memcpy (presumably the terminator write and the return) and any guard
// before the length computation are not visible in this excerpt.
10971 const char_t* alloc_string(const xpath_lexer_string& value)
10975 size_t length = static_cast<size_t>(value.end - value.begin);
10977 char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t)));
10978 if (!c) throw_error_oom();
10979 assert(c); // workaround for clang static analysis
10981 memcpy(c, value.begin, length * sizeof(char_t));
// Builds a string-returning function node that has a zero-argument form (type0) and a one-argument form
// (type1). With one argument, that argument must be a node set, otherwise a parse error is raised.
// args[0] is passed as the left child in both forms.
10989 xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
10993 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
10995 return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
// Maps a function name plus argument count to a newly allocated AST node.
// Candidates are grouped by the first character of `name`; each candidate checks the full name and the
// allowed argc. args[0] / args[1] become the left / right children where applicable. A name or argument
// count that matches nothing ends in the "Unrecognized function or wrong parameter count" error.
// NOTE(review): the case labels, braces and break statements of the switch are not visible in this excerpt.
10998 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
11000 switch (name.begin[0])
11003 if (name == PUGIXML_TEXT("boolean") && argc == 1)
11004 return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
11009 if (name == PUGIXML_TEXT("count") && argc == 1)
// count() requires a node-set argument
11011 if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
11012 return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
11014 else if (name == PUGIXML_TEXT("contains") && argc == 2)
11015 return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
// concat accepts two or more arguments
11016 else if (name == PUGIXML_TEXT("concat") && argc >= 2)
11017 return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
11018 else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
11019 return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
11024 if (name == PUGIXML_TEXT("false") && argc == 0)
11025 return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
11026 else if (name == PUGIXML_TEXT("floor") && argc == 1)
11027 return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
11032 if (name == PUGIXML_TEXT("id") && argc == 1)
11033 return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
11038 if (name == PUGIXML_TEXT("last") && argc == 0)
11039 return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
11040 else if (name == PUGIXML_TEXT("lang") && argc == 1)
11041 return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
11042 else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
11043 return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
11048 if (name == PUGIXML_TEXT("name") && argc <= 1)
11049 return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
11050 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
11051 return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
11052 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
11053 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
11054 else if (name == PUGIXML_TEXT("not") && argc == 1)
11055 return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
11056 else if (name == PUGIXML_TEXT("number") && argc <= 1)
11057 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
11062 if (name == PUGIXML_TEXT("position") && argc == 0)
11063 return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
11068 if (name == PUGIXML_TEXT("round") && argc == 1)
11069 return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
11074 if (name == PUGIXML_TEXT("string") && argc <= 1)
11075 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
11076 else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
11077 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
11078 else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
11079 return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
11080 else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
11081 return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
11082 else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
11083 return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
// substring has a two-argument and a three-argument form, mapped to separate node types
11084 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
11085 return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
11086 else if (name == PUGIXML_TEXT("sum") && argc == 1)
// sum() requires a node-set argument
11088 if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
11089 return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
11095 if (name == PUGIXML_TEXT("translate") && argc == 3)
11096 return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
11097 else if (name == PUGIXML_TEXT("true") && argc == 0)
11098 return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
11106 throw_error("Unrecognized function or wrong parameter count");
// Translates an axis name into its axis_t value; candidates are grouped by the first character of `name`.
// NOTE(review): the case labels, the returns for "child" and "self", every use of the `specified` out
// parameter and the fallback return are not visible in this excerpt.
11111 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
11115 switch (name.begin[0])
11118 if (name == PUGIXML_TEXT("ancestor"))
11119 return axis_ancestor;
11120 else if (name == PUGIXML_TEXT("ancestor-or-self"))
11121 return axis_ancestor_or_self;
11122 else if (name == PUGIXML_TEXT("attribute"))
11123 return axis_attribute;
11128 if (name == PUGIXML_TEXT("child"))
11134 if (name == PUGIXML_TEXT("descendant"))
11135 return axis_descendant;
11136 else if (name == PUGIXML_TEXT("descendant-or-self"))
11137 return axis_descendant_or_self;
11142 if (name == PUGIXML_TEXT("following"))
11143 return axis_following;
11144 else if (name == PUGIXML_TEXT("following-sibling"))
11145 return axis_following_sibling;
11150 if (name == PUGIXML_TEXT("namespace"))
11151 return axis_namespace;
11156 if (name == PUGIXML_TEXT("parent"))
11157 return axis_parent;
11158 else if (name == PUGIXML_TEXT("preceding"))
11159 return axis_preceding;
11160 else if (name == PUGIXML_TEXT("preceding-sibling"))
11161 return axis_preceding_sibling;
11166 if (name == PUGIXML_TEXT("self"))
// Maps a node type name ("comment", "node", "processing-instruction", "text") to the
// corresponding nodetest_type_* value, dispatching on the first character of the name.
// The final return yields nodetest_none, which callers in this file compare against to
// detect that the name is not a node type test.
11179 nodetest_t parse_node_test_type(const xpath_lexer_string& name)
11181 switch (name.begin[0])
11184 if (name == PUGIXML_TEXT("comment"))
11185 return nodetest_type_comment;
11190 if (name == PUGIXML_TEXT("node"))
11191 return nodetest_type_node;
11196 if (name == PUGIXML_TEXT("processing-instruction"))
11197 return nodetest_type_pi;
11202 if (name == PUGIXML_TEXT("text"))
11203 return nodetest_type_text;
11211 return nodetest_none;
11214 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
// Parses a single primary expression, chosen by the current lexeme, and returns its AST node.
// Nodes are placement-constructed in memory obtained from alloc_node(); malformed input is
// reported through throw_error().
// NOTE(review): several case labels, lexer advances and return statements are omitted from this listing.
11215 xpath_ast_node* parse_primary_expression()
11217 switch (_lexer.current())
// Variable reference: look the name up and wrap the variable in an ast_variable node
// whose value type is the variable's own type.
11221 xpath_lexer_string name = _lexer.contents();
11224 throw_error("Unknown variable: variable set is not provided");
11226 xpath_variable* var = 0;
11227 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
11231 throw_error("Unknown variable: variable set does not contain the given name");
11235 return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
// Parenthesized expression: parse the inner expression and require the closing brace.
11238 case lex_open_brace:
11242 xpath_ast_node* n = parse_expression();
11244 if (_lexer.current() != lex_close_brace)
11245 throw_error("Unmatched braces");
// String literal: copy the lexeme contents with alloc_string() and build a string constant node.
11252 case lex_quoted_string:
11254 const char_t* value = alloc_string(_lexer.contents());
11256 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
// Number literal: convert the lexeme text to a number and build a number constant node.
11266 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
11269 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
// Function call: only the first two arguments are kept in args; any further argument is
// linked onto last_arg through set_next().
11277 xpath_ast_node* args[2] = {0};
11280 xpath_lexer_string function = _lexer.contents();
11283 xpath_ast_node* last_arg = 0;
11285 if (_lexer.current() != lex_open_brace)
11286 throw_error("Unrecognized function call");
// first argument, if the argument list is not empty
11289 if (_lexer.current() != lex_close_brace)
11290 args[argc++] = parse_expression();
// remaining arguments must each be preceded by a comma
11292 while (_lexer.current() != lex_close_brace)
11294 if (_lexer.current() != lex_comma)
11295 throw_error("No comma between function arguments");
11298 xpath_ast_node* n = parse_expression();
11300 if (argc < 2) args[argc] = n;
11301 else last_arg->set_next(n);
// resolve the function name and argument count to a concrete AST node
11309 return parse_function(function, argc, args);
11313 throw_error("Unrecognizable primary expression");
11319 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
11320 // Predicate ::= '[' PredicateExpr ']'
11321 // PredicateExpr ::= Expr
// Parses a primary expression followed by any number of '[' ... ']' predicates.
// Each predicate wraps the expression parsed so far in an ast_filter node; the filtered
// expression must have node set type, otherwise an error is raised.
11322 xpath_ast_node* parse_filter_expression()
11324 xpath_ast_node* n = parse_primary_expression();
11326 while (_lexer.current() == lex_open_square_brace)
11330 xpath_ast_node* expr = parse_expression();
11332 if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
11334 n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default);
11336 if (_lexer.current() != lex_close_square_brace)
11337 throw_error("Unmatched square brace");
11345 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
11346 // AxisSpecifier ::= AxisName '::' | '@'?
11347 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
11348 // NameTest ::= '*' | NCName ':' '*' | QName
11349 // AbbreviatedStep ::= '.' | '..'
// Parses one location step applied to the expression 'set' (which may be null) and
// returns an ast_step node, with any predicates attached to it.
// A non-null 'set' must have node set type. Errors are reported through throw_error().
// NOTE(review): lexer advances and some braces/else lines are omitted from this listing.
11350 xpath_ast_node* parse_step(xpath_ast_node* set)
11352 if (set && set->rettype() != xpath_type_node_set)
11353 throw_error("Step has to be applied to node set");
11355 bool axis_specified = false;
11356 axis_t axis = axis_child; // implied child axis
// '@' abbreviation selects the attribute axis
11358 if (_lexer.current() == lex_axis_attribute)
11360 axis = axis_attribute;
11361 axis_specified = true;
// '.' abbreviation: self axis with a node() test
11365 else if (_lexer.current() == lex_dot)
11369 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
// '..' abbreviation: parent axis with a node() test
11371 else if (_lexer.current() == lex_double_dot)
11375 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
// node test kind and name; nodetest_none means "not determined yet"
11378 nodetest_t nt_type = nodetest_none;
11379 xpath_lexer_string nt_name;
11381 if (_lexer.current() == lex_string)
11384 nt_name = _lexer.contents();
11387 // was it an axis name?
11388 if (_lexer.current() == lex_double_colon)
// an explicit axis name may not follow '@'
11391 if (axis_specified) throw_error("Two axis specifiers in one step");
11393 axis = parse_axis_name(nt_name, axis_specified);
11395 if (!axis_specified) throw_error("Unknown axis");
11397 // read actual node test
11400 if (_lexer.current() == lex_multiply)
11402 nt_type = nodetest_all;
11403 nt_name = xpath_lexer_string();
11406 else if (_lexer.current() == lex_string)
11408 nt_name = _lexer.contents();
11411 else throw_error("Unrecognized node test");
// the name alone did not settle the test kind: decide between a node type test,
// processing-instruction('literal'), and a name test
11414 if (nt_type == nodetest_none)
11416 // node type test or processing-instruction
11417 if (_lexer.current() == lex_open_brace)
11421 if (_lexer.current() == lex_close_brace)
11425 nt_type = parse_node_test_type(nt_name);
11427 if (nt_type == nodetest_none) throw_error("Unrecognized node type");
// node type tests carry no name
11429 nt_name = xpath_lexer_string();
11431 else if (nt_name == PUGIXML_TEXT("processing-instruction"))
11433 if (_lexer.current() != lex_quoted_string)
11434 throw_error("Only literals are allowed as arguments to processing-instruction()");
// the literal becomes the name the test is matched against
11436 nt_type = nodetest_pi;
11437 nt_name = _lexer.contents();
11440 if (_lexer.current() != lex_close_brace)
11441 throw_error("Unmatched brace near processing-instruction()");
11445 throw_error("Unmatched brace near node type test");
11448 // QName or NCName:*
11451 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
11453 nt_name.end--; // erase *
11455 nt_type = nodetest_all_in_namespace;
11457 else nt_type = nodetest_name;
// bare '*' with no leading name
11461 else if (_lexer.current() == lex_multiply)
11463 nt_type = nodetest_all;
11466 else throw_error("Unrecognized node test");
// build the step node; the test name is copied with alloc_string()
11468 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
// predicates: the first one is stored with set_right() on the step, each later one is
// linked with set_next() on 'last' (the update of 'last' is not shown in this listing)
11470 xpath_ast_node* last = 0;
11472 while (_lexer.current() == lex_open_square_brace)
11476 xpath_ast_node* expr = parse_expression();
11478 xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default);
11480 if (_lexer.current() != lex_close_square_brace)
11481 throw_error("Unmatched square brace");
11484 if (last) last->set_next(pred);
11485 else n->set_right(pred);
11493 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
// Parses a chain of steps separated by '/' or '//', starting from 'set'.
// For '//' an extra descendant-or-self step with a node() test is inserted before the next step.
// NOTE(review): the statements that advance the lexer, parse the following step and return
// the result are omitted from this listing.
11494 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
11496 xpath_ast_node* n = parse_step(set);
11498 while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
// remember which separator was seen
11500 lexeme_t l = _lexer.current();
11503 if (l == lex_double_slash)
11504 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11512 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
11513 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
// Parses a location path. A leading '/' or '//' creates an ast_step_root node that the
// following relative path is applied to; without either, the relative path is parsed
// with a null set.
11514 xpath_ast_node* parse_location_path()
11516 if (_lexer.current() == lex_slash)
11520 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
11522 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
11523 lexeme_t l = _lexer.current();
11525 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
11526 return parse_relative_location_path(n);
11530 else if (_lexer.current() == lex_double_slash)
// '//' is the root followed by a descendant-or-self step with a node() test
11534 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
11535 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11537 return parse_relative_location_path(n);
11540 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
11541 return parse_relative_location_path(0);
11544 // PathExpr ::= LocationPath
11546 // | FilterExpr '/' RelativeLocationPath
11547 // | FilterExpr '//' RelativeLocationPath
11548 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
11549 // UnaryExpr ::= UnionExpr | '-' UnaryExpr
// Parses a path expression or a unary minus expression, deciding from the current lexeme
// whether the input starts a filter expression, a negation, or a plain location path.
// NOTE(review): lexer advances and the return of a filter expression that is not followed
// by a path are omitted from this listing.
11550 xpath_ast_node* parse_path_or_unary_expression()
11553 // PathExpr begins with either LocationPath or FilterExpr.
11554 // FilterExpr begins with PrimaryExpr
11555 // PrimaryExpr begins with '$' in case of it being a variable reference,
11556 // '(' in case of it being an expression, string literal, number constant or
11559 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
11560 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
11561 _lexer.current() == lex_string)
11563 if (_lexer.current() == lex_string)
11565 // This is either a function call, or not - if not, we shall proceed with location path
// peek past whitespace in the raw query text without consuming any lexeme
11566 const char_t* state = _lexer.state();
11568 while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
11570 if (*state != '(') return parse_location_path();
11572 // This looks like a function call; however this still can be a node-test. Check it.
11573 if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
11576 xpath_ast_node* n = parse_filter_expression();
// a filter expression may be continued by '/' or '//' and a relative location path
11578 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11580 lexeme_t l = _lexer.current();
11583 if (l == lex_double_slash)
11585 if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
11587 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11590 // select from location path
11591 return parse_relative_location_path(n);
// unary minus: the operand is parsed with limit 7, then wrapped in a negate node of number type
11596 else if (_lexer.current() == lex_minus)
11600 // precedence 7+ - only parses union expressions
11601 xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7);
11603 return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
11606 return parse_location_path();
// Members of binary_op_t (the struct header and the 'precedence' member declaration are
// not shown in this listing). The struct describes a binary operator: the AST node type
// to create, the value type of the result, and an integer precedence used by
// parse_expression_rec.
11611 ast_type_t asttype;
11612 xpath_value_type rettype;
// default state: "no operator" (ast_unknown)
11615 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
11619 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
// Classifies the lexer's current lexeme as a binary operator without consuming it.
// Returns a default-constructed binary_op_t when the lexeme is not a binary operator.
// Precedence values used below: or=1, and=2, equality=3, relational=4, additive=5,
// multiplicative (including div/mod)=6, union=7.
11623 static binary_op_t parse(xpath_lexer& lexer)
11625 switch (lexer.current())
// operators spelled as names
11628 if (lexer.contents() == PUGIXML_TEXT("or"))
11629 return binary_op_t(ast_op_or, xpath_type_boolean, 1);
11630 else if (lexer.contents() == PUGIXML_TEXT("and"))
11631 return binary_op_t(ast_op_and, xpath_type_boolean, 2);
11632 else if (lexer.contents() == PUGIXML_TEXT("div"))
11633 return binary_op_t(ast_op_divide, xpath_type_number, 6);
11634 else if (lexer.contents() == PUGIXML_TEXT("mod"))
11635 return binary_op_t(ast_op_mod, xpath_type_number, 6);
// any other name is not an operator
11637 return binary_op_t();
// symbolic operators (some case labels are not shown in this listing)
11640 return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
11642 case lex_not_equal:
11643 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
11646 return binary_op_t(ast_op_less, xpath_type_boolean, 4);
11649 return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
11651 case lex_less_or_equal:
11652 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
11654 case lex_greater_or_equal:
11655 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
11658 return binary_op_t(ast_op_add, xpath_type_number, 5);
11661 return binary_op_t(ast_op_subtract, xpath_type_number, 5);
11664 return binary_op_t(ast_op_multiply, xpath_type_number, 6);
11667 return binary_op_t(ast_op_union, xpath_type_node_set, 7);
11670 return binary_op_t();
// Parses a sequence of binary operators and operands to the right of 'lhs'.
// lhs: the already parsed left operand.
// limit: operators with precedence below this value are left for the caller.
// While the next operator binds tighter than the current one, the right operand is
// extended by a recursive call first; the current operator is then applied to lhs and rhs.
// Both operands of a union must have node set type.
// NOTE(review): the lexer advance past the operator and the final return are omitted from this listing.
11675 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
11677 binary_op_t op = binary_op_t::parse(_lexer);
11679 while (op.asttype != ast_unknown && op.precedence >= limit)
11683 xpath_ast_node* rhs = parse_path_or_unary_expression();
11685 binary_op_t nextop = binary_op_t::parse(_lexer);
// let operators of higher precedence claim rhs first
11687 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
11689 rhs = parse_expression_rec(rhs, nextop.precedence);
11691 nextop = binary_op_t::parse(_lexer);
11694 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
11695 throw_error("Union operator has to be applied to node sets");
// fold the operator into the left operand and continue with the next operator
11697 lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs);
11699 op = binary_op_t::parse(_lexer);
11706 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
11707 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
11708 // EqualityExpr ::= RelationalExpr
11709 // | EqualityExpr '=' RelationalExpr
11710 // | EqualityExpr '!=' RelationalExpr
11711 // RelationalExpr ::= AdditiveExpr
11712 // | RelationalExpr '<' AdditiveExpr
11713 // | RelationalExpr '>' AdditiveExpr
11714 // | RelationalExpr '<=' AdditiveExpr
11715 // | RelationalExpr '>=' AdditiveExpr
11716 // AdditiveExpr ::= MultiplicativeExpr
11717 // | AdditiveExpr '+' MultiplicativeExpr
11718 // | AdditiveExpr '-' MultiplicativeExpr
11719 // MultiplicativeExpr ::= UnaryExpr
11720 // | MultiplicativeExpr '*' UnaryExpr
11721 // | MultiplicativeExpr 'div' UnaryExpr
11722 // | MultiplicativeExpr 'mod' UnaryExpr
// Parses a full expression: one path/unary operand followed by any binary operators.
// The limit of 0 accepts every operator, since the lowest precedence in use is 1.
11723 xpath_ast_node* parse_expression()
11725 return parse_expression_rec(parse_path_or_unary_expression(), 0);
// Constructs a parser over 'query'. The lexer is initialized from the same query text,
// and the allocator, variable set and result pointers are stored as passed in.
// The constructor body is not shown in this listing.
11728 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
// Parses the whole query as one expression and requires that the lexer has reached
// end of input afterwards; leftover tokens are reported as "Incorrect query".
// The return statement is not shown in this listing.
11732 xpath_ast_node* parse()
11734 xpath_ast_node* result = parse_expression();
11736 if (_lexer.current() != lex_eof)
11738 // there are still unparsed tokens left, error
11739 throw_error("Incorrect query");
// Static entry point: builds a parser for 'query' and runs it.
// With PUGIXML_NO_EXCEPTIONS, setjmp() is armed on the parser's error handler and 0 is
// returned when control comes back with a non-zero value; otherwise the result of
// parser.parse() is returned directly.
11745 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
11747 xpath_parser parser(query, variables, alloc, result);
11749 #ifdef PUGIXML_NO_EXCEPTIONS
11750 int error = setjmp(parser._error_handler);
11752 return (error == 0) ? parser.parse() : 0;
11754 return parser.parse();
// Storage behind a compiled query: the AST root, the allocator used for the AST, and the
// first memory block, which is embedded in the object and handed to the allocator.
11759 struct xpath_query_impl
// Allocates raw memory with xml_memory::allocate and constructs the object in place.
// Returns 0 when the allocation fails.
11761 static xpath_query_impl* create()
11763 void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
11764 if (!memory) return 0;
11766 return new (memory) xpath_query_impl();
// Releases the allocator's pages, then frees the object's own memory.
// No destructor call is visible in the lines shown.
11769 static void destroy(xpath_query_impl* impl)
11771 // free all allocated pages
11772 impl->alloc.release();
11774 // free allocator memory (with the first page)
11775 xml_memory::deallocate(impl);
// The allocator is constructed with the address of 'block', which is declared after it.
11778 xpath_query_impl(): root(0), alloc(&block)
11781 block.capacity = sizeof(block.data);
11784 xpath_ast_node* root;
11785 xpath_allocator alloc;
11786 xpath_memory_block block;
// Evaluates the query rooted at impl->root as a string, with 'n' as the context node
// (the context is constructed with the arguments n, 1, 1).
// Returns an empty xpath_string when impl is null or, with PUGIXML_NO_EXCEPTIONS, when
// control returns to the setjmp() point with a non-zero value.
11789 PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
11791 if (!impl) return xpath_string();
11793 #ifdef PUGIXML_NO_EXCEPTIONS
11794 if (setjmp(sd.error_handler)) return xpath_string();
11797 xpath_context c(n, 1, 1);
11799 return impl->root->eval_string(c, sd.stack);
// Checks that a query can be evaluated as a node set.
// Returns 0 when impl is null. When the root expression is not of node set type and
// exceptions are enabled, an xpath_exception with the message below is thrown.
// NOTE(review): the PUGIXML_NO_EXCEPTIONS branch and the success return are omitted from this listing.
11802 PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
11804 if (!impl) return 0;
11806 if (impl->root->rettype() != xpath_type_node_set)
11808 #ifdef PUGIXML_NO_EXCEPTIONS
11811 xpath_parse_result res;
11812 res.error = "Expression does not evaluate to node set";
11814 throw xpath_exception(res);
11824 #ifndef PUGIXML_NO_EXCEPTIONS
// Constructs the exception from a parse result, which is copied into _result.
// The assert requires the result to carry a non-null error message.
11825 PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
11827 assert(_result.error);
// Returns the error message stored in the parse result.
11830 PUGI__FN const char* xpath_exception::what() const throw()
11832 return _result.error;
// Accessor for the parse result associated with this exception.
// The body is not shown in this listing; presumably it returns _result - confirm.
11835 PUGI__FN const xpath_parse_result& xpath_exception::result() const
// Default constructor. No member initializers appear on this line, so _node and
// _attribute are presumably default-constructed; the body is not shown in this listing.
11841 PUGI__FN xpath_node::xpath_node()
// Constructs an XPath node that refers to an XML node; only _node is initialized here.
11845 PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
// Constructs an XPath node that refers to an attribute and its parent node.
// The parent is kept only when attribute_ tests true; otherwise _node is an empty xml_node.
11849 PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
// Returns the referenced XML node, or an empty xml_node when this object refers to an
// attribute (in that case _node holds the attribute's parent, not the node itself).
11853 PUGI__FN xml_node xpath_node::node() const
11855 return _attribute ? xml_node() : _node;
// Accessor for the referenced attribute.
// The body is not shown in this listing; presumably it returns _attribute - confirm.
11858 PUGI__FN xml_attribute xpath_node::attribute() const
// Returns the parent: for an attribute this is the stored _node, for a node it is
// that node's own parent.
11863 PUGI__FN xml_node xpath_node::parent() const
11865 return _attribute ? _node : _node.parent();
// Helper whose address is returned by the conversion operator below to represent "true".
// The body is not shown in this listing.
11868 PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
// Boolean-like conversion: yields a non-null function pointer when either the node or
// the attribute is set, and 0 otherwise.
11872 PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
11874 return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
// True when neither a node nor an attribute is set.
11877 PUGI__FN bool xpath_node::operator!() const
11879 return !(_node || _attribute);
// Two XPath nodes are equal when both their node and attribute parts compare equal.
11882 PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
11884 return _node == n._node && _attribute == n._attribute;
// Two XPath nodes differ when either the node or the attribute part differs.
11887 PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
11889 return _node != n._node || _attribute != n._attribute;
11892 #ifdef __BORLANDC__
// Logical AND of an xpath_node (converted to bool) with a bool.
// Compiled only under the __BORLANDC__ guard that precedes it.
11893 PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
11895 return (bool)lhs && rhs;
// Logical OR of an xpath_node (converted to bool) with a bool.
// Compiled only under the __BORLANDC__ guard that precedes it.
11898 PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
11900 return (bool)lhs || rhs;
// Replaces the contents of the set with a copy of the range [begin_, end_).
// Two storage strategies are visible: the internal single-element _storage, and a buffer
// obtained from impl::xml_memory::allocate into which the range is copied with memcpy.
// In both paths a previously allocated buffer (anything other than &_storage) is freed.
// NOTE(review): the condition that selects between the two paths, the allocation-failure
// check and the updates of _begin/_type are omitted from this listing.
11904 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
11906 assert(begin_ <= end_);
11908 size_t size_ = static_cast<size_t>(end_ - begin_);
11912 // deallocate old buffer
11913 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
11915 // use internal buffer
11916 if (begin_ != end_) _storage = *begin_;
11918 _begin = &_storage;
11919 _end = &_storage + size_;
// heap path: allocate the new buffer before the old one is released
11925 xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
11929 #ifdef PUGIXML_NO_EXCEPTIONS
11932 throw std::bad_alloc();
11936 memcpy(storage, begin_, size_ * sizeof(xpath_node));
11938 // deallocate old buffer
11939 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
11943 _end = storage + size_;
11948 #if __cplusplus >= 201103
// Takes over the contents of rhs and leaves rhs empty.
// When rhs uses its internal _storage, the element is copied and _begin points at this
// object's own _storage; otherwise the rhs buffer pointer is adopted as is.
// rhs is reset to an unsorted set whose range is empty and points at its own _storage.
// The statement that copies the type from rhs is not shown in this listing.
11949 PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs)
11952 _storage = rhs._storage;
11953 _begin = (rhs._begin == &rhs._storage) ? &_storage : rhs._begin;
// preserve the element count relative to the new _begin
11954 _end = _begin + (rhs._end - rhs._begin);
11956 rhs._type = type_unsorted;
11957 rhs._begin = &rhs._storage;
11958 rhs._end = rhs._begin;
// Constructs an empty, unsorted set whose range points at the internal _storage.
11962 PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
// Constructs a set from the range [begin_, end_) with the given type: starts out empty,
// then delegates to _assign.
11966 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage)
11968 _assign(begin_, end_, type_);
// Frees the element buffer unless the set is using its internal _storage.
11971 PUGI__FN xpath_node_set::~xpath_node_set()
11973 if (_begin != &_storage)
11974 impl::xml_memory::deallocate(_begin);
// Copy constructor: starts out empty, then copies the elements and type of ns via _assign.
11977 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage)
11979 _assign(ns._begin, ns._end, ns._type);
// Copy assignment: self-assignment is a no-op, otherwise the contents of ns are copied
// via _assign. The final return statement is not shown in this listing.
11982 PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
11984 if (this == &ns) return *this;
11986 _assign(ns._begin, ns._end, ns._type);
11991 #if __cplusplus >= 201103
// Move constructor (compiled only when __cplusplus >= 201103): starts out empty.
// The body is not shown in this listing; presumably it calls _move(rhs) - confirm.
11992 PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs): _type(type_unsorted), _begin(&_storage), _end(&_storage)
// Move assignment (compiled only when __cplusplus >= 201103): self-assignment is a no-op;
// otherwise the current heap buffer, if any, is freed before the contents are replaced.
// NOTE(review): the statements that take over rhs and return are omitted from this listing.
11997 PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs)
11999 if (this == &rhs) return *this;
12001 if (_begin != &_storage)
12002 impl::xml_memory::deallocate(_begin);
// Accessor for the set's type.
// The body is not shown in this listing; presumably it returns _type - confirm.
12010 PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
// Number of elements, computed as the distance between _begin and _end.
12015 PUGI__FN size_t xpath_node_set::size() const
12017 return _end - _begin;
// True when the set holds no elements.
12020 PUGI__FN bool xpath_node_set::empty() const
12022 return _begin == _end;
// Returns the element at 'index'. The bound is checked only by the assert.
12025 PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
12027 assert(index < size());
12028 return _begin[index];
// Iterator to the first element.
// The body is not shown in this listing; presumably it returns _begin - confirm.
12031 PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
// Iterator past the last element.
// The body is not shown in this listing; presumably it returns _end - confirm.
12036 PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
// Sorts the set by delegating to impl::xpath_sort and stores the type it returns.
// reverse: forwarded unchanged to impl::xpath_sort.
12041 PUGI__FN void xpath_node_set::sort(bool reverse)
12043 _type = impl::xpath_sort(_begin, _end, _type, reverse);
// Returns the node chosen by impl::xpath_first for the current range and type.
12046 PUGI__FN xpath_node xpath_node_set::first() const
12048 return impl::xpath_first(_begin, _end, _type);
// A default-constructed result carries the message "Internal error" and offset 0,
// so it starts out with a non-null error.
12051 PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
// Conversion to bool.
// The body is not shown in this listing; presumably true means that no error is set - confirm.
12055 PUGI__FN xpath_parse_result::operator bool() const
// Returns the error message, or "No error" when no error is set.
12060 PUGI__FN const char* xpath_parse_result::description() const
12062 return error ? error : "No error";
// Constructs a variable of the given value type; it is not linked to any successor yet.
12065 PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
// Returns the variable's name. The name lives in the concrete variable type, so 'this'
// is cast to the type that matches each value type case.
// The assert fires for a value type that matches none of the four cases.
// The switch header and the fallback return are not shown in this listing.
12069 PUGI__FN const char_t* xpath_variable::name() const
12073 case xpath_type_node_set:
12074 return static_cast<const impl::xpath_variable_node_set*>(this)->name;
12076 case xpath_type_number:
12077 return static_cast<const impl::xpath_variable_number*>(this)->name;
12079 case xpath_type_string:
12080 return static_cast<const impl::xpath_variable_string*>(this)->name;
12082 case xpath_type_boolean:
12083 return static_cast<const impl::xpath_variable_boolean*>(this)->name;
12086 assert(false && "Invalid variable type");
// Accessor for the variable's value type.
// The body is not shown in this listing; presumably it returns _type - confirm.
12091 PUGI__FN xpath_value_type xpath_variable::type() const
// Returns the stored boolean, or false when the variable is not of boolean type.
12096 PUGI__FN bool xpath_variable::get_boolean() const
12098 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
// Returns the stored number, or the result of impl::gen_nan() when the variable is not
// of number type.
12101 PUGI__FN double xpath_variable::get_number() const
12103 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
// Returns the stored string. An empty string is returned both when the variable is not
// of string type and when its stored value pointer is null, so the result is never null.
12106 PUGI__FN const char_t* xpath_variable::get_string() const
12108 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
12109 return value ? value : PUGIXML_TEXT("");
// Returns the stored node set, or impl::dummy_node_set when the variable is not of
// node set type.
12112 PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
12114 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
// Stores a boolean value. Returns false without storing when the variable is not of
// boolean type. The success return is not shown in this listing.
12117 PUGI__FN bool xpath_variable::set(bool value)
12119 if (_type != xpath_type_boolean) return false;
12121 static_cast<impl::xpath_variable_boolean*>(this)->value = value;
// Stores a number value. Returns false without storing when the variable is not of
// number type. The success return is not shown in this listing.
12125 PUGI__FN bool xpath_variable::set(double value)
12127 if (_type != xpath_type_number) return false;
12129 static_cast<impl::xpath_variable_number*>(this)->value = value;
// Stores a copy of a string value. Returns false when the variable is not of string
// type or when memory for the copy cannot be allocated; in the latter case the old
// value has not been touched yet.
// NOTE(review): 'value' is passed to impl::strlength with no null check visible here -
// confirm whether callers may pass a null pointer.
// The assignment of the new copy and the success return are not shown in this listing.
12133 PUGI__FN bool xpath_variable::set(const char_t* value)
12135 if (_type != xpath_type_string) return false;
12137 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
12139 // duplicate string
// size is in bytes and includes the terminating character
12140 size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
12142 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
12143 if (!copy) return false;
12145 memcpy(copy, value, size);
12147 // replace old string
12148 if (var->value) impl::xml_memory::deallocate(var->value);
// Stores a copy of a node set. Returns false without storing when the variable is not
// of node set type. The success return is not shown in this listing.
12154 PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
12156 if (_type != xpath_type_node_set) return false;
12158 static_cast<impl::xpath_variable_node_set*>(this)->value = value;
// Default constructor: visits every entry of the _data array.
// The loop body is not shown in this listing; presumably each entry is set to null - confirm.
12162 PUGI__FN xpath_variable_set::xpath_variable_set()
12164 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
// Destroys the variable chain stored in every entry of the _data array.
12168 PUGI__FN xpath_variable_set::~xpath_variable_set()
12170 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12171 _destroy(_data[i]);
// Copy constructor: visits every entry of the _data array.
// NOTE(review): the loop body and the statement that copies from rhs are omitted from
// this listing.
12174 PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
12176 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
// Copy assignment: self-assignment is a no-op.
// NOTE(review): the statements that copy from rhs and return are omitted from this listing.
12182 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
12184 if (this == &rhs) return *this;
12191 #if __cplusplus >= 201103
// Move constructor (compiled only when __cplusplus >= 201103): copies each chain pointer
// from rhs into this set.
// NOTE(review): the statement that clears the rhs entry is not shown in this listing;
// confirm that rhs does not keep the same chains.
12192 PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs)
12194 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12196 _data[i] = rhs._data[i];
// Move assignment (compiled only when __cplusplus >= 201103): for each entry, destroys
// the current chain and then takes the chain pointer from rhs.
// NOTE(review): no self-assignment check is visible in the lines shown, and the
// statements that clear the rhs entry and return are omitted from this listing.
12201 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs)
12203 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12205 _destroy(_data[i]);
12207 _data[i] = rhs._data[i];
// Copies the variables of rhs by cloning every non-empty chain into a temporary set.
// NOTE(review): what happens when _clone fails, and how the temporary's contents end up
// in this set, are omitted from this listing.
12215 PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
12217 xpath_variable_set temp;
12219 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12220 if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
// Exchanges the chain pointers of this set and rhs, entry by entry.
12226 PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
12228 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12230 xpath_variable* chain = _data[i];
12232 _data[i] = rhs._data[i];
12233 rhs._data[i] = chain;
// Looks a variable up by name: the name's hash, reduced modulo the size of _data,
// selects a chain, which is then walked comparing names with impl::strequal.
// The return statements are not shown in this listing.
12237 PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
12242 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12243 size_t hash = impl::hash_string(name) % hash_size;
12245 // look for existing variable
12246 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12247 if (var->name() && impl::strequal(var->name(), name))
// Clones variables starting at 'var' into a new chain whose head is written to *out_result.
// Returns false when a new variable cannot be allocated or its value cannot be copied.
// NOTE(review): the loop header, the conditions around the linking statements and the
// success return are omitted from this listing.
12253 PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
12255 xpath_variable* last = 0;
12259 // allocate storage for new variable
12260 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
12261 if (!nvar) return false;
12263 // link the variable to the result immediately to handle failures gracefully
12265 last->_next = nvar;
12267 *out_result = nvar;
12271 // copy the value; this can fail due to out-of-memory conditions
12272 if (!impl::copy_xpath_variable(nvar, var)) return false;
// Deletes variables starting at 'var'. The successor pointer is read before the variable
// is deleted. The loop header and the advance to 'next' are not shown in this listing.
12280 PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
12284 xpath_variable* next = var->_next;
12286 impl::delete_xpath_variable(var->_type, var);
// Returns the variable called 'name' with the given type, creating it when absent.
// An existing variable with the same name is returned only when its type matches;
// on a type mismatch the result is 0.
// A new variable is inserted at the head of the chain selected by the name's hash.
// The allocation-failure check and the final return are not shown in this listing.
12292 PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
12297 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12298 size_t hash = impl::hash_string(name) % hash_size;
12300 // look for existing variable
12301 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12302 if (var->name() && impl::strequal(var->name(), name))
12303 return var->type() == type ? var : 0;
12305 // add new variable
12306 xpath_variable* result = impl::new_xpath_variable(type, name);
12310 result->_next = _data[hash];
12312 _data[hash] = result;
// Sets a boolean variable, adding it first when needed.
// Returns false when add() yields no variable or when storing the value fails.
12318 PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
12320 xpath_variable* var = add(name, xpath_type_boolean);
12321 return var ? var->set(value) : false;
// Sets a number variable, adding it first when needed.
// Returns false when add() yields no variable or when storing the value fails.
12324 PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
12326 xpath_variable* var = add(name, xpath_type_number);
12327 return var ? var->set(value) : false;
// Sets a string variable, adding it first when needed.
// Returns false when add() yields no variable or when storing the value fails.
12330 PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
12332 xpath_variable* var = add(name, xpath_type_string);
12333 return var ? var->set(value) : false;
// Sets a node set variable, adding it first when needed.
// Returns false when add() yields no variable or when storing the value fails.
12336 PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
12338 xpath_variable* var = add(name, xpath_type_node_set);
12339 return var ? var->set(value) : false;
// Looks a variable up by name; forwards to _find.
12342 PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
12344 return _find(name);
// Const overload of the lookup by name; forwards to _find.
12347 PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
12349 return _find(name);
// Compiles 'query' into a query object, optionally resolving variables from 'variables'.
// When the implementation object cannot be created, the error is "Out of memory" with
// PUGIXML_NO_EXCEPTIONS and std::bad_alloc otherwise.
// The implementation is owned by an auto_deleter until _impl takes it over through
// release().
// NOTE(review): the conditions guarding the failure path and the optimize/release steps
// are omitted from this listing.
12352 PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
12354 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
12358 #ifdef PUGIXML_NO_EXCEPTIONS
12359 _result.error = "Out of memory";
12361 throw std::bad_alloc();
12366 using impl::auto_deleter; // MSVC7 workaround
12367 auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
// parse errors are reported through _result
12369 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
12373 qimpl->root->optimize(&qimpl->alloc);
12375 _impl = impl.release();
// Constructs a query with no implementation attached.
12381 PUGI__FN xpath_query::xpath_query(): _impl(0)
// Destroys the implementation object. The guard that precedes the call is not shown in
// this listing; presumably the call is skipped when _impl is null - confirm.
12385 PUGI__FN xpath_query::~xpath_query()
12388 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12391 #if __cplusplus >= 201103
// Move constructor (compiled only when __cplusplus >= 201103): copies the parse result
// from rhs and resets the rhs result to a default-constructed xpath_parse_result.
// The statements that transfer _impl are not shown in this listing.
12392 PUGI__FN xpath_query::xpath_query(xpath_query&& rhs)
12395 _result = rhs._result;
12397 rhs._result = xpath_parse_result();
// Move assignment (compiled only when __cplusplus >= 201103): self-assignment is a no-op;
// otherwise the current implementation is destroyed, the parse result is copied from rhs
// and the rhs result is reset to a default-constructed xpath_parse_result.
// NOTE(review): the guard before destroy, the transfer of _impl and the return are
// omitted from this listing.
12400 PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs)
12402 if (this == &rhs) return *this;
12405 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12408 _result = rhs._result;
12410 rhs._result = xpath_parse_result();
// Returns the value type of the query's root expression, or xpath_type_none when the
// query has no implementation.
12416 PUGI__FN xpath_value_type xpath_query::return_type() const
12418 if (!_impl) return xpath_type_none;
12420 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
// Evaluates the query as a boolean with 'n' as the context node.
// Returns false when the query has no implementation or, with PUGIXML_NO_EXCEPTIONS,
// when control returns to the setjmp() point with a non-zero value.
12423 PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
12425 if (!_impl) return false;
12427 impl::xpath_context c(n, 1, 1);
12428 impl::xpath_stack_data sd;
12430 #ifdef PUGIXML_NO_EXCEPTIONS
12431 if (setjmp(sd.error_handler)) return false;
12434 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
// Evaluates the query as a number with 'n' as the context node.
// Returns the result of impl::gen_nan() when the query has no implementation or, with
// PUGIXML_NO_EXCEPTIONS, when control returns to the setjmp() point with a non-zero value.
12437 PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
12439 if (!_impl) return impl::gen_nan();
12441 impl::xpath_context c(n, 1, 1);
12442 impl::xpath_stack_data sd;
12444 #ifdef PUGIXML_NO_EXCEPTIONS
12445 if (setjmp(sd.error_handler)) return impl::gen_nan();
12448 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
12451 #ifndef PUGIXML_NO_STL
// Evaluates the query as a string with 'n' as the context node and returns the result
// as a string_t. Compiled only when PUGIXML_NO_STL is not defined.
12452 PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
12454 impl::xpath_stack_data sd;
12456 impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
12458 return string_t(r.c_str(), r.length());
// Evaluates the query as a string with 'n' as the context node and copies the result
// into a caller-supplied buffer.
// full_size counts the result's characters plus the terminator. At most 'capacity'
// characters are written, the last of which is always the terminator, so a result that
// does not fit is truncated.
// NOTE(review): the guard around the copy (for a zero capacity) and the return value
// are omitted from this listing.
12462 PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
12464 impl::xpath_stack_data sd;
12466 impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
12468 size_t full_size = r.length() + 1;
// number of characters to write, terminator included
12472 size_t size = (full_size < capacity) ? full_size : capacity;
12475 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
12476 buffer[size - 1] = 0;
// Evaluates the query as a node set with 'n' as the context node, requesting all nodes
// (impl::nodeset_eval_all), and returns a copy of the raw result as an xpath_node_set.
// An empty set is returned when evaluate_node_set_prepare yields no root or, with
// PUGIXML_NO_EXCEPTIONS, when control returns to the setjmp() point with a non-zero value.
12482 PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
12484 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12485 if (!root) return xpath_node_set();
12487 impl::xpath_context c(n, 1, 1);
12488 impl::xpath_stack_data sd;
12490 #ifdef PUGIXML_NO_EXCEPTIONS
12491 if (setjmp(sd.error_handler)) return xpath_node_set();
12494 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
12496 return xpath_node_set(r.begin(), r.end(), r.type());
// Evaluates the query as a node set with 'n' as the context node, using the
// impl::nodeset_eval_first mode.
// An empty xpath_node is returned when evaluate_node_set_prepare yields no root or, with
// PUGIXML_NO_EXCEPTIONS, when control returns to the setjmp() point with a non-zero value.
// The statement that picks the returned node from 'r' is not shown in this listing.
12499 PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
12501 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12502 if (!root) return xpath_node();
12504 impl::xpath_context c(n, 1, 1);
12505 impl::xpath_stack_data sd;
12507 #ifdef PUGIXML_NO_EXCEPTIONS
12508 if (setjmp(sd.error_handler)) return xpath_node();
12511 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
// Accessor for the parse result of this query.
// The body is not shown in this listing; presumably it returns _result - confirm.
12516 PUGI__FN const xpath_parse_result& xpath_query::result() const
// Helper whose address is returned by the conversion operator below to represent "true".
// The body is not shown in this listing.
12521 PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
// Boolean-like conversion: yields a non-null function pointer when the query has an
// implementation, and 0 otherwise.
12525 PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
12527 return _impl ? unspecified_bool_xpath_query : 0;
// Logical negation of the query's validity.
// The body is not shown in this listing; presumably it returns !_impl - confirm.
12530 PUGI__FN bool xpath_query::operator!() const
// Compiles 'query' (with optional variables) and evaluates it against this node through
// the xpath_query overload of select_node.
12535 PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
12537 xpath_query q(query, variables);
12538 return select_node(q);
// Evaluates a compiled query with this node as the context, via evaluate_node.
12541 PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
12543 return query.evaluate_node(*this);
// Compiles 'query' (with optional variables) and evaluates it against this node through
// the xpath_query overload of select_nodes.
12546 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
12548 xpath_query q(query, variables);
12549 return select_nodes(q);
// Evaluates a compiled query with this node as the context, via evaluate_node_set.
12552 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
12554 return query.evaluate_node_set(*this);
// Compiles 'query' (with optional variables) and evaluates it against this node through
// the xpath_query overload of select_single_node.
12557 PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
12559 xpath_query q(query, variables);
12560 return select_single_node(q);
// Evaluates a compiled query with this node as the context, via evaluate_node;
// this is the same call that select_node(const xpath_query&) makes.
12563 PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
12565 return query.evaluate_node(*this);
12571 #ifdef __BORLANDC__
12572 # pragma option pop
12575 // Intel C++ does not properly keep warning state for function templates,
12576 // so popping warning state at the end of translation unit leads to warnings in the middle.
12577 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
12578 # pragma warning(pop)
12581 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
12582 #undef PUGI__NO_INLINE
12583 #undef PUGI__UNLIKELY
12584 #undef PUGI__STATIC_ASSERT
12585 #undef PUGI__DMC_VOLATILE
12586 #undef PUGI__MSVC_CRT_VERSION
12587 #undef PUGI__NS_BEGIN
12588 #undef PUGI__NS_END
12590 #undef PUGI__FN_NO_INLINE
12591 #undef PUGI__GETHEADER_IMPL
12592 #undef PUGI__GETPAGE_IMPL
12593 #undef PUGI__GETPAGE
12594 #undef PUGI__NODETYPE
12595 #undef PUGI__IS_CHARTYPE_IMPL
12596 #undef PUGI__IS_CHARTYPE
12597 #undef PUGI__IS_CHARTYPEX
12598 #undef PUGI__ENDSWITH
12599 #undef PUGI__SKIPWS
12600 #undef PUGI__OPTSET
12601 #undef PUGI__PUSHNODE
12602 #undef PUGI__POPNODE
12603 #undef PUGI__SCANFOR
12604 #undef PUGI__SCANWHILE
12605 #undef PUGI__SCANWHILE_UNROLL
12606 #undef PUGI__ENDSEG
12607 #undef PUGI__THROW_ERROR
12608 #undef PUGI__CHECK_ERROR
12613 * Copyright (c) 2006-2016 Arseny Kapoulkine
12615 * Permission is hereby granted, free of charge, to any person
12616 * obtaining a copy of this software and associated documentation
12617 * files (the "Software"), to deal in the Software without
12618 * restriction, including without limitation the rights to use,
12619 * copy, modify, merge, publish, distribute, sublicense, and/or sell
12620 * copies of the Software, and to permit persons to whom the
12621 * Software is furnished to do so, subject to the following
12624 * The above copyright notice and this permission notice shall be
12625 * included in all copies or substantial portions of the Software.
12627 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
12628 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
12629 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12630 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
12631 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
12632 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
12633 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
12634 * OTHER DEALINGS IN THE SOFTWARE.