1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #ifndef SOURCE_PUGIXML_CPP
6 #define SOURCE_PUGIXML_CPP
16 #ifdef PUGIXML_WCHAR_MODE
20 #ifndef PUGIXML_NO_XPATH
23 # ifdef PUGIXML_NO_EXCEPTIONS
28 #ifndef PUGIXML_NO_STL
38 # pragma warning(push)
39 # pragma warning(disable: 4127) // conditional expression is constant
40 # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
41 # pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
42 # pragma warning(disable: 4702) // unreachable code
43 # pragma warning(disable: 4996) // this function or variable may be unsafe
44 # pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
47 #ifdef __INTEL_COMPILER
48 # pragma warning(disable: 177) // function was declared but never referenced
49 # pragma warning(disable: 279) // controlling expression is constant
50 # pragma warning(disable: 1478 1786) // function was declared "deprecated"
51 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
54 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
55 # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
60 # pragma warn -8008 // condition is always false
61 # pragma warn -8066 // unreachable code
65 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
66 # pragma diag_suppress=178 // function was declared but never referenced
67 # pragma diag_suppress=237 // controlling expression is constant
// Inlining control: PUGI__NO_INLINE expands to the compiler's "do not inline" annotation
// (__declspec(noinline) when _MSC_VER >= 1300, __attribute__((noinline)) for GCC-compatible compilers).
// An empty definition is provided as well, presumably as the fallback for every other compiler.
71 #if defined(_MSC_VER) && _MSC_VER >= 1300
72 # define PUGI__NO_INLINE __declspec(noinline)
73 #elif defined(__GNUC__)
74 # define PUGI__NO_INLINE __attribute__((noinline))
76 # define PUGI__NO_INLINE
79 // Branch weight controls
// PUGI__UNLIKELY(cond) always evaluates to cond; the __builtin_expect form additionally hints to the
// compiler that cond is expected to be false (0), the other form is a plain pass-through.
81 # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
83 # define PUGI__UNLIKELY(cond) (cond)
86 // Simple static assertion
// Declares a char array whose length is 1 when cond holds and -1 otherwise, so a false condition fails to compile.
// The expansion is a braced block, so the macro can only be used where a statement is allowed.
// The (void) read of element 0 presumably keeps the array from being reported as unused.
87 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
89 // Digital Mars C++ bug workaround for passing char loaded from memory via stack
// PUGI__DMC_VOLATILE is either the 'volatile' qualifier or empty, so it can be written on the affected
// declarations unconditionally.
91 # define PUGI__DMC_VOLATILE volatile
93 # define PUGI__DMC_VOLATILE
96 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
97 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
103 // Some MinGW versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions in strict ANSI mode
// The fallbacks are only supplied when none of the three macros is already defined.
104 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__MINGW32__) && defined(__STRICT_ANSI__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
// 2^63 - 1
105 # define LLONG_MAX 9223372036854775807LL
// expressed relative to LLONG_MAX instead of as a literal
106 # define LLONG_MIN (-LLONG_MAX-1)
// 2 * LLONG_MAX + 1, evaluated in unsigned long long arithmetic
107 # define ULLONG_MAX (2ULL*LLONG_MAX+1)
110 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
// PUGI__MSVC_CRT_VERSION mirrors _MSC_VER but is not defined for __S3E__ builds; presumably CRT-dependent
// code tests this macro rather than _MSC_VER.
111 #if defined(_MSC_VER) && !defined(__S3E__)
112 # define PUGI__MSVC_CRT_VERSION _MSC_VER
// Namespace and linkage helpers used to wrap the implementation.
// Header-only mode: the implementation lives in pugi::impl and both function macros expand to 'inline'.
115 #ifdef PUGIXML_HEADER_ONLY
116 # define PUGI__NS_BEGIN namespace pugi { namespace impl {
117 # define PUGI__NS_END } }
118 # define PUGI__FN inline
119 # define PUGI__FN_NO_INLINE inline
// Remaining definitions (presumably the non-header-only branch): MSVC6 gets plain pugi::impl,
// other compilers additionally wrap the implementation in an anonymous namespace.
121 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
122 # define PUGI__NS_BEGIN namespace pugi { namespace impl {
123 # define PUGI__NS_END } }
125 # define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
126 # define PUGI__NS_END } } }
// here the no-inline variant carries the compiler-specific attribute from PUGI__NO_INLINE
129 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE
// Hand-written fixed-width integer typedefs for MSVC older than 1600 and Borland older than 0x561,
// built on the compiler's __intN extension types.
133 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
// uintptr_t is only supplied when the CRT has not already declared it
136 # ifndef _UINTPTR_T_DEFINED
137 typedef size_t uintptr_t;
140 typedef unsigned __int8 uint8_t;
141 typedef unsigned __int16 uint16_t;
142 typedef unsigned __int32 uint32_t;
// Default allocation function: the initial value of xml_memory_management_function_storage<T>::allocate.
// Takes the number of bytes requested and returns the allocated block as void*.
150 PUGI__FN void* default_allocate(size_t size)
// Default deallocation function: the initial value of xml_memory_management_function_storage<T>::deallocate.
155 PUGI__FN void default_deallocate(void* ptr)
// Holder for the process-wide allocation and deallocation function pointers.
// T is not used by the members; the class is a template so that its statics can be defined in a header.
160 template <typename T>
161 struct xml_memory_management_function_storage
// function used to obtain memory
163 static allocation_function allocate;
// function used to release memory obtained through 'allocate'
164 static deallocation_function deallocate;
167 // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
168 // Without a template<> we'll get multiple definitions of the same static
// Both hooks start out pointing at the default_* functions.
169 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
170 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
// The single instantiation used by the implementation (xml_memory::allocate / xml_memory::deallocate).
172 typedef xml_memory_management_function_storage<int> xml_memory;
// Length of the char_t string s; the implementation is selected by PUGIXML_WCHAR_MODE.
178 PUGI__FN size_t strlength(const char_t* s)
182 #ifdef PUGIXML_WCHAR_MODE
189 // Compare two strings
// Returns true when src and dst hold the same characters: wcscmp in wide-character mode, strcmp otherwise.
190 PUGI__FN bool strequal(const char_t* src, const char_t* dst)
194 #ifdef PUGIXML_WCHAR_MODE
195 return wcscmp(src, dst) == 0;
197 return strcmp(src, dst) == 0;
201 // Compare lhs with [rhs_begin, rhs_end)
// lhs is a null-terminated string; rhs is a range of 'count' characters and is never read past count.
202 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
// character-by-character comparison of the first 'count' positions
204 for (size_t i = 0; i < count; ++i)
205 if (lhs[i] != rhs[i])
// all 'count' characters matched: equal only if lhs ends exactly here
208 return lhs[count] == 0;
211 // Get length of wide string, even if CRT lacks wide character support
212 PUGI__FN size_t strlength_wide(const wchar_t* s)
216 #ifdef PUGIXML_WCHAR_MODE
// manual path: 'end' starts at s and the result is its distance from s
219 const wchar_t* end = s;
221 return static_cast<size_t>(end - s);
226 // auto_ptr-like object for exception recovery
// Pairs a T* with a deleter function that is invoked on the pointer when it is non-null.
228 template <typename T> struct auto_deleter
// deleter signature: takes the owned pointer
230 typedef void (*D)(T*);
235 auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
// null pointers are never passed to the deleter
241 if (data) deleter(data);
253 #ifdef PUGIXML_COMPACT
// Open-addressing hash table that maps an object address (const void*) to a void* value slot.
// The capacity is used as a bit mask (capacity - 1), so it is expected to be a power of two.
255 class compact_hash_table
// starts empty, without any storage
258 compact_hash_table(): _items(0), _capacity(0), _count(0)
// storage is released through the global deallocation hook
266 xml_memory::deallocate(_items);
// Look up the value slot stored for key; an empty table yields 0.
273 void** find(const void* key)
277 if (_capacity == 0) return 0;
279 size_t hashmod = _capacity - 1;
280 size_t bucket = hash(key) & hashmod;
// at most _capacity probes are made
282 for (size_t probe = 0; probe <= hashmod; ++probe)
284 item_t& probe_item = _items[bucket];
286 if (probe_item.key == key)
287 return &probe_item.value;
// a slot with a null key ends the probe sequence
289 if (probe_item.key == 0)
292 // hash collision, quadratic probing
293 bucket = (bucket + probe + 1) & hashmod;
296 assert(false && "Hash table is full");
// Return the value slot for key, claiming the first empty slot on the probe sequence when key is absent.
300 void** insert(const void* key)
// room must already exist: occupancy has to be below 3/4 of the capacity
303 assert(_capacity != 0 && _count < _capacity - _capacity / 4);
305 size_t hashmod = _capacity - 1;
306 size_t bucket = hash(key) & hashmod;
308 for (size_t probe = 0; probe <= hashmod; ++probe)
310 item_t& probe_item = _items[bucket];
// empty slot: key was not present, take the slot
312 if (probe_item.key == 0)
314 probe_item.key = key;
316 return &probe_item.value;
// key already present: hand back its existing slot
319 if (probe_item.key == key)
320 return &probe_item.value;
322 // hash collision, quadratic probing
323 bucket = (bucket + probe + 1) & hashmod;
326 assert(false && "Hash table is full");
// growth check: true when 16 more entries would reach the 3/4 occupancy limit (presumably triggers rehash())
332 if (_count + 16 >= _capacity - _capacity / 4)
// Hash of a pointer value; the address is truncated to unsigned int before mixing.
352 static unsigned int hash(const void* key)
354 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
356 // MurmurHash3 32-bit finalizer
// Grow the table: a replacement table with 32 slots (first use) or twice the current capacity is built,
// existing entries are inserted into it, and this table then adopts the new storage.
367 PUGI__FN_NO_INLINE bool compact_hash_table::rehash()
369 compact_hash_table rt;
370 rt._capacity = (_capacity == 0) ? 32 : _capacity * 2;
371 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * rt._capacity));
// every slot starts out empty (null key)
376 memset(rt._items, 0, sizeof(item_t) * rt._capacity);
// walk the old slots and copy their values into the slots insert() returns
378 for (size_t i = 0; i < _capacity; ++i)
380 *rt.insert(_items[i].key) = _items[i].value;
// the old storage is no longer needed
383 xml_memory::deallocate(_items);
385 _capacity = rt._capacity;
// re-insertion must not change the number of stored entries
388 assert(_count == rt._count);
// Alignment (in bytes) of blocks handed out by the page allocator: 4 in compact mode, pointer size otherwise.
397 #ifdef PUGIXML_COMPACT
398 static const uintptr_t xml_memory_block_alignment = 4;
400 static const uintptr_t xml_memory_block_alignment = sizeof(void*);
403 // extra metadata bits
// Flag bits kept in a node/attribute header next to the node type, which uses the low four bits (mask 15).
404 static const uintptr_t xml_memory_page_contents_shared_mask = 64;
405 static const uintptr_t xml_memory_page_name_allocated_mask = 32;
406 static const uintptr_t xml_memory_page_value_allocated_mask = 16;
407 static const uintptr_t xml_memory_page_type_mask = 15;
409 // combined masks for string uniqueness
410 static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
411 static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
// Header helpers.
// Compact mode: the header is an object that locates its page itself via get_page().
413 #ifdef PUGIXML_COMPACT
414 #define PUGI__GETHEADER_IMPL(object, page, flags) // unused
415 #define PUGI__GETPAGE_IMPL(header) (header).get_page()
// Otherwise the header is an integer: the byte distance from the page to the object is stored above the low
// 8 bits and the flags occupy the low 8 bits. The page is recovered by subtracting (header >> 8) from the
// address of the header itself.
// NOTE(review): encoding uses the object address while decoding uses the header address, so this relies on
// the header being located at the very start of the object - confirm against the struct layout.
417 #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
418 #define PUGI__GETPAGE_IMPL(header) const_cast<impl::xml_memory_page*>(reinterpret_cast<const impl::xml_memory_page*>(reinterpret_cast<const char*>(&header) - (header >> 8)))
// PUGI__GETPAGE(n): page that owns node/attribute n. PUGI__NODETYPE(n): node type stored in the low header bits.
421 #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
422 #define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
424 struct xml_allocator;
// Header located at the start of each allocator page; the page's payload bytes follow it in memory.
426 struct xml_memory_page
// Initialise a page header in the raw block 'memory'; the fields set below start out as zero/null.
428 static xml_memory_page* construct(void* memory)
430 xml_memory_page* result = static_cast<xml_memory_page*>(memory);
432 result->allocator = 0;
435 result->busy_size = 0;
436 result->freed_size = 0;
438 #ifdef PUGIXML_COMPACT
439 result->compact_string_base = 0;
440 result->compact_shared_parent = 0;
441 result->compact_page_marker = 0;
// allocator that owns this page
447 xml_allocator* allocator;
// links of the doubly linked page list
449 xml_memory_page* prev;
450 xml_memory_page* next;
455 #ifdef PUGIXML_COMPACT
// base address that compact_string offsets on this page are relative to
456 char_t* compact_string_base;
// parent pointer shared by compact_pointer_parent members on this page
457 void* compact_shared_parent;
// most recent marker written by allocate_object; each marker stores its byte offset from the page start
458 uint32_t* compact_page_marker;
// Payload capacity of a standard page: the configured page size (PUGIXML_MEMORY_PAGE_SIZE when defined)
// minus the space taken by the xml_memory_page header.
462 static const size_t xml_memory_page_size =
463 #ifdef PUGIXML_MEMORY_PAGE_SIZE
464 (PUGIXML_MEMORY_PAGE_SIZE)
468 - sizeof(xml_memory_page);
// Bookkeeping header stored immediately before every string allocated through xml_allocator::allocate_string.
// Both fields are stored in units of xml_memory_block_alignment.
470 struct xml_memory_string_header
472 uint16_t page_offset; // offset from page->data
473 uint16_t full_size; // 0 if string occupies whole page
// Bind the allocator to its first page; _busy_size starts as a copy of that page's busy_size.
478 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
480 #ifdef PUGIXML_COMPACT
// Allocate and initialise a page with room for data_size payload bytes; returns 0 when the allocation hook fails.
485 xml_memory_page* allocate_page(size_t data_size)
// page header plus payload
487 size_t size = sizeof(xml_memory_page) + data_size;
489 // allocate block with some alignment, leaving memory for worst-case padding
490 void* memory = xml_memory::allocate(size);
491 if (!memory) return 0;
493 // prepare page structure
494 xml_memory_page* page = xml_memory_page::construct(memory);
// the new page inherits the allocator back-pointer of the current root page
497 page->allocator = _root->allocator;
// Release a page through the global deallocation hook.
502 static void deallocate_page(xml_memory_page* page)
504 xml_memory::deallocate(page);
// Slow path of allocate_memory, used when the request does not fit in the current root page; defined out of line.
507 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
// Allocate size bytes from the current root page; out_page is the output parameter for the owning page.
// Requests that would exceed the page payload are forwarded to allocate_memory_oob.
509 void* allocate_memory(size_t size, xml_memory_page*& out_page)
511 if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
512 return allocate_memory_oob(size, out_page);
// first unused payload byte of the root page
514 void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
523 #ifdef PUGIXML_COMPACT
// Compact-mode object allocation: room for a uint32_t page marker is requested in addition to the object.
// Returns 0 when the underlying allocation fails.
524 void* allocate_object(size_t size, xml_memory_page*& out_page)
526 void* result = allocate_memory(size + sizeof(uint32_t), out_page);
527 if (!result) return 0;
// byte distance from the page's most recent marker to the new block
530 ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
// the unsigned comparison also rejects negative distances; compact_header can only encode
// distances below 256 * alignment, so a new marker is written at the start of the block
532 if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
535 uint32_t* marker = static_cast<uint32_t*>(result);
// a marker stores its own byte offset from the start of the page
537 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
538 out_page->compact_page_marker = marker;
540 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
541 // this will make sure deallocate_memory correctly tracks the size
542 out_page->freed_size += sizeof(uint32_t);
548 // roll back uint32_t part
549 _busy_size -= sizeof(uint32_t);
// Object allocation without the compact-mode marker handling: a direct forward to allocate_memory.
555 void* allocate_object(size_t size, xml_memory_page*& out_page)
557 return allocate_memory(size, out_page);
// Record that 'size' bytes at 'ptr' on 'page' are no longer in use. Only per-page counters are updated;
// once a page's freed bytes equal its busy bytes the page is either reset in place or unlinked and released.
561 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
// the root page's live size is tracked in _busy_size; copy it back before it is consulted below
563 if (page == _root) page->busy_size = _busy_size;
// ptr must lie inside the used part of the page payload
565 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
568 page->freed_size += size;
569 assert(page->freed_size <= page->busy_size);
// everything allocated from this page has been released
571 if (page->freed_size == page->busy_size)
575 assert(_root == page);
577 // top page freed, just reset sizes
579 page->freed_size = 0;
581 #ifdef PUGIXML_COMPACT
582 // reset compact state to maximize efficiency
583 page->compact_string_base = 0;
584 page->compact_shared_parent = 0;
585 page->compact_page_marker = 0;
// a page that is not the root is unlinked and handed back to the allocation hooks
592 assert(_root != page);
595 // remove from the list
596 page->prev->next = page->next;
597 page->next->prev = page->prev;
600 deallocate_page(page);
// Allocate storage for 'length' char_t units preceded by an xml_memory_string_header.
// Returns a pointer to the character storage (just past the header), or 0 when allocation fails.
605 char_t* allocate_string(size_t length)
// largest byte offset representable by a 16-bit field counted in alignment units
607 static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
// any offset inside a standard page must be encodable
609 PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
611 // allocate memory for string and header block
612 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
614 // round size up to block alignment boundary
615 size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
617 xml_memory_page* page;
618 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
620 if (!header) return 0;
// byte offset of the header from the start of the page payload
623 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
625 assert(page_offset % xml_memory_block_alignment == 0);
626 assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
// stored in alignment units so that it fits in 16 bits
627 header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
629 // full_size == 0 for large strings that occupy the whole page
630 assert(full_size % xml_memory_block_alignment == 0);
631 assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
632 header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
634 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
635 // header is guaranteed a pointer-sized alignment, which should be enough for char_t
636 return static_cast<char_t*>(static_cast<void*>(header + 1));
// Release a string obtained from allocate_string: the page and block size are recovered from the
// xml_memory_string_header stored in front of the characters.
639 void deallocate_string(char_t* string)
641 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
642 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
// the header sits immediately before the character data
645 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
// header->page_offset is in alignment units and relative to the payload, so add the page header size
649 size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
650 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
652 // if full_size == 0 then this string occupies the whole page
653 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
655 deallocate_memory(header, full_size, page);
// In compact mode the reservation request is delegated to the pointer hash table.
660 #ifdef PUGIXML_COMPACT
661 return _hash->reserve();
// page that allocate_memory currently carves blocks from
667 xml_memory_page* _root;
670 #ifdef PUGIXML_COMPACT
// hash table consulted by compact pointers/strings whose value cannot be encoded inline
671 compact_hash_table* _hash;
// Slow path of allocate_memory: obtains a new page for a request that does not fit in the current root page.
// Requests of up to a quarter of the page payload get a standard-size page; larger ones get a page sized
// exactly for the request.
675 PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
677 const size_t large_allocation_threshold = xml_memory_page_size / 4;
679 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
684 if (size <= large_allocation_threshold)
// write the cached size back to the current root page
686 _root->busy_size = _busy_size;
688 // insert page at the end of linked list
697 // insert page before the end of linked list, so that it is deleted as soon as possible
698 // the last page is not deleted even if it's empty (see deallocate_memory)
701 page->prev = _root->prev;
704 _root->prev->next = page;
// the requested block is the only allocation on the new page so far
707 page->busy_size = size;
// the block starts right after the page header
710 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
714 #ifdef PUGIXML_COMPACT
// Granularity (in bytes) of the offsets stored by the compact encodings: 1 << 2 == 4.
716 static const uintptr_t compact_alignment_log2 = 2;
717 static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
// Build the header for an object on 'page': stores the distance to the page's current marker (in
// compact_alignment units, one byte) and the initial flag bits (one byte).
722 compact_header(xml_memory_page* page, unsigned int flags)
724 PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
// byte distance from the page's current marker to this header
726 ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
// must be aligned, non-negative and small enough for 8 bits after scaling
727 assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
729 _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
730 _flags = static_cast<unsigned char>(flags);
// Mask the flag byte; only the low 8 bits of mod take part.
733 void operator&=(uintptr_t mod)
735 _flags &= static_cast<unsigned char>(mod);
// Set bits in the flag byte; only the low 8 bits of mod take part.
738 void operator|=(uintptr_t mod)
740 _flags |= static_cast<unsigned char>(mod);
// Non-mutating mask test; gives the header the same '&' syntax as an integer header.
743 uintptr_t operator&(uintptr_t mod) const
// Locate the owning page: step back from this header to the page marker, then from the marker to the
// page start using the byte offset the marker stores.
748 xml_memory_page* get_page() const
750 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
// _page holds the distance to the marker in compact_alignment units
751 const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
// the marker's value is its byte offset from the start of the page
752 const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
754 return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
// flag bits of the object (modified through operator&= and operator|=)
759 unsigned char _flags;
// Page lookup for a member of a compact object: header_offset is the byte distance from 'object' back to
// the compact_header of the structure that contains it.
762 PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
764 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
766 return header->get_page();
// Fetch the full pointer stored for 'object' in the owning allocator's hash table.
// The result of find() is dereferenced unconditionally, so an entry for 'object' must exist.
769 template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
771 return static_cast<T*>(*compact_get_page(object, header_offset)->allocator->_hash->find(object));
// Store the full pointer 'value' for 'object' in the owning allocator's hash table (keyed by the object's address).
774 template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
776 *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value;
// Pointer to T with a compact in-object encoding relative to the address of the compact_pointer itself.
// header_offset: byte distance from this member back to the owning object's compact_header.
// start: bias applied to the encoded offset (in compact_alignment units), i.e. the smallest encodable distance.
779 template <typename T, int header_offset, int start = -126> class compact_pointer
782 compact_pointer(): _data(0)
786 void operator=(const compact_pointer& rhs)
791 void operator=(T* value)
795 // value is guaranteed to be compact-aligned; 'this' is not
796 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
797 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
798 // compensate for arithmetic shift rounding for negative values
799 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
800 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
// offsets 0..253 are stored inline as 1..254 (the unsigned comparison also rejects negative offsets)
802 if (static_cast<uintptr_t>(offset) <= 253)
803 _data = static_cast<unsigned char>(offset + 1);
// anything else goes through the allocator's hash table
806 compact_set_value<header_offset>(this, value);
// decoding: address of this member rounded down to compact alignment, plus the stored offset
821 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
823 return reinterpret_cast<T*>(base + ((_data - 1 + start) << compact_alignment_log2));
826 return compact_get_value<header_offset, T>(this);
832 T* operator->() const
// Compact encoding for parent pointers. Nearby targets are stored as an inline offset; otherwise the page's
// shared parent slot is tried, and the allocator's hash table is the last resort.
// header_offset: byte distance from this member back to the owning object's compact_header.
841 template <typename T, int header_offset> class compact_pointer_parent
844 compact_pointer_parent(): _data(0)
848 void operator=(const compact_pointer_parent& rhs)
853 void operator=(T* value)
857 // value is guaranteed to be compact-aligned; 'this' is not
858 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
859 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
860 // compensate for arithmetic shift behavior for negative values
861 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
// bias of 65533 units: only targets at or before this member produce an offset in 0..65533
862 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
864 if (static_cast<uintptr_t>(offset) <= 65533)
// inline encoding, stored as offset + 1
866 _data = static_cast<unsigned short>(offset + 1);
870 xml_memory_page* page = compact_get_page(this, header_offset);
// the first out-of-range parent seen on a page becomes that page's shared parent
872 if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
873 page->compact_shared_parent = value;
875 if (page->compact_shared_parent == value)
// neither inline nor shared: store the pointer in the hash table
881 compact_set_value<header_offset>(this, value);
// decoding: address of this member rounded down to compact alignment, plus the stored offset
899 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
901 return reinterpret_cast<T*>(base + ((_data - 1 - 65533) << compact_alignment_log2));
// 65534 marks "use the page's shared parent"
903 else if (_data == 65534)
904 return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
906 return compact_get_value<header_offset, T>(this);
912 T* operator->() const
// Compact encoding of a char_t* as an offset from the owning page's compact_string_base.
// The offset is split between a uint16_t located base_offset bytes before this member (units of 128
// characters) and the one-byte _data remainder; values that do not fit go to the allocator's hash table.
// header_offset: byte distance from this member back to the owning object's compact_header.
921 template <int header_offset, int base_offset> class compact_string
924 compact_string(): _data(0)
928 void operator=(const compact_string& rhs)
933 void operator=(char_t* value)
937 xml_memory_page* page = compact_get_page(this, header_offset);
// the first string stored on a page defines the page's string base
939 if (PUGI__UNLIKELY(page->compact_string_base == 0))
940 page->compact_string_base = value;
// distance from the base in characters
942 ptrdiff_t offset = value - page->compact_string_base;
// unsigned comparison: negative offsets and offsets too large for the 16-bit base are rejected
944 if (static_cast<uintptr_t>(offset) < (65535 << 7))
946 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
947 uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
// set the base word from this offset: high part (+1) in *base, low 7 bits (+1) in _data
951 *base = static_cast<uint16_t>((offset >> 7) + 1);
952 _data = static_cast<unsigned char>((offset & 127) + 1);
// encode relative to the value already held in the base word
956 ptrdiff_t remainder = offset - ((*base - 1) << 7);
958 if (static_cast<uintptr_t>(remainder) <= 253)
960 _data = static_cast<unsigned char>(remainder + 1);
// not encodable inline: fall back to the hash table
964 compact_set_value<header_offset>(this, value);
972 compact_set_value<header_offset>(this, value);
// Decode back to a pointer.
983 operator char_t*() const
989 xml_memory_page* page = compact_get_page(this, header_offset);
991 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
992 const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
// inverse of the encoding above
995 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
997 return page->compact_string_base + offset;
1001 return compact_get_value<header_offset, char_t>(this);
// one-byte encoded remainder
1009 unsigned char _data;
1014 #ifdef PUGIXML_COMPACT
// Compact-mode attribute record; the constructor asserts that the whole structure is 8 bytes.
// The integer template arguments are each member's byte distance from 'header' (and, for the strings,
// also from 'namevalue_base').
1017 struct xml_attribute_struct
1019 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
1021 PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
1024 impl::compact_header header;
// shared high part of the name/value string offsets
1026 uint16_t namevalue_base;
1028 impl::compact_string<4, 2> name;
1029 impl::compact_string<5, 3> value;
// attribute list links
1031 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
1032 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
// Compact-mode node record; the constructor asserts that the whole structure is 12 bytes.
// The node type is passed to the header as its initial flag value.
1035 struct xml_node_struct
1037 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
1039 PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
1042 impl::compact_header header;
// shared high part of the name/value string offsets
1044 uint16_t namevalue_base;
1046 impl::compact_string<4, 2> name;
1047 impl::compact_string<5, 3> value;
1049 impl::compact_pointer_parent<xml_node_struct, 6> parent;
1051 impl::compact_pointer<xml_node_struct, 8, 0> first_child;
// sibling list links
1053 impl::compact_pointer<xml_node_struct, 9> prev_sibling_c;
1054 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
1056 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
// Attribute record using plain pointers; all links start out null.
1062 struct xml_attribute_struct
1064 xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
// header encodes the distance to the owning page; no flags are set initially
1066 header = PUGI__GETHEADER_IMPL(this, page, 0);
// attribute list links
1074 xml_attribute_struct* prev_attribute_c;
1075 xml_attribute_struct* next_attribute;
// Node record using plain pointers; all links start out null.
1078 struct xml_node_struct
1080 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
// header encodes the distance to the owning page, with the node type in the flag bits
1082 header = PUGI__GETHEADER_IMPL(this, page, type);
1090 xml_node_struct* parent;
1092 xml_node_struct* first_child;
// sibling list links
1094 xml_node_struct* prev_sibling_c;
1095 xml_node_struct* next_sibling;
1097 xml_attribute_struct* first_attribute;
// Node of a singly linked list of extra buffers (the list head is xml_document_struct::extra_buffers).
1103 struct xml_extra_buffer
1106 xml_extra_buffer* next;
// Document root: it is both the document node and the allocator for the whole tree.
1109 struct xml_document_struct: public xml_node_struct, public xml_allocator
// both bases are initialised from the same first page; buffer pointers start out null
1111 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
1113 #ifdef PUGIXML_COMPACT
1118 const char_t* buffer;
// head of the xml_extra_buffer list
1120 xml_extra_buffer* extra_buffers;
1122 #ifdef PUGIXML_COMPACT
// storage for compact pointers that cannot be encoded inline
1123 compact_hash_table hash;
// Allocator that owns 'object' (a node or attribute), found through the object's memory page.
1127 template <typename Object> inline xml_allocator& get_allocator(const Object* object)
1131 return *PUGI__GETPAGE(object)->allocator;
// Document that owns 'object'. The page's allocator pointer is downcast to xml_document_struct, which
// derives from xml_allocator.
1134 template <typename Object> inline xml_document_struct& get_document(const Object* object)
1138 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
1142 // Low-level DOM operations
// Allocate and construct an attribute from 'alloc'; returns 0 when memory cannot be obtained.
1144 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
1146 xml_memory_page* page;
1147 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1148 if (!memory) return 0;
// placement-construct in the allocator's memory, passing the page the block came from
1150 return new (memory) xml_attribute_struct(page);
// Allocate and construct a node of the given type from 'alloc'; returns 0 when memory cannot be obtained.
1153 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
1155 xml_memory_page* page;
1156 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1157 if (!memory) return 0;
// placement-construct in the allocator's memory, passing the page the block came from
1159 return new (memory) xml_node_struct(page, type);
// Release an attribute: its name/value strings are freed only when the header flags mark them as
// allocator-owned, then the attribute's own storage is returned to its page.
1162 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
1164 if (a->header & impl::xml_memory_page_name_allocated_mask)
1165 alloc.deallocate_string(a->name);
1167 if (a->header & impl::xml_memory_page_value_allocated_mask)
1168 alloc.deallocate_string(a->value);
1170 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
// Recursively release a node: owned strings, every attribute, every child subtree, then the node itself.
1173 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
// strings are freed only when the header flags mark them as allocator-owned
1175 if (n->header & impl::xml_memory_page_name_allocated_mask)
1176 alloc.deallocate_string(n->name);
1178 if (n->header & impl::xml_memory_page_value_allocated_mask)
1179 alloc.deallocate_string(n->value);
1181 for (xml_attribute_struct* attr = n->first_attribute; attr; )
// read the link before the attribute is destroyed
1183 xml_attribute_struct* next = attr->next_attribute;
1185 destroy_attribute(attr, alloc);
1190 for (xml_node_struct* child = n->first_child; child; )
// read the link before the child is destroyed
1192 xml_node_struct* next = child->next_sibling;
1194 destroy_node(child, alloc);
1199 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
// Link 'child' as the last child of 'node'.
// The first child's prev_sibling_c points at the last child, so the tail is reachable without a walk.
1202 inline void append_node(xml_node_struct* child, xml_node_struct* node)
1204 child->parent = node;
1206 xml_node_struct* head = node->first_child;
// existing children: splice after the current tail and make child the new tail
1210 xml_node_struct* tail = head->prev_sibling_c;
1212 tail->next_sibling = child;
1213 child->prev_sibling_c = tail;
1214 head->prev_sibling_c = child;
// no children yet: child is both head and tail, so its prev link refers to itself
1218 node->first_child = child;
1219 child->prev_sibling_c = child;
// Link 'child' as the first child of 'node'.
1223 inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
1225 child->parent = node;
1227 xml_node_struct* head = node->first_child;
// existing children: the new head takes over the old head's link to the tail
1231 child->prev_sibling_c = head->prev_sibling_c;
1232 head->prev_sibling_c = child;
// no children yet: child is its own tail
1235 child->prev_sibling_c = child;
1237 child->next_sibling = head;
1238 node->first_child = child;
// Link 'child' immediately after 'node' in node's sibling list.
1241 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
1243 xml_node_struct* parent = node->parent;
1245 child->parent = parent;
// fix the back link of whatever follows; when node was the tail, the head's prev link (the tail pointer) moves to child
1247 if (node->next_sibling)
1248 node->next_sibling->prev_sibling_c = child;
1250 parent->first_child->prev_sibling_c = child;
1252 child->next_sibling = node->next_sibling;
1253 child->prev_sibling_c = node;
1255 node->next_sibling = child;
// Link 'child' immediately before 'node' in node's sibling list.
1258 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
1260 xml_node_struct* parent = node->parent;
1262 child->parent = parent;
// node's prev link is a real predecessor only if that element has a next link; for the head it
// refers to the tail, whose next_sibling is null, and then child becomes the new first child
1264 if (node->prev_sibling_c->next_sibling)
1265 node->prev_sibling_c->next_sibling = child;
1267 parent->first_child = child;
1269 child->prev_sibling_c = node->prev_sibling_c;
1270 child->next_sibling = node;
1272 node->prev_sibling_c = child;
// Unlink 'node' from its parent's child list and clear its sibling links; the node itself is not freed here.
1275 inline void remove_node(xml_node_struct* node)
1277 xml_node_struct* parent = node->parent;
// back link of the successor, or the head's tail pointer when node was last
1279 if (node->next_sibling)
1280 node->next_sibling->prev_sibling_c = node->prev_sibling_c;
1282 parent->first_child->prev_sibling_c = node->prev_sibling_c;
// forward link of the predecessor, or the parent's first_child when node was first
1284 if (node->prev_sibling_c->next_sibling)
1285 node->prev_sibling_c->next_sibling = node->next_sibling;
1287 parent->first_child = node->next_sibling;
1290 node->prev_sibling_c = 0;
1291 node->next_sibling = 0;
// Link 'attr' as the last attribute of 'node'.
// The first attribute's prev_attribute_c points at the last attribute.
1294 inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1296 xml_attribute_struct* head = node->first_attribute;
// existing attributes: splice after the current tail and make attr the new tail
1300 xml_attribute_struct* tail = head->prev_attribute_c;
1302 tail->next_attribute = attr;
1303 attr->prev_attribute_c = tail;
1304 head->prev_attribute_c = attr;
// no attributes yet: attr is both head and tail
1308 node->first_attribute = attr;
1309 attr->prev_attribute_c = attr;
// Link 'attr' as the first attribute of 'node'.
1313 inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1315 xml_attribute_struct* head = node->first_attribute;
// existing attributes: the new head takes over the old head's link to the tail
1319 attr->prev_attribute_c = head->prev_attribute_c;
1320 head->prev_attribute_c = attr;
// no attributes yet: attr is its own tail
1323 attr->prev_attribute_c = attr;
1325 attr->next_attribute = head;
1326 node->first_attribute = attr;
// Link 'attr' immediately after 'place' in the attribute list of 'node'.
1329 inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
// fix the back link of whatever follows; when place was the tail, the head's tail pointer moves to attr
1331 if (place->next_attribute)
1332 place->next_attribute->prev_attribute_c = attr;
1334 node->first_attribute->prev_attribute_c = attr;
1336 attr->next_attribute = place->next_attribute;
1337 attr->prev_attribute_c = place;
1338 place->next_attribute = attr;
// Link 'attr' immediately before 'place' in the attribute list of 'node'.
1341 inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
// place has a real predecessor only if that element has a next link; otherwise place is the head
1343 if (place->prev_attribute_c->next_attribute)
1344 place->prev_attribute_c->next_attribute = attr;
1346 node->first_attribute = attr;
1348 attr->prev_attribute_c = place->prev_attribute_c;
1349 attr->next_attribute = place;
1350 place->prev_attribute_c = attr;
// Unlink 'attr' from the attribute list of 'node' and clear its links; the attribute itself is not freed here.
1353 inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
// back link of the successor, or the head's tail pointer when attr was last
1355 if (attr->next_attribute)
1356 attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
1358 node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
// forward link of the predecessor, or the node's first_attribute when attr was first
1360 if (attr->prev_attribute_c->next_attribute)
1361 attr->prev_attribute_c->next_attribute = attr->next_attribute;
1363 node->first_attribute = attr->next_attribute;
1365 attr->prev_attribute_c = 0;
1366 attr->next_attribute = 0;
// Allocate a node of 'type' (node_element by default) and append it to 'node'.
// Returns 0 if the allocator cannot reserve space or the node allocation fails.
1369 PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
// reserve first, before the node is allocated and linked
1371 if (!alloc.reserve()) return 0;
1373 xml_node_struct* child = allocate_node(alloc, type);
1374 if (!child) return 0;
1376 append_node(child, node);
// Allocate an attribute and append it to 'node'.
// Returns 0 if the allocator cannot reserve space or the attribute allocation fails.
1381 PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
// reserve first, before the attribute is allocated and linked
1383 if (!alloc.reserve()) return 0;
1385 xml_attribute_struct* attr = allocate_attribute(alloc);
1386 if (!attr) return 0;
1388 append_attribute(attr, node);
1394 // Helper classes for code generation
1407 // Unicode utilities
// Swap the two bytes of a 16-bit value (e.g. 0x1234 -> 0x3412).
1409 inline uint16_t endian_swap(uint16_t value)
1411 return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
// Reverse the byte order of a 32-bit value (e.g. 0x11223344 -> 0x44332211).
1414 inline uint32_t endian_swap(uint32_t value)
1416 return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
// Counting sink for UTF-8 output: the running result is the number of bytes needed so far.
1421 typedef size_t value_type;
// code points below U+10000: 1, 2 or 3 bytes depending on magnitude
1423 static value_type low(value_type result, uint32_t ch)
1426 if (ch < 0x80) return result + 1;
1428 else if (ch < 0x800) return result + 2;
1430 else return result + 3;
// code points outside the BMP; the code point value itself is not needed for counting
1433 static value_type high(value_type result, uint32_t)
1435 // U+10000..U+10FFFF
// Writing sink for UTF-8 output: the running result is the current write position in a uint8_t buffer.
1442 typedef uint8_t* value_type;
// code points below U+10000
1444 static value_type low(value_type result, uint32_t ch)
// one-byte form: the code point is stored as is
1449 *result = static_cast<uint8_t>(ch);
// two-byte form: 110xxxxx 10xxxxxx
1453 else if (ch < 0x800)
1455 result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
1456 result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
// three-byte form: 1110xxxx 10xxxxxx 10xxxxxx
1462 result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
1463 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1464 result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
// four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1469 static value_type high(value_type result, uint32_t ch)
1471 // U+10000..U+10FFFF
1472 result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
1473 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
1474 result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1475 result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
// dispatch on whether the code point lies in the BMP
1479 static value_type any(value_type result, uint32_t ch)
1481 return (ch < 0x10000) ? low(result, ch) : high(result, ch);
// Counting traits with a size_t running value; presumably counts UTF-16 units per code point - confirm.
// NOTE(review): the bodies of low()/high() are not shown in this listing.
1485 struct utf16_counter
1487 typedef size_t value_type;
// both functions ignore the code point value (the parameter is unnamed)
1489 static value_type low(value_type result, uint32_t)
1494 static value_type high(value_type result, uint32_t)
// Writing traits: the running value is a uint16_t* output cursor; each call stores the UTF-16 units of one code point.
// NOTE(review): the struct header and the returns are not shown in this listing - presumably utf16_writer; confirm.
1502 typedef uint16_t* value_type;
// stores the code point as a single 16-bit unit
1504 static value_type low(value_type result, uint32_t ch)
1506 *result = static_cast<uint16_t>(ch);
// stores the code point as a surrogate pair
1511 static value_type high(value_type result, uint32_t ch)
// split (ch - 0x10000) into its upper bits (msh) and its lower 10 bits (lsh)
1513 uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
1514 uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
// first unit is based at 0xD800, second unit at 0xDC00
1516 result[0] = static_cast<uint16_t>(0xD800 + msh);
1517 result[1] = static_cast<uint16_t>(0xDC00 + lsh);
// dispatches on the code point: below 0x10000 goes to low(), everything else to high()
1522 static value_type any(value_type result, uint32_t ch)
1524 return (ch < 0x10000) ? low(result, ch) : high(result, ch);
// Counting traits with a size_t running value; presumably counts UTF-32 units per code point - confirm.
// NOTE(review): the bodies of low()/high() are not shown in this listing.
1528 struct utf32_counter
1530 typedef size_t value_type;
// both functions ignore the code point value (the parameter is unnamed)
1532 static value_type low(value_type result, uint32_t)
1537 static value_type high(value_type result, uint32_t)
// Writing traits whose running value is a uint32_t* output cursor.
// NOTE(review): the struct header and all function bodies are not shown in this listing - presumably utf32_writer; confirm.
1545 typedef uint32_t* value_type;
// same three entry points as the other writer traits: low(), high() and any()
1547 static value_type low(value_type result, uint32_t ch)
1554 static value_type high(value_type result, uint32_t ch)
1561 static value_type any(value_type result, uint32_t ch)
// Writing traits that emit one byte per code point through a uint8_t* output cursor.
// NOTE(review): the returns and the body of high() are not shown in this listing.
1569 struct latin1_writer
1571 typedef uint8_t* value_type;
1573 static value_type low(value_type result, uint32_t ch)
// code points above 255 do not fit in one byte and are replaced with '?'
1575 *result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
1580 static value_type high(value_type result, uint32_t ch)
// Decoder over UTF-8 bytes: each decoded code point is handed to Traits::low() or Traits::high(),
// which thread the running `result` value through.
// NOTE(review): the struct header, the loop and the pointer/size updates are not shown in this listing - presumably utf8_decoder; confirm.
1592 typedef uint8_t type;
// data/size describe the input bytes; the last (unnamed) parameter only serves to select Traits
1594 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
// mask for the six payload bits of a continuation byte
1596 const uint8_t utf8_byte_mask = 0x3f;
1600 uint8_t lead = *data;
1602 // 0xxxxxxx -> U+0000..U+007F
1605 result = Traits::low(result, lead);
1609 // process aligned single-byte (ascii) blocks
// only taken when data is 4-byte aligned, so the 32-bit load below is aligned
1610 if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
1612 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
// a 32-bit word with no byte having its top bit set holds four single-byte characters
1613 while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
1615 result = Traits::low(result, data[0]);
1616 result = Traits::low(result, data[1]);
1617 result = Traits::low(result, data[2]);
1618 result = Traits::low(result, data[3]);
1624 // 110xxxxx -> U+0080..U+07FF
// lead in 0xC0..0xDF, one more byte available, and that byte is a continuation byte (10xxxxxx)
1625 else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
1627 result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
1631 // 1110xxxx -> U+0800-U+FFFF
// lead in 0xE0..0xEF followed by two continuation bytes
1632 else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
1634 result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
1638 // 11110xxx -> U+10000..U+10FFFF
// lead in 0xF0..0xF7 followed by three continuation bytes; this is the only branch that calls Traits::high()
1639 else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
1641 result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
1645 // 10xxxxxx or 11111xxx -> invalid
// Decoder over UTF-16 units: each decoded code point is handed to Traits::low() or Traits::high().
// opt_swap::value selects whether every unit is byte-swapped before it is interpreted.
// NOTE(review): the loop, the pointer/size updates and the fallback branches are not shown in this listing.
1657 template <typename opt_swap> struct utf16_decoder
1659 typedef uint16_t type;
// the last (unnamed) parameter only serves to select Traits
1661 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
1665 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1670 result = Traits::low(result, lead);
// units in 0xE000..0xFFFF are passed to low() unchanged
1675 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1677 result = Traits::low(result, lead);
1681 // surrogate pair lead
// lead in 0xD800..0xDBFF with at least one more unit available
1682 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
1684 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
// the pair is combined only when the second unit is in 0xDC00..0xDFFF
1686 if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
// code point = 0x10000 + (low 10 bits of lead << 10) + (low 10 bits of next)
1688 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
// Decoder over UTF-32 units: each unit is handed to Traits::low() or Traits::high().
// opt_swap::value selects whether every unit is byte-swapped before it is interpreted.
// NOTE(review): the loop and the condition separating the low/high branches are not shown in this listing.
1709 template <typename opt_swap> struct utf32_decoder
1711 typedef uint32_t type;
// the last (unnamed) parameter only serves to select Traits
1713 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
1717 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1722 result = Traits::low(result, lead);
1726 // U+10000..U+10FFFF
1729 result = Traits::high(result, lead);
// Decoder over single-byte input: every byte is handed to Traits::low() as a code point.
// NOTE(review): the loop and the pointer/size updates are not shown in this listing.
1739 struct latin1_decoder
1741 typedef uint8_t type;
// the last (unnamed) parameter only serves to select Traits
1743 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1747 result = Traits::low(result, *data);
// Maps a wchar_t size in bytes to the matching unit type, counter, writer and decoder.
// Only sizes 2 and 4 are specialized; the primary template is declared but not defined.
1756 template <size_t size> struct wchar_selector;
// 2-byte wchar_t: UTF-16 helpers, decoder without byte swapping
1758 template <> struct wchar_selector<2>
1760 typedef uint16_t type;
1761 typedef utf16_counter counter;
1762 typedef utf16_writer writer;
1763 typedef utf16_decoder<opt_false> decoder;
// 4-byte wchar_t: UTF-32 helpers, decoder without byte swapping
1766 template <> struct wchar_selector<4>
1768 typedef uint32_t type;
1769 typedef utf32_counter counter;
1770 typedef utf32_writer writer;
1771 typedef utf32_decoder<opt_false> decoder;
// Counter and writer traits matching the platform's wchar_t size.
1774 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
1775 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
// Decoder over wchar_t input that forwards to the decoder selected for sizeof(wchar_t).
1777 struct wchar_decoder
1779 typedef wchar_t type;
1781 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
1783 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
// the wchar_t buffer is reinterpreted as the selected decoder's unit type; size is passed through unchanged
1785 return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
1789 #ifdef PUGIXML_WCHAR_MODE
// Writes the byte-swapped form of each of the `length` characters in `data` to `result`.
// One caller in this file passes the same buffer for both pointers, so the element-wise loop must tolerate result == data.
1790 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
1792 for (size_t i = 0; i < length; ++i)
// the cast to the selector's unit type picks the endian_swap overload matching sizeof(wchar_t)
1793 result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
// Character class bit flags; each value is a distinct bit so several classes can be combined in one table entry.
// The trailing comment on each flag lists the characters that belong to the class.
// NOTE(review): the enum header is not shown in this listing.
1801 ct_parse_pcdata = 1, // \0, &, \r, <
1802 ct_parse_attr = 2, // \0, &, \r, ', "
1803 ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
1804 ct_space = 8, // \r, \n, space, tab
1805 ct_parse_cdata = 16, // \0, ], >, \r
1806 ct_parse_comment = 32, // \0, -, >, \r
1807 ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
1808 ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
// Character class bitmask for every character code 0..255, indexed by the code.
// Each entry is the OR of the ct_* flags that apply, e.g. entry 9 (tab) is 12 = ct_space | ct_parse_attr_ws
// and every entry from 128 up is 192 = ct_symbol | ct_start_symbol.
1811 static const unsigned char chartype_table[256] =
1813 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
1814 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
1815 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
1816 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
1817 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
1818 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
1819 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
1820 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
1822 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
1823 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1824 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1825 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1826 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1827 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1828 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1829 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
// Second set of character class bit flags; each value is a distinct bit so classes can be combined in one table entry.
// The trailing comment on each flag lists the characters that belong to the class.
// NOTE(review): the enum header is not shown in this listing.
1834 ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
1835 ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
1836 ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
1837 ctx_digit = 8, // 0-9
1838 ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
// Character class bitmask (ctx_* flags) for every character code 0..255, indexed by the code.
// Each entry is the OR of the flags that apply, e.g. the digits (48..57) are 24 = ctx_digit | ctx_symbol
// and every entry from 128 up is 20 = ctx_start_symbol | ctx_symbol.
1841 static const unsigned char chartypex_table[256] =
1843 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
1844 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
1845 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
1846 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
1848 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
1849 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
1850 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
1851 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
1853 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
1854 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1855 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1856 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1857 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1858 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1859 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1860 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
// Character class tests: the result is non-zero when character c has any of the class bits in ct set in the table.
1863 #ifdef PUGIXML_WCHAR_MODE
// wide characters: codes below 128 index the table directly, all other codes share entry 128
1864 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
// narrow characters: the value converted to unsigned char indexes the table, so the index stays within 0..255
1866 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
// convenience wrappers bound to the two tables
1869 #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
1870 #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
// Returns true when the first byte in memory of an unsigned int holding 1 is 1,
// i.e. when the least significant byte is stored first.
1872 PUGI__FN bool is_little_endian()
1874 unsigned int ui = 1;
// inspect the lowest-addressed byte of the integer
1876 return *reinterpret_cast<unsigned char*>(&ui) == 1;
// Returns the concrete encoding that corresponds to wchar_t on this platform:
// UTF-16 for a 2-byte wchar_t, UTF-32 otherwise, with the byte order reported by is_little_endian().
1879 PUGI__FN xml_encoding get_wchar_encoding()
// any other wchar_t size is rejected at compile time
1881 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1883 if (sizeof(wchar_t) == 2)
1884 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1886 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
// Scans a byte buffer that starts with an XML declaration for its encoding="..." field.
// On a match, out_encoding points at the first character of the encoding name inside `data`
// and out_length holds the name's length; the scan helpers return false on any mismatch.
// NOTE(review): several lines (the `offset` declaration, early returns, the final return) are not shown in this listing.
1889 PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
// PUGI__SCANCHAR: require character ch at data[offset] (returning false from the function otherwise), then advance
1891 #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
// PUGI__SCANCHARTYPE: advance past characters of class ct without running past the end of the buffer
1892 #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }
1894 // check if we have a non-empty XML declaration
// the prefix must be "<?xml" followed by a whitespace character
1895 if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
1898 // scan XML declaration until the encoding field
// the bound i + 1 < size keeps the data[i + 1] read below inside the buffer
1899 for (size_t i = 6; i + 1 < size; ++i)
1901 // declaration can not contain ? in quoted values
1905 if (data[i] == 'e' && data[i + 1] == 'n')
1909 // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
1910 PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
1911 PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');
// optional whitespace is allowed on both sides of '='
1914 PUGI__SCANCHARTYPE(ct_space);
1915 PUGI__SCANCHAR('=');
1916 PUGI__SCANCHARTYPE(ct_space);
1918 // the only two valid delimiters are ' and "
// defaults to ' when the next character is not "; the SCANCHAR below then rejects anything that is neither
1919 uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
1921 PUGI__SCANCHAR(delimiter);
1923 size_t start = offset;
1925 out_encoding = data + offset;
// the encoding name is the run of ct_symbol characters after the opening delimiter
1927 PUGI__SCANCHARTYPE(ct_symbol);
1929 out_length = offset - start;
// the name must be closed by the same delimiter that opened it
1931 PUGI__SCANCHAR(delimiter);
// the scan helpers are local to this function
1939 #undef PUGI__SCANCHAR
1940 #undef PUGI__SCANCHARTYPE
// Guesses the encoding of a raw document buffer from its first bytes.
// Order of checks: byte order marks, then the byte patterns of '<' / "<?" in UTF-32 and UTF-16,
// then the encoding name of an XML declaration; UTF-8 is the fallback.
// NOTE(review): two lines of the latin1 name check are not shown in this listing.
1943 PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
1945 // skip encoding autodetection if input buffer is too small
1946 if (size < 4) return encoding_utf8;
// size >= 4 from here on, so reading the first four bytes is in bounds
1948 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1950 // look for BOM in first few bytes
// the 4-byte UTF-32 patterns are tested before the 2-byte UTF-16 ones, since ff fe 00 00 also starts with ff fe
1951 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1952 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1953 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1954 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1955 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1957 // look for <, <? or <?xm in various encodings
// 0x3c is '<' and 0x3f is '?'
1958 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1959 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1960 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1961 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1963 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1964 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1965 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1967 // no known BOM detected; parse declaration
1968 const uint8_t* enc = 0;
1969 size_t enc_length = 0;
// the declaration is parsed only when the buffer starts with the bytes "<?xm"
1971 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
1973 // iso-8859-1 (case-insensitive)
// (c | ' ') sets bit 0x20, which maps an ASCII upper-case letter to lower case before the comparison
1974 if (enc_length == 10
1975 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
1976 && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
1977 && enc[8] == '-' && enc[9] == '1')
1978 return encoding_latin1;
1980 // latin1 (case-insensitive)
1982 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
1983 && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
1985 return encoding_latin1;
// nothing matched: treat the buffer as UTF-8
1988 return encoding_utf8;
// Resolves a requested encoding to a concrete one for the given buffer.
// Generic requests (wchar, utf16, utf32) are replaced by the platform-specific variant,
// encoding_auto triggers autodetection on the buffer contents, and any other value is returned unchanged.
1991 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
1993 // replace wchar encoding with utf implementation
1994 if (encoding == encoding_wchar) return get_wchar_encoding();
1996 // replace utf16 encoding with utf16 with specific endianness
1997 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1999 // replace utf32 encoding with utf32 with specific endianness
2000 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2002 // only do autodetection if no explicit encoding is requested
2003 if (encoding != encoding_auto) return encoding;
2005 // try to guess encoding (based on XML specification, Appendix F.1)
2006 const uint8_t* data = static_cast<const uint8_t*>(contents);
2008 return guess_buffer_encoding(data, size);
// Produces a writable char_t buffer for `contents`: either the caller's own memory or a newly allocated copy.
// Returns false when the allocation for the copy fails.
// NOTE(review): the conditions selecting between the branches are not shown in this listing;
// presumably is_mutable selects the in-place path - confirm.
2011 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
// size is in bytes; length is in char_t units (a trailing partial unit is dropped by the division)
2013 size_t length = size / sizeof(char_t);
// in-place path: hand back the caller's memory with const cast away
2017 out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
2018 out_length = length;
// copy path: one extra char_t is allocated beyond the copied data
2022 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2023 if (!buffer) return false;
2026 memcpy(buffer, contents, length * sizeof(char_t));
2028 assert(length == 0);
// the reported length includes the extra char_t
2032 out_buffer = buffer;
2033 out_length = length + 1;
2039 #ifdef PUGIXML_WCHAR_MODE
// Returns true when the two encodings are the same UTF width (16 or 32) but opposite byte orders,
// i.e. when the only difference between them is endianness.
2040 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
2042 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
2043 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
// Produces a byte-swapped char_t buffer from `contents`, either in place or in a newly allocated buffer.
// Returns false when the allocation fails.
// NOTE(review): the condition selecting between the branches is not shown in this listing;
// presumably is_mutable selects the in-place path - confirm.
2046 PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2048 const char_t* data = static_cast<const char_t*>(contents);
// size is in bytes; length is in char_t units
2049 size_t length = size / sizeof(char_t);
// in-place path: source and destination are the same memory
2053 char_t* buffer = const_cast<char_t*>(data);
2055 convert_wchar_endian_swap(buffer, data, length);
2057 out_buffer = buffer;
2058 out_length = length;
// copy path: one extra char_t is allocated beyond the converted data
2062 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2063 if (!buffer) return false;
2065 convert_wchar_endian_swap(buffer, data, length);
// the reported length includes the extra char_t
2068 out_buffer = buffer;
2069 out_length = length + 1;
// Converts `contents`, decoded by decoder D, into a newly allocated wchar_t buffer.
// Runs the decoder twice: once with a counter to size the output, once with a writer to fill it.
// Returns false when the allocation fails. The last (unnamed) parameter only serves to select D.
2075 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2077 const typename D::type* data = static_cast<const typename D::type*>(contents);
// size is in bytes; data_length is in units of the decoder's input type
2078 size_t data_length = size / sizeof(typename D::type);
2080 // first pass: get length in wchar_t units
2081 size_t length = D::process(data, data_length, 0, wchar_counter());
2083 // allocate buffer of suitable length
// one extra char_t is allocated beyond the converted data
2084 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2085 if (!buffer) return false;
2087 // second pass: convert the input (whatever encoding D decodes) to wchar_t
2088 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
2089 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
// the writer must have produced exactly as many units as the counter predicted
2091 assert(oend == obegin + length);
// the reported length includes the extra char_t
2094 out_buffer = buffer;
2095 out_length = length + 1;
// Converts a buffer in the given concrete encoding to the native wchar_t encoding.
// Picks the cheapest route: no conversion, byte swap only, or a full decode via convert_buffer_generic.
// An encoding that matches none of the cases trips the assert at the end.
2100 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2102 // get native encoding
2103 xml_encoding wchar_encoding = get_wchar_encoding();
2105 // fast path: no conversion required
2106 if (encoding == wchar_encoding)
2107 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2109 // only endian-swapping is required
2110 if (need_endian_swap_utf(encoding, wchar_encoding))
2111 return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
2113 // source encoding is utf8
2114 if (encoding == encoding_utf8)
2115 return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
2117 // source encoding is utf16
2118 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2120 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
// the decoder swaps bytes (opt_true) only when the source byte order differs from the machine's
2122 return (native_encoding == encoding) ?
2123 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2124 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2127 // source encoding is utf32
2128 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2130 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
// the decoder swaps bytes (opt_true) only when the source byte order differs from the machine's
2132 return (native_encoding == encoding) ?
2133 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2134 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2137 // source encoding is latin1
2138 if (encoding == encoding_latin1)
2139 return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
2141 assert(false && "Invalid encoding");
// Converts `contents`, decoded by decoder D, into a newly allocated UTF-8 buffer.
// Runs the decoder twice: once with a counter to size the output, once with a writer to fill it.
// Returns false when the allocation fails. The last (unnamed) parameter only serves to select D.
2145 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2147 const typename D::type* data = static_cast<const typename D::type*>(contents);
// size is in bytes; data_length is in units of the decoder's input type
2148 size_t data_length = size / sizeof(typename D::type);
2150 // first pass: get length in utf8 units
2151 size_t length = D::process(data, data_length, 0, utf8_counter());
2153 // allocate buffer of suitable length
// one extra char_t is allocated beyond the converted data
2154 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2155 if (!buffer) return false;
2157 // second pass: convert the input (whatever encoding D decodes) to utf8
2158 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2159 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
// the writer must have produced exactly as many bytes as the counter predicted
2161 assert(oend == obegin + length);
// the reported length includes the extra char_t
2164 out_buffer = buffer;
2165 out_length = length + 1;
// Scans `data` from the start, one byte at a time.
// NOTE(review): the loop body and the returns are not shown in this listing; going by the name and by the caller,
// this presumably returns the number of leading bytes that need no UTF-8 conversion - confirm.
2170 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
2172 for (size_t i = 0; i < size; ++i)
// Converts a latin1 buffer to UTF-8.
// The leading part that needs no conversion is copied verbatim and only the remainder is transcoded;
// when nothing needs conversion the work is delegated to get_mutable_buffer().
// Returns false when the allocation fails.
2179 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2181 const uint8_t* data = static_cast<const uint8_t*>(contents);
2182 size_t data_length = size;
2184 // get size of prefix that does not need utf8 conversion
2185 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2186 assert(prefix_length <= data_length);
// postfix is the remainder of the input after the prefix
2188 const uint8_t* postfix = data + prefix_length;
2189 size_t postfix_length = data_length - prefix_length;
2191 // if no conversion is needed, just return the original buffer
2192 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2194 // first pass: get length in utf8 units
// the prefix is counted byte for byte; only the postfix goes through the counter
2195 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
2197 // allocate buffer of suitable length
// one extra char_t is allocated beyond the converted data
2198 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2199 if (!buffer) return false;
2201 // second pass: convert latin1 input to utf8
2202 memcpy(buffer, data, prefix_length);
// transcoded output starts right after the copied prefix
2204 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2205 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
// the writer must have ended exactly where the counted length predicted
2207 assert(oend == obegin + length);
// the reported length includes the extra char_t
2210 out_buffer = buffer;
2211 out_length = length + 1;
// Converts a buffer in the given concrete encoding to UTF-8.
// UTF-8 input needs no conversion; UTF-16/UTF-32 go through convert_buffer_generic and latin1 through convert_buffer_latin1.
// An encoding that matches none of the cases trips the assert at the end.
2216 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2218 // fast path: no conversion required
2219 if (encoding == encoding_utf8)
2220 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2222 // source encoding is utf16
2223 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2225 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
// the decoder swaps bytes (opt_true) only when the source byte order differs from the machine's
2227 return (native_encoding == encoding) ?
2228 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2229 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2232 // source encoding is utf32
2233 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2235 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
// the decoder swaps bytes (opt_true) only when the source byte order differs from the machine's
2237 return (native_encoding == encoding) ?
2238 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2239 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2242 // source encoding is latin1
2243 if (encoding == encoding_latin1)
2244 return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
2246 assert(false && "Invalid encoding");
// First half of a wide-to-UTF-8 conversion: returns the UTF-8 length, as computed by utf8_counter,
// of the `length` wide characters starting at `str`.
2251 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
2253 // get length in utf8 characters
2254 return wchar_decoder::process(str, length, 0, utf8_counter());
// Second half of a wide-to-UTF-8 conversion: writes the UTF-8 form of `str` into `buffer`.
// `size` is the expected output length (the assert checks that the writer ended exactly there),
// so the caller must supply a buffer of at least `size` bytes.
2257 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
2260 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2261 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2263 assert(begin + size == end);
2268 #ifndef PUGIXML_NO_STL
// Converts `length` wide characters starting at `str` to a UTF-8 std::string.
// NOTE(review): the declaration of `result` and the return are not shown in this listing.
2269 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
2271 // first pass: get length in utf8 characters
2272 size_t size = as_utf8_begin(str, length);
2274 // allocate resulting string
2276 result.resize(size);
2278 // second pass: convert to utf8
// &result[0] is only taken when the string is non-empty
2279 if (size > 0) as_utf8_end(&result[0], size, str, length);
// Converts `size` bytes of UTF-8 starting at `str` to a wide string.
// NOTE(review): the guard around the second pass and the return are not shown in this listing.
2284 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
2286 const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2288 // first pass: get length in wchar_t units
2289 size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2291 // allocate resulting string
2292 std::basic_string<wchar_t> result;
2293 result.resize(length);
2295 // second pass: convert to wchar_t
// the string's storage is written directly through the writer's unit pointer type
2298 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
2299 wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
// the writer must have produced exactly as many units as the counter predicted
2301 assert(begin + length == end);
// Decides whether a string of `length` characters may be written over the existing string at `target`.
// `header` carries flag bits; `header_mask` selects the bit tested below to tell document buffer memory from heap memory.
2309 template <typename Header>
2310 inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
2312 // never reuse shared memory
2313 if (header & xml_memory_page_contents_shared_mask) return false;
2315 size_t target_length = strlength(target);
2317 // always reuse document buffer memory if possible
// "possible" means the new string is no longer than the current one
2318 if ((header & header_mask) == 0) return target_length >= length;
2320 // reuse heap memory if waste is not too great
2321 const size_t reuse_threshold = 32;
// reuse when the new string fits and either the old string is shorter than the threshold or less than half of it would be wasted
2323 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
// Assigns `source_length` characters of `source` to the string `dest`, reusing existing storage where allowed.
// The bit `header_mask` in `header` records whether `dest` was allocated by the allocator.
// Returns false when the allocator cannot reserve or allocate.
// NOTE(review): the assignments to `dest` and the success returns are not shown in this listing.
2326 template <typename String, typename Header>
2327 PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
// case 1: empty source
2329 if (source_length == 0)
2331 // empty string and null pointer are equivalent, so just deallocate old memory
2332 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
// only strings flagged as allocated are handed back to the allocator
2334 if (header & header_mask) alloc->deallocate_string(dest);
2336 // mark the string as not allocated
2338 header &= ~header_mask;
// case 2: the existing buffer can take the new contents
2342 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
2344 // we can reuse old buffer, so just copy the new data (including zero terminator)
2345 memcpy(dest, source, source_length * sizeof(char_t));
2346 dest[source_length] = 0;
// case 3: a new buffer is needed
2352 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2354 if (!alloc->reserve()) return false;
2356 // allocate new buffer
// the extra character holds the zero terminator
2357 char_t* buf = alloc->allocate_string(source_length + 1);
2358 if (!buf) return false;
2360 // copy the string (including zero terminator)
2361 memcpy(buf, source, source_length * sizeof(char_t));
2362 buf[source_length] = 0;
2364 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
2365 if (header & header_mask && dest) alloc->deallocate_string(dest);
2367 // the string is now allocated, so set the flag
2369 header |= header_mask;
// Tracks one pending "gap" (a run of characters to be dropped) while a string is rewritten in place.
// `end` is the end of the pending gap (0 when there is none) and `size` is its size.
// NOTE(review): the struct header, the member declarations and the updates of end/size are not shown in this listing.
2380 gap(): end(0), size(0)
2384 // Push new gap, move s count bytes further (skipping the gap).
2385 // Collapse previous gap.
2386 void push(char_t*& s, size_t count)
2388 if (end) // there was a gap already; collapse it
2390 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
// memmove is used because the source and destination ranges may overlap; the length is computed in bytes
2392 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2395 s += count; // end of current gap
2402 // Collapse all gaps, return past-the-end pointer
2403 char_t* flush(char_t* s)
2407 // Move [old_gap_end, current_pos) to [old_gap_start, ...)
2409 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
// Handles an escape that starts at `s` (the callers in this file invoke it when *s is '&').
// Numeric character references are decoded into `ucsc` and written back at `s`; named entities are matched character by character.
// In each case g.push() records the leftover characters between the written output and `stre` as a gap.
// NOTE(review): the switch/case labels, loops, replacement-character writes and returns are not shown in this listing.
2417 PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
// stre is the read cursor, starting at the character after s
2419 char_t* stre = s + 1;
// accumulates the value of a numeric character reference
2425 unsigned int ucsc = 0;
2427 if (stre[1] == 'x') // &#x... (hex code)
2433 if (ch == ';') return stre;
// decimal digit
2437 if (static_cast<unsigned int>(ch - '0') <= 9)
2438 ucsc = 16 * ucsc + (ch - '0');
// hex letter a-f in either case: (ch | ' ') sets bit 0x20, mapping an ASCII upper-case letter to lower case
2439 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
2440 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
2451 else // &#... (dec code)
2453 char_t ch = *++stre;
2455 if (ch == ';') return stre;
2459 if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9)
2460 ucsc = 10 * ucsc + (ch - '0');
// write the decoded code point at s using the writer that matches char_t; s then points past the written units
2472 #ifdef PUGIXML_WCHAR_MODE
2473 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
2475 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
2478 g.push(s, stre - s);
2486 if (*stre == 'm') // &am
2488 if (*++stre == 'p' && *++stre == ';') // "amp" entity -> '&'
2493 g.push(s, stre - s);
2497 else if (*stre == 'p') // &ap
2499 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // "apos" entity -> single quote
2504 g.push(s, stre - s);
2513 if (*++stre == 't' && *++stre == ';') // "gt" entity -> '>'
2518 g.push(s, stre - s);
2526 if (*++stre == 't' && *++stre == ';') // "lt" entity -> '<'
2531 g.push(s, stre - s);
2539 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // "quot" entity -> double quote
2544 g.push(s, stre - s);
// Parser helper macros. They rely on names from the scope where they are expanded:
// s (current position), endch, optmsk, cursor, alloc, ch, error_offset and error_status.
// true when c equals e, or when c is the terminating zero and the saved character endch equals e
2558 #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
// advance s past whitespace
2559 #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
// non-zero when parse option OPT is set in optmsk
2560 #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
// append a new child of the given type to cursor and descend into it; out of memory is reported as a parse error
2561 #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
// move cursor back up to its parent
2562 #define PUGI__POPNODE() { cursor = cursor->parent; }
// advance s until X holds or the terminating zero is reached
2563 #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
// advance s while X holds (X itself must stop at the terminating zero)
2564 #define PUGI__SCANWHILE(X) { while (X) ++s; }
// like PUGI__SCANWHILE but tests four characters per iteration; X must be written in terms of the local `ss`
2565 #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
// save the current character in ch, replace it with a zero terminator and step past it
2566 #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
// record the error position and status, then return a null pointer from the enclosing function
2567 #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
// report err at m when s has reached the terminating zero
2568 #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
// Processes comment text in place starting at `s`: normalizes line endings and finds the closing "-->".
// `endch` is the character consulted by PUGI__ENDSWITH when the buffer's terminating zero is hit.
// On success the return value points just past the end of the comment.
// NOTE(review): the gap declaration, the loop, the terminator write and the failure path are not shown in this listing.
2570 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
// skip ahead to the next character that needs attention (class ct_parse_comment)
2576 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
2578 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2580 *s++ = '\n'; // replace first one with 0x0a
// the second character of a 0x0d 0x0a pair is dropped by recording it as a gap
2582 if (*s == '\n') g.push(s, 1);
2584 else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
// skip 3 characters when the '>' is physically present, 2 when it was matched via endch
2588 return s + (s[2] == '>' ? 3 : 2);
// Processes CDATA text in place starting at `s`: normalizes line endings and finds the closing "]]>".
// `endch` is the character consulted by PUGI__ENDSWITH when the buffer's terminating zero is hit.
// NOTE(review): the gap declaration, the loop, the terminator write and the returns are not shown in this listing.
2598 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
// skip ahead to the next character that needs attention (class ct_parse_cdata)
2604 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
2606 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2608 *s++ = '\n'; // replace first one with 0x0a
// the second character of a 0x0d 0x0a pair is dropped by recording it as a gap
2610 if (*s == '\n') g.push(s, 1);
2612 else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
// Pointer to a PCDATA conversion routine: takes the start of the text and returns a char_t*.
2626 typedef char_t* (*strconv_pcdata_t)(char_t*);
// PCDATA conversion, specialized at compile time on three switches:
// opt_trim (strip trailing whitespace), opt_eol (normalize line endings), opt_escape (expand escapes).
// NOTE(review): the gap/begin declarations, the loop, the terminator writes and the returns are not shown in this listing.
2628 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
2630 static char_t* parse(char_t* s)
// skip ahead to the next character that needs attention (class ct_parse_pcdata)
2638 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
2640 if (*s == '<') // PCDATA ends here
// collapse the pending gap; end is the past-the-end pointer of the compacted text
2642 char_t* end = g.flush(s);
2644 if (opt_trim::value)
// walk back over trailing whitespace, never before the start of the text
2645 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2652 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2654 *s++ = '\n'; // replace first one with 0x0a
// the second character of a 0x0d 0x0a pair is dropped by recording it as a gap
2656 if (*s == '\n') g.push(s, 1);
2658 else if (opt_escape::value && *s == '&')
2660 s = strconv_escape(s, g);
// same finishing steps as the '<' case above, for the other way the text can end (condition not shown)
2664 char_t* end = g.flush(s);
2666 if (opt_trim::value)
2667 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
// Selects the strconv_pcdata_impl instantiation that matches the parse options in `optmask`.
2679 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
// the bit arithmetic below depends on these exact flag values
2681 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
// (optmask >> 4) & 3 puts parse_escapes in bit 0 and parse_eol in bit 1; (optmask >> 9) & 4 puts parse_trim_pcdata in bit 2.
// The template arguments are ordered <trim, eol, escape>, i.e. bit 2, bit 1, bit 0.
2683 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim)
2685 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
2686 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
2687 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
2688 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
2689 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
2690 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
2691 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
2692 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
// the switch value is masked to 0..7, so this branch is unreachable
2693 default: assert(false); return 0; // should not get here
// Pointer to an attribute value conversion routine: takes the start of the value and the closing quote character.
2697 typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
// Attribute value conversion routines, specialized at compile time on opt_escape (expand escapes).
// Each parse_* variant scans the value in place up to `end_quote` and differs in how whitespace and line endings are treated.
// NOTE(review): gap declarations, loops, terminator writes, several branch bodies and the returns are not shown in this listing.
2699 template <typename opt_escape> struct strconv_attribute_impl
// variant that trims leading/trailing whitespace and handles whitespace runs inside the value
2701 static char_t* parse_wnorm(char_t* s, char_t end_quote)
2705 // trim leading whitespaces
2706 if (PUGI__IS_CHARTYPE(*s, ct_space))
2711 while (PUGI__IS_CHARTYPE(*str, ct_space));
// stop at whitespace as well as at the characters special to attribute values
2718 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
2720 if (*s == end_quote)
// collapse the pending gap before looking at the end of the value
2722 char_t* str = g.flush(s);
2725 while (PUGI__IS_CHARTYPE(*str, ct_space));
2729 else if (PUGI__IS_CHARTYPE(*s, ct_space))
2733 if (PUGI__IS_CHARTYPE(*s, ct_space))
// find the end of the whitespace run that follows
2735 char_t* str = s + 1;
2736 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
2741 else if (opt_escape::value && *s == '&')
2743 s = strconv_escape(s, g);
// variant that handles individual whitespace characters inside the value
2753 static char_t* parse_wconv(char_t* s, char_t end_quote)
2759 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
2761 if (*s == end_quote)
2767 else if (PUGI__IS_CHARTYPE(*s, ct_space))
// a '\n' at this point is dropped by recording it as a gap
2773 if (*s == '\n') g.push(s, 1);
2777 else if (opt_escape::value && *s == '&')
2779 s = strconv_escape(s, g);
// variant that only handles '\r' line endings
2789 static char_t* parse_eol(char_t* s, char_t end_quote)
2795 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2797 if (*s == end_quote)
2803 else if (*s == '\r')
// a '\n' at this point is dropped by recording it as a gap
2807 if (*s == '\n') g.push(s, 1);
2809 else if (opt_escape::value && *s == '&')
2811 s = strconv_escape(s, g);
// variant with no whitespace or line ending handling
2821 static char_t* parse_simple(char_t* s, char_t end_quote)
2827 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2829 if (*s == end_quote)
2835 else if (opt_escape::value && *s == '&')
2837 s = strconv_escape(s, g);
// Returns the strconv_attribute_impl<...> routine that matches the parse flags in optmask.
2848 PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
// The four flags must occupy bits 4..7 in this order for the index computation below.
2850 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
// Index bits: bit 0 = escapes, bit 1 = eol, bit 2 = wconv, bit 3 = wnorm.
// The table gives priority wnorm > wconv > eol > simple; the lowest bit only selects opt_true/opt_false.
2852 switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
2854 case 0: return strconv_attribute_impl<opt_false>::parse_simple;
2855 case 1: return strconv_attribute_impl<opt_true>::parse_simple;
2856 case 2: return strconv_attribute_impl<opt_false>::parse_eol;
2857 case 3: return strconv_attribute_impl<opt_true>::parse_eol;
2858 case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
2859 case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
2860 case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
2861 case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
2862 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
2863 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
2864 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
2865 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
2866 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
2867 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
2868 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
2869 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
// The switch value is a 4-bit number (0..15), so every value has a case above; this branch is a safeguard.
2870 default: assert(false); return 0; // should not get here
// Builds an xml_parse_result whose status and offset fields are set from the arguments;
// offset defaults to 0 when omitted. (The return statement is not visible in this listing.)
2874 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2876 xml_parse_result result;
2877 result.status = status;
2878 result.offset = offset;
// Parser state. alloc is a by-value copy of the caller's allocator (see the constructor),
// alloc_state remembers where that copy came from.
2885 xml_allocator alloc;
2886 xml_allocator* alloc_state;
// Error position and status recorded by the parser; initialized to 0 / status_ok.
2887 char_t* error_offset;
2888 xml_parse_status error_status;
// Copies *alloc_ into alloc and keeps the pointer so the copy can be written back later.
2890 xml_parser(xml_allocator* alloc_): alloc(*alloc_), alloc_state(alloc_), error_offset(0), error_status(status_ok)
// Writes the working copy back to the original allocator object.
// NOTE(review): the enclosing function header is not visible here; presumably the destructor -- confirm.
2896 *alloc_state = alloc;
2899 // DOCTYPE consists of nested sections of the following possible types:
2900 // <!-- ... -->, <? ... ?>, "...", '...'
2903 // First group can not contain nested groups
2904 // Second group can contain nested groups of the same type
2905 // Third group can contain all other groups
// Handles one non-nesting DOCTYPE section starting at s: a quoted string, a <? ... ?>
// section or a <!-- ... --> section. Anything else, or hitting the terminating zero
// before the closing delimiter, raises status_bad_doctype via PUGI__THROW_ERROR.
// NOTE(review): the statements that advance s past the delimiters are not visible in this listing.
2906 char_t* parse_doctype_primitive(char_t* s)
2908 if (*s == '"' || *s == '\'')
// Scan to the matching quote character (ch is set on a line not shown here).
2912 PUGI__SCANFOR(*s == ch);
2913 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2917 else if (s[0] == '<' && s[1] == '?')
2921 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2922 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2926 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2929 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2930 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2934 else PUGI__THROW_ERROR(status_bad_doctype, s);
// Handles a DOCTYPE "<![ ... ]]>" section starting at s; the assert requires s to point at "<![".
// Nested "<![" openers and "]]>" terminators are recognized; the final statement raises
// status_bad_doctype. NOTE(review): the loop and depth bookkeeping are not visible in this listing.
2939 char_t* parse_doctype_ignore(char_t* s)
2943 assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2948 if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2950 // nested ignore section
2954 else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2956 // ignore section end
// Presumably reached when the buffer ends before the section is closed -- TODO confirm.
2967 PUGI__THROW_ERROR(status_bad_doctype, s);
// Handles a DOCTYPE "<! ... >" group starting at s. endch is the character that parse()
// removed from the end of the buffer when it zero-terminated it.
// NOTE(review): the loop, the depth counter and the return statements are not visible in this listing.
2970 char_t* parse_doctype_group(char_t* s, char_t endch)
// s[0] may be 0 instead of '<'; NOTE(review): presumably because a caller overwrote it with a terminator -- confirm.
2974 assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
// "<!" not followed by '-' is either an ignore section or another control group;
// "<!-" (comment start) falls through to the primitive branch below.
2979 if (s[0] == '<' && s[1] == '!' && s[2] != '-')
2984 s = parse_doctype_ignore(s);
2989 // some control group
2994 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
2996 // unknown tag (forbidden), or some primitive group
2997 s = parse_doctype_primitive(s);
// It is an error unless depth is 0 and the removed last character was '>'.
// NOTE(review): presumably reached when the buffer ended inside the group -- confirm.
3011 if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
// Parses the constructs that follow "<!": comments, CDATA sections and DOCTYPE.
//   s       position of the construct in the (mutable, zero-terminated) buffer
//   cursor  node that receives any newly pushed node
//   optmsk  parse option flags, tested through PUGI__OPTSET
//   endch   character that parse() removed from the end of the buffer
// Errors are reported through PUGI__THROW_ERROR / PUGI__CHECK_ERROR.
// NOTE(review): pointer increments, else branches and returns are not visible in this listing.
3016 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
3018 // parse node contents, starting with exclamation mark
3021 if (*s == '-') // '<!-...'
3025 if (*s == '-') // '<!--...'
3029 if (PUGI__OPTSET(parse_comments))
3031 PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
3032 cursor->value = s; // Save the offset.
// Line-end conversion is only needed when the comment text is actually kept.
3035 if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
3037 s = strconv_comment(s, endch);
// A null result means the comment was not terminated; report it at the comment start.
3039 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
3043 // Scan for terminating '-->'.
3044 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
3045 PUGI__CHECK_ERROR(status_bad_comment, s);
3047 if (PUGI__OPTSET(parse_comments))
3048 *s = 0; // Zero-terminate this segment at the first terminating '-'.
// Advance by 2 instead of 3 when s[2] is not '>'; NOTE(review): presumably the '>' was the
// buffer's last character, replaced by the terminator -- confirm against PUGI__ENDSWITH.
3050 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
3053 else PUGI__THROW_ERROR(status_bad_comment, s);
// Each *++s advances before comparing, so on success s ends on the '[' that follows "CDATA".
3058 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
3062 if (PUGI__OPTSET(parse_cdata))
3064 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
3065 cursor->value = s; // Save the offset.
3067 if (PUGI__OPTSET(parse_eol))
3069 s = strconv_cdata(s, endch);
3071 if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
3075 // Scan for terminating ']]>'.
3076 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3077 PUGI__CHECK_ERROR(status_bad_cdata, s);
3079 *s++ = 0; // Zero-terminate this segment.
3082 else // Flagged for discard, but we still have to scan for the terminator.
3084 // Scan for terminating ']]>'.
3085 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3086 PUGI__CHECK_ERROR(status_bad_cdata, s);
3091 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
3093 else PUGI__THROW_ERROR(status_bad_cdata, s);
3095 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
// A DOCTYPE is rejected when the current node has a parent, i.e. it is only accepted at the top level.
3099 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
// 9 is the length of "<!DOCTYPE"; presumably s was moved back to the '<' on a line not shown -- TODO confirm.
3101 char_t* mark = s + 9;
3103 s = parse_doctype_group(s, endch);
3106 assert((*s == 0 && endch == '>') || *s == '>');
3109 if (PUGI__OPTSET(parse_doctype))
// Skip whitespace after the keyword so the stored value starts at the first non-space character.
3111 while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
3113 PUGI__PUSHNODE(node_doctype);
3115 cursor->value = mark;
// Buffer ended right after "<!": choose the error status from the character that was removed from the end.
3118 else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
3119 else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
3120 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
// Parses a "<?target ...?>" construct: either an XML declaration or a processing instruction.
//   ref_cursor  in/out: copied into a local on entry and written back at the end
//   optmsk      parse option flags, tested through PUGI__OPTSET
//   endch       character that parse() removed from the end of the buffer
// Errors are reported with status_bad_pi.
// NOTE(review): several statements (target/value setup, segment termination, returns) are not visible in this listing.
3125 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
3127 // load into registers
3128 xml_node_struct* cursor = ref_cursor;
3131 // parse node contents, starting with question mark
// The target name must begin with a ct_start_symbol character.
3137 if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
3139 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
3140 PUGI__CHECK_ERROR(status_bad_pi, s);
3142 // determine node type; stricmp / strcasecmp is not portable
// OR-ing with ' ' (0x20) folds ASCII upper case to lower case; the target must be exactly three characters long.
3143 bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
// Only build a node when the matching option (parse_declaration or parse_pi) is enabled.
3145 if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
3149 // disallow non top-level declarations
3150 if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
3152 PUGI__PUSHNODE(node_declaration);
3156 PUGI__PUSHNODE(node_pi);
3159 cursor->name = target;
3163 // parse value/attributes
3167 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
3172 else if (PUGI__IS_CHARTYPE(ch, ct_space))
3179 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3180 PUGI__CHECK_ERROR(status_bad_pi, s);
3184 // replace ending ? with / so that 'element' terminates properly
3187 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
3192 // store value and step over >
3193 cursor->value = value;
3202 else PUGI__THROW_ERROR(status_bad_pi, s);
// Scan to the closing "?>" and step over it. NOTE(review): the condition guarding this
// branch is not shown; presumably the path where no node is created -- confirm.
3207 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3208 PUGI__CHECK_ERROR(status_bad_pi, s);
3210 s += (s[1] == '>' ? 2 : 1);
3213 // store from registers
3214 ref_cursor = cursor;
// Main parsing loop: builds the node tree under root from the zero-terminated buffer at s.
//   optmsk  parse option flags (also used to select the attribute / PCDATA converters)
//   endch   character that parse() removed from the end of the buffer
// Errors are reported through PUGI__THROW_ERROR with the status codes seen below.
// NOTE(review): loop headers, braces, labels and many short statements are not visible in
// this listing, so the comments below describe only the visible lines.
3219 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
3221 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
3222 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
// Null check on the selected PCDATA converter; the guarded statements are not shown here.
3223 if (!strconv_pcdata) {
3227 xml_node_struct* cursor = root;
3237 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
3239 PUGI__PUSHNODE(node_element); // Append a new node to the tree.
3243 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3244 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3250 else if (PUGI__IS_CHARTYPE(ch, ct_space))
3255 PUGI__SKIPWS(); // Eat any whitespace.
3257 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
3259 xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute.
3260 if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
3262 a->name = s; // Save the offset.
3264 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3265 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3267 if (PUGI__IS_CHARTYPE(ch, ct_space))
3269 PUGI__SKIPWS(); // Eat any whitespace.
3275 if (ch == '=') // '<... #=...'
3277 PUGI__SKIPWS(); // Eat any whitespace.
3279 if (*s == '"' || *s == '\'') // '<... #="...'
3281 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
3282 ++s; // Step over the quote.
3283 a->value = s; // Save the offset.
// The converter is given the opening quote as its end marker; a null result is reported
// as a bad attribute at the start of the value.
3285 s = strconv_attribute(s, ch);
3287 if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
3289 // After this line the loop continues from the start;
3290 // Whitespaces, / and > are ok, symbols and EOF are wrong,
3291 // everything else will be detected
3292 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
3294 else PUGI__THROW_ERROR(status_bad_attribute, s);
3296 else PUGI__THROW_ERROR(status_bad_attribute, s);
3308 else if (*s == 0 && endch == '>')
3313 else PUGI__THROW_ERROR(status_bad_start_element, s);
3321 else if (*s == 0 && endch == '>')
3325 else PUGI__THROW_ERROR(status_bad_start_element, s);
3330 else if (ch == '/') // '<#.../'
3332 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
3334 PUGI__POPNODE(); // Pop.
3340 // we stepped over null terminator, backtrack & handle closing tag
3343 if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
3345 else PUGI__THROW_ERROR(status_bad_start_element, s);
// Closing tag: the name in the buffer is compared character by character with the name
// stored on the current node; a node without a name cannot be closed.
3351 char_t* name = cursor->name;
3352 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
3354 while (PUGI__IS_CHARTYPE(*s, ct_symbol))
3356 if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
// If the only unmatched character of the expected name equals the removed last character,
// report a truncated end tag (bad_end_element) rather than a mismatch.
3361 if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
3362 else PUGI__THROW_ERROR(status_end_element_mismatch, s);
3365 PUGI__POPNODE(); // Pop.
3371 if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3375 if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3379 else if (*s == '?') // '<?...'
3381 s = parse_question(s, cursor, optmsk, endch);
// parse_question leaves cursor on a node_declaration when its attributes still have to be parsed.
3385 if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
3387 else if (*s == '!') // '<!...'
3389 s = parse_exclamation(s, cursor, optmsk, endch);
3392 else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
3393 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
3397 mark = s; // Save this offset while searching for a terminator.
3399 PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
3401 if (*s == '<' || !*s)
3403 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
// Whitespace-only text is examined here using the parse_ws_pcdata, parse_ws_pcdata_single
// and parse_trim_pcdata options; the statements executed in each case are only partly visible.
3406 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
3410 else if (PUGI__OPTSET(parse_ws_pcdata_single))
// Continue the loop unless a closing tag follows immediately and the current node has no children.
3412 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
3416 if (!PUGI__OPTSET(parse_trim_pcdata))
// Text is only stored when inside some node, or when fragments are allowed.
3419 if (cursor->parent || PUGI__OPTSET(parse_fragment))
// With parse_embed_pcdata, text that arrives while the element has no child and no value yet
// is stored directly in the element's value instead of a separate node_pcdata child.
3421 if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
3423 cursor->value = s; // Save the offset.
3427 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
3429 cursor->value = s; // Save the offset.
3431 PUGI__POPNODE(); // Pop since this is a standalone.
3434 s = strconv_pcdata(s);
// Skip ahead to the next '<'. NOTE(review): presumably the branch where the text is discarded -- confirm.
3440 PUGI__SCANFOR(*s == '<'); // '...<'
3451 // check that last tag is closed
3452 if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
// Returns s advanced past a leading byte-order mark, or s unchanged when none is present.
// Wide-character build: the BOM is the single code unit 0xFEFF.
3457 #ifdef PUGIXML_WCHAR_MODE
3458 static char_t* parse_skip_bom(char_t* s)
3460 unsigned int bom = 0xfeff;
3461 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
// Second variant of the same function: the BOM is the three bytes EF BB BF (the UTF-8 form).
// NOTE(review): the preprocessor line separating the two variants is not visible here.
3464 static char_t* parse_skip_bom(char_t* s)
3466 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
// Walks the sibling chain starting at node (via next_sibling) and returns true as soon as
// a node of type node_element is found.
// NOTE(review): the loop header and the final return are not visible; presumably false when none is found.
3470 static bool has_element_node_siblings(xml_node_struct* node)
3474 if (PUGI__NODETYPE(node) == node_element) return true;
3476 node = node->next_sibling;
// Entry point of the parser: parses `length` characters at `buffer` into the tree under `root`.
//   xmldoc  document object, used as the xml_allocator for the parser
//   optmsk  parse option flags, tested through PUGI__OPTSET
// Returns an xml_parse_result carrying the status and the offset of the error (if any).
// The buffer is modified in place: its last character is overwritten with 0.
// NOTE(review): several conditions and returns are not visible in this listing.
3482 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
3484 // early-out for empty documents
// An empty input is acceptable only when fragments are allowed.
3486 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
3488 // get last child of the root before parsing
// Reads first_child->prev_sibling_c as the last existing child -- presumably a cyclic
// back-link from the first child to the last one; confirm against xml_node_struct.
3489 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
3491 // create parser on stack
3492 xml_parser parser(static_cast<xml_allocator*>(xmldoc));
3494 // save last character and make buffer zero-terminated (speeds up parsing)
3495 char_t endch = buffer[length - 1];
3496 buffer[length - 1] = 0;
3498 // skip BOM to make sure it does not end up as part of parse output
3499 char_t* buffer_data = parse_skip_bom(buffer);
3501 // perform actual parsing
3502 parser.parse_tree(buffer_data, root, optmsk, endch);
// The error offset is expressed relative to the start of buffer (BOM included); 0 when no error position was set.
3504 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
3505 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
3509 // since we removed last character, we have to handle the only possible false positive (stray <)
3511 return make_parse_result(status_unrecognized_tag, length - 1);
3513 // check if there are any element nodes parsed
// Nodes added by this call start after the previously last child, or at first_child if root was empty.
3514 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
3516 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
3517 return make_parse_result(status_no_document_element, length - 1);
3521 // roll back offset if it occurs on a null terminator in the source buffer
3522 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
3530 // Output facilities
// Returns the encoding in which char_t data is already stored, i.e. the one that needs no
// conversion on output: the wchar encoding in PUGIXML_WCHAR_MODE builds, UTF-8 otherwise.
3531 PUGI__FN xml_encoding get_write_native_encoding()
3533 #ifdef PUGIXML_WCHAR_MODE
3534 return get_wchar_encoding();
3536 return encoding_utf8;
// Resolves a user-requested output encoding to a concrete one: generic wchar / utf16 / utf32
// requests are replaced by a specific variant, explicit encodings pass through unchanged,
// and encoding_auto falls back to UTF-8.
3540 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
3542 // replace wchar encoding with utf implementation
3543 if (encoding == encoding_wchar) return get_wchar_encoding();
3545 // replace utf16 encoding with utf16 with specific endianness
3546 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3548 // replace utf32 encoding with utf32 with specific endianness
3549 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3551 // only do autodetection if no explicit encoding is requested
3552 if (encoding != encoding_auto) return encoding;
3554 // assume utf8 encoding
3555 return encoding_utf8;
// Converts `length` char_t units at `data` into `dest` using decoder D and writer T
// (both passed only as tag values) and returns the number of BYTES written.
3558 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
// The reinterpret_cast below is only valid when char_t and the decoder's unit type have the same size.
3560 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3562 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
// end - dest counts output units; multiply by the unit size to get bytes.
3564 return static_cast<size_t>(end - dest) * sizeof(*dest);
// Overload of the conversion helper that takes an extra opt_swap flag and can byte-swap
// every output unit in place. Returns the number of BYTES written.
// NOTE(review): the `if` guarding the swap loop is not visible here; presumably it tests opt_swap.
3567 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
3569 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3571 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
// Swap the byte order of each produced unit.
3575 for (typename T::value_type i = dest; i != end; ++i)
3576 *i = endian_swap(*i);
3579 return static_cast<size_t>(end - dest) * sizeof(*dest);
// Wide-character build.
3582 #ifdef PUGIXML_WCHAR_MODE
// Returns how many of the first `length` units can be converted without splitting a
// surrogate pair: 0 for an empty range, length - 1 when wchar_t is 2 bytes and the last
// unit lies in 0xD800..0xDBFF, otherwise length.
3583 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3585 if (length < 1) return 0;
3587 // discard last character if it's the lead of a surrogate pair
// Unsigned subtraction makes this a single range check for 0xD800 <= unit < 0xDC00.
3588 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
// Wide-character variant: converts `length` wchar units at `data` to `encoding`, writing
// into whichever of the r_* output buffers matches the target unit size.
// Returns the number of bytes produced.
// NOTE(review): the statements after the final assert are not visible in this listing.
3591 PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3593 // only endian-swapping is required
3594 if (need_endian_swap_utf(encoding, get_wchar_encoding()))
3596 convert_wchar_endian_swap(r_char, data, length);
3598 return length * sizeof(char_t);
3602 if (encoding == encoding_utf8)
3603 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
3606 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
// Byte-swap the output only when the requested endianness differs from the machine's.
3608 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3610 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
3614 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3616 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3618 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
3621 // convert to latin1
3622 if (encoding == encoding_latin1)
3623 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
// Reaching this point means `encoding` matched none of the branches above.
3625 assert(false && "Invalid encoding");
// Byte-oriented variant: returns a prefix length of `data` that does not end in the middle
// of a multi-byte sequence. Ranges shorter than 5 bytes yield 0.
3629 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3631 if (length < 5) return 0;
// Inspect the last four bytes from the end; (ch & 0xc0) == 0x80 identifies a continuation byte.
3633 for (size_t i = 1; i <= 4; ++i)
3635 uint8_t ch = static_cast<uint8_t>(data[length - i]);
3637 // either a standalone character or a leading one
// Cut just before the first non-continuation byte found, so that byte starts the next chunk.
3638 if ((ch & 0xc0) != 0x80) return length - i;
3641 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
// Variant that decodes `data` with utf8_decoder and converts it to UTF-16, UTF-32 or Latin-1,
// writing into the matching r_* buffer. The r_char parameter is unused (its name is commented out).
// Returns the number of bytes produced.
// NOTE(review): the statements after the final assert are not visible in this listing.
3645 PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3647 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
// Byte-swap the output only when the requested endianness differs from the machine's.
3649 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3651 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
3654 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3656 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3658 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
3661 if (encoding == encoding_latin1)
3662 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
// Reaching this point means `encoding` matched none of the branches above.
3664 assert(false && "Invalid encoding");
// Buffers char_t output in a fixed-size array and forwards it to an xml_writer, converting
// to the target encoding when that differs from the native one.
// NOTE(review): access specifiers, braces and several short statements/members are not
// visible in this listing; comments below describe only the visible lines.
3669 class xml_buffered_writer
// Copy constructor and copy assignment are declared here; NOTE(review): no definition is
// visible in this listing, presumably to make the class non-copyable -- confirm.
3671 xml_buffered_writer(const xml_buffered_writer&);
3672 xml_buffered_writer& operator=(const xml_buffered_writer&);
// Starts with an empty buffer and resolves the user's encoding request to a concrete encoding.
3675 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
// The fixed-arity write() overloads below store up to 6 characters after at most one flush.
3677 PUGI__STATIC_ASSERT(bufcapacity >= 8);
// Flushes the current buffer contents (enclosing function header not visible here).
3682 flush(buffer, bufsize);
// Sends `size` characters at `data` to the writer, converting through the scratch storage when needed.
3687 void flush(const char_t* data, size_t size)
3689 if (size == 0) return;
3691 // fast path, just write data
3692 if (encoding == get_write_native_encoding())
3693 writer.write(data, size * sizeof(char_t));
// result is the converted size in bytes; it must fit in the scratch storage.
3697 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3698 assert(result <= sizeof(scratch));
3701 writer.write(scratch.data_u8, result);
// Writes a chunk that does not fit in the remaining buffer space: large chunks bypass the
// buffer, and what is left after the large-chunk handling is copied into the buffer.
3705 void write_direct(const char_t* data, size_t length)
3707 // flush the remaining buffer contents
3710 // handle large chunks
3711 if (length > bufcapacity)
3713 if (encoding == get_write_native_encoding())
3715 // fast path, can just write data chunk
3716 writer.write(data, length * sizeof(char_t));
3720 // need to convert in suitable chunks
3721 while (length > bufcapacity)
3723 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3724 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3725 size_t chunk_size = get_valid_length(data, bufcapacity);
3728 // convert chunk and write
3729 flush(data, chunk_size);
3733 length -= chunk_size;
3736 // small tail is copied below
3740 memcpy(buffer + bufsize, data, length * sizeof(char_t));
// Appends `length` characters: copied into the buffer when they fit, otherwise handed to write_direct.
3744 void write_buffer(const char_t* data, size_t length)
3746 size_t offset = bufsize;
3748 if (offset < bufcapacity && offset + length <= bufcapacity)
3750 memcpy(buffer + offset, data, length * sizeof(char_t));
3751 bufsize = offset + length;
3755 write_direct(data, length);
// Appends a zero-terminated string.
3759 void write_string(const char_t* data)
3761 // write the part of the string that fits in the buffer
3762 size_t offset = bufsize;
3764 while (*data && offset < bufcapacity)
3765 buffer[offset++] = *data++;
// offset < bufcapacity here means the copy loop stopped at the end of the string.
3768 if (offset < bufcapacity)
3774 // backtrack a bit if we have split the codepoint
// `extra` is how many of the characters just copied are not covered by the valid-length
// prefix; they are dropped from the buffer and re-sent together with the rest of the string.
3775 size_t length = offset - bufsize;
3776 size_t extra = length - get_valid_length(data - length, length);
3778 bufsize = offset - extra;
3780 write_direct(data - extra, strlength(data) + extra);
// Fixed-arity writers for 1..6 characters. Each one flushes first when fewer than N slots
// remain; the no-argument flush() (definition not visible here) supplies the new offset.
3784 void write(char_t d0)
3786 size_t offset = bufsize;
3787 if (offset > bufcapacity - 1) offset = flush();
3789 buffer[offset + 0] = d0;
3790 bufsize = offset + 1;
3793 void write(char_t d0, char_t d1)
3795 size_t offset = bufsize;
3796 if (offset > bufcapacity - 2) offset = flush();
3798 buffer[offset + 0] = d0;
3799 buffer[offset + 1] = d1;
3800 bufsize = offset + 2;
3803 void write(char_t d0, char_t d1, char_t d2)
3805 size_t offset = bufsize;
3806 if (offset > bufcapacity - 3) offset = flush();
3808 buffer[offset + 0] = d0;
3809 buffer[offset + 1] = d1;
3810 buffer[offset + 2] = d2;
3811 bufsize = offset + 3;
3814 void write(char_t d0, char_t d1, char_t d2, char_t d3)
3816 size_t offset = bufsize;
3817 if (offset > bufcapacity - 4) offset = flush();
3819 buffer[offset + 0] = d0;
3820 buffer[offset + 1] = d1;
3821 buffer[offset + 2] = d2;
3822 buffer[offset + 3] = d3;
3823 bufsize = offset + 4;
3826 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3828 size_t offset = bufsize;
3829 if (offset > bufcapacity - 5) offset = flush();
3831 buffer[offset + 0] = d0;
3832 buffer[offset + 1] = d1;
3833 buffer[offset + 2] = d2;
3834 buffer[offset + 3] = d3;
3835 buffer[offset + 4] = d4;
3836 bufsize = offset + 5;
3839 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3841 size_t offset = bufsize;
3842 if (offset > bufcapacity - 6) offset = flush();
3844 buffer[offset + 0] = d0;
3845 buffer[offset + 1] = d1;
3846 buffer[offset + 2] = d2;
3847 buffer[offset + 3] = d3;
3848 buffer[offset + 4] = d4;
3849 buffer[offset + 5] = d5;
3850 bufsize = offset + 6;
3853 // utf8 maximum expansion: x4 (-> utf32)
3854 // utf16 maximum expansion: x2 (-> utf32)
3855 // utf32 maximum expansion: x1
// The byte budget may be overridden by defining PUGIXML_MEMORY_OUTPUT_STACK
// (the default value is on a line not visible here).
3859 #ifdef PUGIXML_MEMORY_OUTPUT_STACK
3860 PUGIXML_MEMORY_OUTPUT_STACK
// Each buffered character costs sizeof(char_t) bytes in `buffer` plus 4 bytes of scratch,
// which matches the largest scratch arrays declared below.
3865 bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
3868 char_t buffer[bufcapacity];
// Scratch arrays for converted output, one per target unit size.
// NOTE(review): presumably members of a union named `scratch` -- the declaration line is not visible.
3872 uint8_t data_u8[4 * bufcapacity];
3873 uint16_t data_u16[2 * bufcapacity];
3874 uint32_t data_u32[bufcapacity];
3875 char_t data_char[bufcapacity];
// Concrete output encoding chosen in the constructor.
3880 xml_encoding encoding;
// Writes the zero-terminated string s, replacing characters flagged by `type` with XML
// entities: &amp; &lt; &gt; &quot;, and a decimal character reference for the remaining ones.
// NOTE(review): the loop, the switch header and its case labels are not visible in this listing.
3883 PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
3887 const char_t* prev = s;
3889 // While *s is a usual symbol
3890 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
// Emit the unescaped run [prev, s) in one call.
3892 writer.write_buffer(prev, static_cast<size_t>(s - prev));
3898 writer.write('&', 'a', 'm', 'p', ';');
3902 writer.write('&', 'l', 't', ';');
3906 writer.write('&', 'g', 't', ';');
3910 writer.write('&', 'q', 'u', 'o', 't', ';');
3913 default: // s is not a usual symbol
3915 unsigned int ch = static_cast<unsigned int>(*s++);
// Always emits exactly two decimal digits ("&#NN;"), so ch is expected to be below 100
// -- presumably only control characters reach this branch; TODO confirm.
3918 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
// Writes string s either verbatim (when format_no_escapes is set in flags) or through
// text_output_escaped using the given character class.
3924 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3926 if (flags & format_no_escapes)
3927 writer.write_string(s);
3929 text_output_escaped(writer, s, type);
// Writes s wrapped in "<![CDATA[" ... "]]>". The text is cut wherever it contains "]]>",
// because that sequence would end the section early.
// NOTE(review): the enclosing loop and the adjustment of s after the scan are not visible in this listing.
3932 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
// The nine-character opener is emitted in two calls.
3936 writer.write('<', '!', '[', 'C', 'D');
3937 writer.write('A', 'T', 'A', '[');
3939 const char_t* prev = s;
3941 // look for ]]> sequence - we can't output it as is since it terminates CDATA
3942 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3944 // skip ]] if we stopped at ]]>, > will go to the next CDATA section
// Length of the chunk [prev, s) to emit.
3947 size_t bufLenght = static_cast<size_t>(s - prev);
3949 writer.write_buffer(prev, bufLenght);
3951 writer.write(']', ']', '>');
// Writes the indent string `depth` times. Indent strings of 1 to 4 characters use the
// fixed-arity write() overloads; the last loop uses write_buffer for any other length.
// NOTE(review): the case labels of the switch are not visible in this listing.
3956 PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
3958 switch (indent_length)
3962 for (unsigned int i = 0; i < depth; ++i)
3963 writer.write(indent[0]);
3969 for (unsigned int i = 0; i < depth; ++i)
3970 writer.write(indent[0], indent[1]);
3976 for (unsigned int i = 0; i < depth; ++i)
3977 writer.write(indent[0], indent[1], indent[2]);
3983 for (unsigned int i = 0; i < depth; ++i)
3984 writer.write(indent[0], indent[1], indent[2], indent[3]);
3990 for (unsigned int i = 0; i < depth; ++i)
3991 writer.write_buffer(indent, indent_length);
// Writes s as an XML comment ("<!--" ... "-->"). A '-' that is followed by another '-' or
// by the end of the string is written as "- " so the output never contains "--" inside the
// comment body. NOTE(review): the enclosing loop is not visible in this listing.
3996 PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
3998 writer.write('<', '!', '-', '-');
4002 const char_t* prev = s;
4004 // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
4005 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
// Length of the safe chunk [prev, s) to emit.
4007 size_t bufLenght = static_cast<size_t>(s - prev);
4009 writer.write_buffer(prev, bufLenght);
4015 writer.write('-', ' ');
4020 writer.write('-', '-', '>');
// Writes the value of a processing instruction. Any "?>" inside the value is written as
// "? >" so it cannot terminate the PI early.
// NOTE(review): the enclosing loop is not visible in this listing.
4023 PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
4027 const char_t* prev = s;
4029 // look for ?> sequence - we can't output it since ?> terminates PI
4030 while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
4032 writer.write_buffer(prev, static_cast<size_t>(s - prev));
4036 assert(s[0] == '?' && s[1] == '>');
4038 writer.write('?', ' ', '>');
// Writes every attribute of node as name="value". Attributes without a name are written
// with the placeholder ":anonymous". Values go through text_output with ctx_special_attr.
// NOTE(review): separator / closing-quote statements are not visible in this listing.
4044 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4046 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4048 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
// Per-attribute indentation applies only when format_indent_attributes is set and format_raw is not.
4050 if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
4054 text_output_indent(writer, indent, indent_length, depth + 1);
4061 writer.write_string(a->name ? a->name + 0 : default_name);
4062 writer.write('=', '"');
4065 text_output(writer, a->value, ctx_special_attr, flags);
// Writes the opening part of an element: its name (or ":anonymous" when it has none), its
// attributes, and, depending on children and value, "/>" or the embedded value followed by
// a closing tag. NOTE(review): the meaning of the bool result cannot be read from the
// visible lines; the caller (node_output) descends into children when it is true.
4071 PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4073 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4074 const char_t* name = node->name ? node->name + 0 : default_name;
4077 writer.write_string(name);
4079 if (node->first_attribute)
4080 node_output_attributes(writer, node, indent, indent_length, flags, depth);
4082 // element nodes can have value if parse_embed_pcdata was used
4085 if (!node->first_child)
4087 if ((flags & format_raw) == 0)
4090 writer.write('/', '>');
4105 text_output(writer, node->value, ctx_special_pcdata, flags);
// With a value but no children, the element is closed right after its value.
4107 if (!node->first_child)
4109 writer.write('<', '/');
4110 writer.write_string(name);
// Writes the closing tag "</name" for node, using ":anonymous" when the node has no name.
// (The statement that writes the final '>' is not visible in this listing.)
4122 PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
4124 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4125 const char_t* name = node->name ? node->name + 0 : default_name;
4127 writer.write('<', '/');
4128 writer.write_string(name);
// Writes a node that has no children to recurse into, dispatching on its type: text,
// CDATA, comment, processing instruction, declaration or doctype. Missing values are
// written as an empty string and missing names as ":anonymous".
// NOTE(review): most case labels and break statements are not visible in this listing.
4132 PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
4134 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4136 switch (PUGI__NODETYPE(node))
4139 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
4143 text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4147 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4151 writer.write('<', '?');
4152 writer.write_string(node->name ? node->name + 0 : default_name);
4157 node_output_pi_value(writer, node->value);
4160 writer.write('?', '>');
4163 case node_declaration:
4164 writer.write('<', '?');
4165 writer.write_string(node->name ? node->name + 0 : default_name);
// Declaration attributes are written with format_raw forced on and no indent string.
4166 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
4167 writer.write('?', '>');
4171 writer.write('<', '!', 'D', 'O', 'C');
4172 writer.write('T', 'Y', 'P', 'E');
4177 writer.write_string(node->value);
4184 assert(false && "Invalid node type");
// Writes the subtree rooted at `root`, walking it iteratively through first_child,
// next_sibling and parent links rather than by recursion.
//   indent  indentation string; used only when an indent flag is set and format_raw is not
//   flags   format_* flags controlling newlines, indentation and escaping
//   depth   indentation depth passed on to text_output_indent
// indent_flags carries, from one node to the next, whether a newline and/or indentation
// should be written before the next item.
// NOTE(review): the do-loop header, depth updates and newline writes are not visible in this listing.
4194 PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
// Indent length is 0 unless indentation is requested and raw output is not.
4196 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
4197 unsigned int indent_flags = indent_indent;
4199 xml_node_struct* node = root;
4205 // begin writing current node
// PCDATA and CDATA nodes are written directly on this branch.
4206 if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
4208 node_output_simple(writer, node, flags);
4214 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4217 if ((indent_flags & indent_indent) && indent_length)
4218 text_output_indent(writer, indent, indent_length, depth);
4220 if (PUGI__NODETYPE(node) == node_element)
4222 indent_flags = indent_newline | indent_indent;
// A true result from node_output_start leads to descending into the children.
4224 if (node_output_start(writer, node, indent, indent_length, flags, depth))
4226 // element nodes can have value if parse_embed_pcdata was used
4230 node = node->first_child;
4235 else if (PUGI__NODETYPE(node) == node_document)
4237 indent_flags = indent_indent;
4239 if (node->first_child)
4241 node = node->first_child;
4247 node_output_simple(writer, node, flags);
4249 indent_flags = indent_newline | indent_indent;
4253 // continue to the next node
// Move to the next sibling if there is one; otherwise climb to the parent and, for
// elements, write the closing tag before continuing.
4254 while (node != root)
4256 if (node->next_sibling)
4258 node = node->next_sibling;
4262 node = node->parent;
4264 // write closing node
4265 if (PUGI__NODETYPE(node) == node_element)
4269 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4272 if ((indent_flags & indent_indent) && indent_length)
4273 text_output_indent(writer, indent, indent_length, depth);
4275 node_output_end(writer, node);
4277 indent_flags = indent_newline | indent_indent;
4281 while (node != root);
4283 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4287 PUGI__FN bool has_declaration(xml_node_struct* node)
4289 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
4291 xml_node_type type = PUGI__NODETYPE(child);
4293 if (type == node_declaration) return true;
4294 if (type == node_element) return false;
4300 PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
4302 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4309 PUGI__FN bool allow_insert_attribute(xml_node_type parent)
4311 return parent == node_element || parent == node_declaration;
4314 PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
4316 if (parent != node_document && parent != node_element) return false;
4317 if (child == node_document || child == node_null) return false;
4318 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
4323 PUGI__FN bool allow_move(xml_node parent, xml_node child)
4325 // check that child can be a child of parent
4326 if (!allow_insert_child(parent.type(), child.type()))
4329 // check that node is not moved between documents
4330 if (parent.root() != child.root())
4333 // check that new parent is not in the child subtree
4334 xml_node cur = parent;
4347 template <typename String, typename Header>
4348 PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
4350 assert(!dest && (header & header_mask) == 0);
4354 if (alloc && (source_header & header_mask) == 0)
4358 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
4359 header |= xml_memory_page_contents_shared_mask;
4360 source_header |= xml_memory_page_contents_shared_mask;
4363 strcpy_insitu(dest, header, header_mask, source, strlength(source));
4367 PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
4369 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
4370 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
4372 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
4374 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4378 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4379 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
// Copies the node `sn` together with its descendants into `dn`.
// The contents of `sn` itself are copied first; afterwards two cursors are advanced in
// step: `sit` over the source subtree and `dit` over the destination, with a new node
// appended to `dit` for each visited source node.
// NOTE(review): several short lines of this function are not visible in this listing,
// so the exact loop structure should be confirmed against the complete source.
4384 PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
4386 xml_allocator& alloc = get_allocator(dn);
// strings are only shared when source and destination use the very same allocator
4387 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
4389 node_copy_contents(dn, sn, shared_alloc);
4391 xml_node_struct* dit = dn;
4392 xml_node_struct* sit = sn->first_child;
// the walk ends when the source cursor runs out or is back at the source root
4394 while (sit && sit != sn)
4398 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
4402 node_copy_contents(copy, sit, shared_alloc);
4404 if (sit->first_child)
4407 sit = sit->first_child;
4413 // continue to the next node
4416 if (sit->next_sibling)
4418 sit = sit->next_sibling;
4429 PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
4431 xml_allocator& alloc = get_allocator(da);
4432 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4434 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4435 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4438 inline bool is_text_node(xml_node_struct* node)
4440 xml_node_type type = PUGI__NODETYPE(node);
4442 return type == node_pcdata || type == node_cdata;
4445 // get value with conversion functions
// Parses an integer from `value` into the unsigned type U.
// Visible steps: characters classified as ct_space are examined first, an optional
// '+' / '-' sign is consumed, then either a hexadecimal number (prefix "0x" / "0X") or a
// decimal number is accumulated. Overflow is detected from the number of digits (plus
// the leading digit and the top bit of the result for decimal input).
// minneg - magnitude limit used by the negating return below
// maxpos - upper limit used by the non-negating return below
// NOTE(review): short lines (loop bodies, `else` / `break`, the declaration of `result`
// and the test that selects between the two returns) are not visible in this listing.
4446 template <typename U> U string_to_integer(const char_t* value, U minneg, U maxpos)
4449 const char_t* s = value;
4451 while (PUGI__IS_CHARTYPE(*s, ct_space))
4454 bool negative = (*s == '-');
// steps over a single sign character, if there is one
4456 s += (*s == '+' || *s == '-');
4458 bool overflow = false;
// `| ' '` folds an upper-case ASCII letter to lower case, so both "0x" and "0X" match
4460 if (s[0] == '0' && (s[1] | ' ') == 'x')
4464 // since overflow detection relies on length of the sequence skip leading zeros
4468 const char_t* start = s;
// the unsigned cast makes characters below '0' wrap to large values and fail the range test
4472 if (static_cast<unsigned>(*s - '0') < 10)
4473 result = result * 16 + (*s - '0');
4474 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
4475 result = result * 16 + ((*s | ' ') - 'a' + 10);
4482 size_t digits = static_cast<size_t>(s - start);
// each byte of U holds two hex digits; more significant digits than that cannot fit
4484 overflow = digits > sizeof(U) * 2;
4488 // since overflow detection relies on length of the sequence skip leading zeros
4492 const char_t* start = s;
4496 if (static_cast<unsigned>(*s - '0') < 10)
4497 result = result * 10 + (*s - '0');
4504 size_t digits = static_cast<size_t>(s - start);
4506 PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
// digit count and leading digit selected per width of U (8, 4 or 2 bytes)
4508 const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
4509 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
4510 const size_t high_bit = sizeof(U) * 8 - 1;
// fewer digits than max_digits10 never overflow; exactly max_digits10 digits are accepted
// only with a leading digit below max_lead, or equal to it with the top bit of result set
4512 overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
// yields 0 - minneg on overflow or when the magnitude exceeds minneg
4516 return (overflow || result > minneg) ? 0 - minneg : 0 - result;
// yields maxpos on overflow or when the value exceeds maxpos
4518 return (overflow || result > maxpos) ? maxpos : result;
4521 PUGI__FN int get_value_int(const char_t* value)
4523 return string_to_integer<unsigned int>(value, 0 - static_cast<unsigned int>(INT_MIN), INT_MAX);
4526 PUGI__FN unsigned int get_value_uint(const char_t* value)
4528 return string_to_integer<unsigned int>(value, 0, UINT_MAX);
4531 PUGI__FN double get_value_double(const char_t* value)
4533 #ifdef PUGIXML_WCHAR_MODE
4534 return wcstod(value, 0);
4536 return strtod(value, 0);
4540 PUGI__FN float get_value_float(const char_t* value)
4542 #ifdef PUGIXML_WCHAR_MODE
4543 return static_cast<float>(wcstod(value, 0));
4545 return static_cast<float>(strtod(value, 0));
4549 PUGI__FN bool get_value_bool(const char_t* value)
4551 // only look at first char
4552 char_t first = *value;
4554 // 1*, t* (true), T* (True), y* (yes), Y* (YES)
4555 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
4558 #ifdef PUGIXML_HAS_LONG_LONG
4559 PUGI__FN long long get_value_llong(const char_t* value)
4561 return string_to_integer<unsigned long long>(value, 0 - static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
4564 PUGI__FN unsigned long long get_value_ullong(const char_t* value)
4566 return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
4570 template <typename T> struct make_unsigned;
4572 template <> struct make_unsigned<int> { typedef unsigned int type; };
4573 template <> struct make_unsigned<unsigned int> { typedef unsigned int type; };
4574 template <> struct make_unsigned<long> { typedef unsigned long type; };
4575 template <> struct make_unsigned<unsigned long> { typedef unsigned long type; };
4577 #ifdef PUGIXML_HAS_LONG_LONG
4578 template <> struct make_unsigned<long long> { typedef unsigned long long type; };
4579 template <> struct make_unsigned<unsigned long long> { typedef unsigned long long type; };
4582 template <typename T>
4583 PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, T value)
4585 typedef typename make_unsigned<T>::type U;
4587 bool negative = value < 0;
4589 char_t* result = end - 1;
4590 U rest = negative ? 0 - U(value) : U(value);
4594 *result-- = static_cast<char_t>('0' + (rest % 10));
4599 assert(result >= begin);
4604 return result + !negative;
4607 // set value with conversion functions
4608 template <typename String, typename Header>
4609 PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
4611 #ifdef PUGIXML_WCHAR_MODE
4613 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4616 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4618 return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4620 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
4624 template <typename String, typename Header, typename Integer>
4625 PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, Integer value)
4628 char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4629 char_t* begin = integer_to_string(buf, end, value);
4631 return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4634 template <typename String, typename Header>
4635 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value)
4638 sprintf(buf, "%.9g", value);
4640 return set_value_ascii(dest, header, header_mask, buf);
4643 template <typename String, typename Header>
4644 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value)
4647 sprintf(buf, "%.17g", value);
4649 return set_value_ascii(dest, header, header_mask, buf);
4652 template <typename String, typename Header>
4653 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value)
4655 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
// Parses the memory block `contents` / `size` into `root` of document `doc`.
// Steps visible here: validate the input, resolve the actual encoding, convert the
// contents into a private char_t buffer, release or hand over buffers according to
// `own`, remember the buffer on the document, run the parser and record the encoding
// on the returned result.
// is_mutable - forwarded to convert_buffer
// own        - when true, `contents` is deallocated here if a conversion produced a
//              different buffer, and the final buffer is always reported via out_buffer
// out_buffer - receives the buffer when `own` is set or when a conversion took place
// NOTE(review): the declarations of `buffer` / `length` and the condition guarding the
// `failed` result are not visible in this listing.
4658 PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4660 // check input buffer
// a null pointer is only acceptable together with a zero size
4661 if (!contents && size) return make_parse_result(status_io_error);
4663 // get actual encoding
4664 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4666 // get private buffer
// result object carrying status_internal_error at offset 0
4671 xml_parse_result failed;
4672 failed.status = xml_parse_status::status_internal_error;
4673 failed.offset = (ptrdiff_t)0;
4677 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4679 // delete original buffer if we performed a conversion
4680 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4682 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4683 if (own || buffer != contents) *out_buffer = buffer;
4685 // store buffer for offset_debug
4686 doc->buffer = buffer;
4689 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4691 // remember encoding
4692 res.encoding = buffer_encoding;
4697 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
4698 PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
4700 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
4701 // there are 64-bit versions of fseek/ftell, let's use them
4702 typedef __int64 length_type;
4704 _fseeki64(file, 0, SEEK_END);
4705 length_type length = _ftelli64(file);
4706 _fseeki64(file, 0, SEEK_SET);
4707 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
4708 // there are 64-bit versions of fseek/ftell, let's use them
4709 typedef off64_t length_type;
4711 fseeko64(file, 0, SEEK_END);
4712 length_type length = ftello64(file);
4713 fseeko64(file, 0, SEEK_SET);
4715 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
4716 typedef long length_type;
4718 fseek(file, 0, SEEK_END);
4719 length_type length = ftell(file);
4720 fseek(file, 0, SEEK_SET);
4723 // check for I/O errors
4724 if (length < 0) return status_io_error;
4726 // check for overflow
4727 size_t result = static_cast<size_t>(length);
4729 if (static_cast<length_type>(result) != length) return status_out_of_memory;
4732 out_result = result;
4737 // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
4738 PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
4740 // We only need to zero-terminate if encoding conversion does not do it for us
4741 #ifdef PUGIXML_WCHAR_MODE
4742 xml_encoding wchar_encoding = get_wchar_encoding();
4744 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
4746 size_t length = size / sizeof(char_t);
4748 static_cast<char_t*>(buffer)[length] = 0;
4749 return (length + 1) * sizeof(char_t);
4752 if (encoding == encoding_utf8)
4754 static_cast<char*>(buffer)[size] = 0;
4762 PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4764 if (!file) return make_parse_result(status_file_not_found);
4766 // get file size (can result in I/O errors)
4768 xml_parse_status size_status = get_file_size(file, size);
4769 if (size_status != status_ok) return make_parse_result(size_status);
4771 size_t max_suffix_size = sizeof(char_t);
4773 // allocate buffer for the whole file
4774 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4775 if (!contents) return make_parse_result(status_out_of_memory);
4777 // read file in memory
4778 size_t read_size = fread(contents, 1, size, file);
4780 if (read_size != size)
4782 xml_memory::deallocate(contents);
4783 return make_parse_result(status_io_error);
4786 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4788 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
4791 PUGI__FN void close_file(FILE* file)
4796 #ifndef PUGIXML_NO_STL
4797 template <typename T> struct xml_stream_chunk
4799 static xml_stream_chunk* create()
4801 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4802 if (!memory) return 0;
4804 return new (memory) xml_stream_chunk();
4807 static void destroy(xml_stream_chunk* chunk)
4812 xml_stream_chunk* next_ = chunk->next;
4814 xml_memory::deallocate(chunk);
4820 xml_stream_chunk(): next(0), size(0)
4824 xml_stream_chunk* next;
4827 T data[xml_memory_page_size / sizeof(T)];
4830 template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4832 auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
4834 // read file to a chunk list
4836 xml_stream_chunk<T>* last = 0;
4838 while (!stream.eof())
4840 // allocate new chunk
4841 xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
4842 if (!chunk) return status_out_of_memory;
4844 // append chunk to list
4845 if (last) last = last->next = chunk;
4846 else chunks.data = last = chunk;
4848 // read data to chunk
4849 stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
4850 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
4852 // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
4853 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4855 // guard against huge files (chunk size is small enough to make this overflow check work)
4856 if (total + chunk->size < total) return status_out_of_memory;
4857 total += chunk->size;
4860 size_t max_suffix_size = sizeof(char_t);
4862 // copy chunk list to a contiguous buffer
4863 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
4864 if (!buffer) return status_out_of_memory;
4866 char* write = buffer;
4868 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
4870 assert(write + chunk->size <= buffer + total);
4871 memcpy(write, chunk->data, chunk->size);
4872 write += chunk->size;
4875 assert(write == buffer + total);
4878 *out_buffer = buffer;
4884 template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4886 // get length of remaining data in stream
4887 typename std::basic_istream<T>::pos_type pos = stream.tellg();
4888 stream.seekg(0, std::ios::end);
4889 std::streamoff length = stream.tellg() - pos;
4892 if (stream.fail() || pos < 0) return status_io_error;
4894 // guard against huge files
4895 size_t read_length = static_cast<size_t>(length);
4897 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
4899 size_t max_suffix_size = sizeof(char_t);
4901 // read stream data into memory (guard against stream exceptions with buffer holder)
4902 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
4903 if (!buffer.data) return status_out_of_memory;
4905 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
4907 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
4908 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4911 size_t actual_length = static_cast<size_t>(stream.gcount());
4912 assert(actual_length <= read_length);
4914 *out_buffer = buffer.release();
4915 *out_size = actual_length * sizeof(T);
4920 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4924 xml_parse_status status = status_ok;
4926 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
4927 if (stream.fail()) return make_parse_result(status_io_error);
4929 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
4930 if (stream.tellg() < 0)
4932 stream.clear(); // clear error flags that could be set by a failing tellg
4933 status = load_stream_data_noseek(stream, &buffer, &size);
4936 status = load_stream_data_seek(stream, &buffer, &size);
4938 if (status != status_ok) return make_parse_result(status);
4940 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
4942 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
4946 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
4947 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4949 return _wfopen(path, mode);
4952 PUGI__FN char* convert_path_heap(const wchar_t* str)
4956 // first pass: get length in utf8 characters
4957 size_t length = strlength_wide(str);
4958 size_t size = as_utf8_begin(str, length);
4960 // allocate resulting string
4961 char* result = static_cast<char*>(xml_memory::allocate(size + 1));
4962 if (!result) return 0;
4964 // second pass: convert to utf8
4965 as_utf8_end(result, size, str, length);
4973 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4975 // there is no standard function to open wide paths, so our best bet is to try utf8 path
4976 char* path_utf8 = convert_path_heap(path);
4977 if (!path_utf8) return 0;
4979 // convert mode to ASCII (we mirror _wfopen interface)
4980 char mode_ascii[4] = {0};
4981 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
4983 // try to open the utf8 path
4984 FILE* result = fopen(path_utf8, mode_ascii);
4986 // free dummy buffer
4987 xml_memory::deallocate(path_utf8);
4993 PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
4995 if (!file) return false;
4997 xml_writer_file writer(file);
4998 doc.save(writer, indent, flags, encoding);
5000 return ferror(file) == 0;
5003 struct name_null_sentry
5005 xml_node_struct* node;
5008 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
5022 PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
5026 PUGI__FN void xml_writer_file::write(const void* data, size_t size)
5028 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
5029 (void)!result; // unfortunately we can't do proper error handling here
5032 #ifndef PUGIXML_NO_STL
5033 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
5037 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
5041 PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
5045 assert(!wide_stream);
5046 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
5050 assert(wide_stream);
5051 assert(size % sizeof(wchar_t) == 0);
5053 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
5058 PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
5062 PUGI__FN xml_tree_walker::~xml_tree_walker()
5066 PUGI__FN int xml_tree_walker::depth() const
5071 PUGI__FN bool xml_tree_walker::begin(xml_node&)
5076 PUGI__FN bool xml_tree_walker::end(xml_node&)
5081 PUGI__FN xml_attribute::xml_attribute(): _attr(0)
5085 PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
5089 PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
5093 PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
5095 return _attr ? unspecified_bool_xml_attribute : 0;
5098 PUGI__FN bool xml_attribute::operator!() const
5103 PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
5105 return (_attr == r._attr);
5108 PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
5110 return (_attr != r._attr);
5113 PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
5115 return (_attr < r._attr);
5118 PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
5120 return (_attr > r._attr);
5123 PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
5125 return (_attr <= r._attr);
5128 PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
5130 return (_attr >= r._attr);
5133 PUGI__FN xml_attribute xml_attribute::next_attribute() const
5135 return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
5138 PUGI__FN xml_attribute xml_attribute::previous_attribute() const
5140 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
5143 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
5145 return (_attr && _attr->value) ? _attr->value + 0 : def;
5148 PUGI__FN int xml_attribute::as_int(int def) const
5150 return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
5153 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
5155 return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
5158 PUGI__FN double xml_attribute::as_double(double def) const
5160 return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
5163 PUGI__FN float xml_attribute::as_float(float def) const
5165 return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
5168 PUGI__FN bool xml_attribute::as_bool(bool def) const
5170 return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
5173 #ifdef PUGIXML_HAS_LONG_LONG
5174 PUGI__FN long long xml_attribute::as_llong(long long def) const
5176 return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def;
5179 PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
5181 return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def;
5185 PUGI__FN bool xml_attribute::empty() const
5190 PUGI__FN const char_t* xml_attribute::name() const
5192 return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
5195 PUGI__FN const char_t* xml_attribute::value() const
5197 return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
5200 PUGI__FN size_t xml_attribute::hash_value() const
5202 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
5205 PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
5210 PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
5216 PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
5222 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
5228 PUGI__FN xml_attribute& xml_attribute::operator=(long rhs)
5234 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
5240 PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
5246 PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
5252 PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
5258 #ifdef PUGIXML_HAS_LONG_LONG
5259 PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
5265 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
5272 PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
5274 if (!_attr) return false;
5276 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5279 PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
5281 if (!_attr) return false;
5283 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5286 PUGI__FN bool xml_attribute::set_value(int rhs)
5288 if (!_attr) return false;
5290 return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5293 PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
5295 if (!_attr) return false;
5297 return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5300 PUGI__FN bool xml_attribute::set_value(long rhs)
5302 if (!_attr) return false;
5304 return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5307 PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
5309 if (!_attr) return false;
5311 return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5314 PUGI__FN bool xml_attribute::set_value(double rhs)
5316 if (!_attr) return false;
5318 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5321 PUGI__FN bool xml_attribute::set_value(float rhs)
5323 if (!_attr) return false;
5325 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5328 PUGI__FN bool xml_attribute::set_value(bool rhs)
5330 if (!_attr) return false;
5332 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5335 #ifdef PUGIXML_HAS_LONG_LONG
5336 PUGI__FN bool xml_attribute::set_value(long long rhs)
5338 if (!_attr) return false;
5340 return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5343 PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
5345 if (!_attr) return false;
5347 return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5352 PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
5354 return (bool)lhs && rhs;
5357 PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
5359 return (bool)lhs || rhs;
5363 PUGI__FN xml_node::xml_node(): _root(0)
5367 PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
5371 PUGI__FN static void unspecified_bool_xml_node(xml_node***)
5375 PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
5377 return _root ? unspecified_bool_xml_node : 0;
5380 PUGI__FN bool xml_node::operator!() const
5385 PUGI__FN xml_node::iterator xml_node::begin() const
5387 return iterator(_root ? _root->first_child + 0 : 0, _root);
5390 PUGI__FN xml_node::iterator xml_node::end() const
5392 return iterator(0, _root);
5395 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
5397 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
5400 PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
5402 return attribute_iterator(0, _root);
5405 PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
5407 return xml_object_range<xml_node_iterator>(begin(), end());
5410 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
5412 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
5415 PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
5417 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
5420 PUGI__FN bool xml_node::operator==(const xml_node& r) const
5422 return (_root == r._root);
5425 PUGI__FN bool xml_node::operator!=(const xml_node& r) const
5427 return (_root != r._root);
5430 PUGI__FN bool xml_node::operator<(const xml_node& r) const
5432 return (_root < r._root);
5435 PUGI__FN bool xml_node::operator>(const xml_node& r) const
5437 return (_root > r._root);
5440 PUGI__FN bool xml_node::operator<=(const xml_node& r) const
5442 return (_root <= r._root);
5445 PUGI__FN bool xml_node::operator>=(const xml_node& r) const
5447 return (_root >= r._root);
5450 PUGI__FN bool xml_node::empty() const
5455 PUGI__FN const char_t* xml_node::name() const
5457 return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
5460 PUGI__FN xml_node_type xml_node::type() const
5462 return _root ? PUGI__NODETYPE(_root) : node_null;
5465 PUGI__FN const char_t* xml_node::value() const
5467 return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
5470 PUGI__FN xml_node xml_node::child(const char_t* name_) const
5472 if (!_root) return xml_node();
5474 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5475 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5480 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
5482 if (!_root) return xml_attribute();
5484 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
5485 if (i->name && impl::strequal(name_, i->name))
5486 return xml_attribute(i);
5488 return xml_attribute();
5491 PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
5493 if (!_root) return xml_node();
5495 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
5496 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5501 PUGI__FN xml_node xml_node::next_sibling() const
5503 return _root ? xml_node(_root->next_sibling) : xml_node();
5506 PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
5508 if (!_root) return xml_node();
5510 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
5511 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5516 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
5518 xml_attribute_struct* hint = hint_._attr;
5520 // if hint is not an attribute of node, behavior is not defined
5521 assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
5523 if (!_root) return xml_attribute();
5525 // optimistically search from hint up until the end
5526 for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
5527 if (i->name && impl::strequal(name_, i->name))
5529 // update hint to maximize efficiency of searching for consecutive attributes
5530 hint_._attr = i->next_attribute;
5532 return xml_attribute(i);
5535 // wrap around and search from the first attribute until the hint
5536 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
5537 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
5538 if (j->name && impl::strequal(name_, j->name))
5540 // update hint to maximize efficiency of searching for consecutive attributes
5541 hint_._attr = j->next_attribute;
5543 return xml_attribute(j);
5546 return xml_attribute();
5549 PUGI__FN xml_node xml_node::previous_sibling() const
5551 if (!_root) return xml_node();
5553 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
5554 else return xml_node();
5557 PUGI__FN xml_node xml_node::parent() const
5559 return _root ? xml_node(_root->parent) : xml_node();
5562 PUGI__FN xml_node xml_node::root() const
5564 return _root ? xml_node(&impl::get_document(_root)) : xml_node();
5567 PUGI__FN xml_text xml_node::text() const
5569 return xml_text(_root);
5572 PUGI__FN const char_t* xml_node::child_value() const
5574 if (!_root) return PUGIXML_TEXT("");
5576 // element nodes can have value if parse_embed_pcdata was used
5577 if (PUGI__NODETYPE(_root) == node_element && _root->value)
5578 return _root->value;
5580 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5581 if (impl::is_text_node(i) && i->value)
5584 return PUGIXML_TEXT("");
5587 PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
5589 return child(name_).child_value();
5592 PUGI__FN xml_attribute xml_node::first_attribute() const
5594 return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
5597 PUGI__FN xml_attribute xml_node::last_attribute() const
5599 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
5602 PUGI__FN xml_node xml_node::first_child() const
5604 return _root ? xml_node(_root->first_child) : xml_node();
5607 PUGI__FN xml_node xml_node::last_child() const
5609 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
5612 PUGI__FN bool xml_node::set_name(const char_t* rhs)
5614 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5616 if (!_root && type_ != node_element && type_ != node_pi && type_ != node_declaration)
5619 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5622 PUGI__FN bool xml_node::set_value(const char_t* rhs)
5624 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5626 if (!_root && type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
5629 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5632 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
5634 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5636 impl::xml_allocator& alloc = impl::get_allocator(_root);
5637 if (!alloc.reserve()) return xml_attribute();
5639 xml_attribute a(impl::allocate_attribute(alloc));
5640 if (!a) return xml_attribute();
5642 impl::append_attribute(a._attr, _root);
5649 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
5651 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5653 impl::xml_allocator& alloc = impl::get_allocator(_root);
5654 if (!alloc.reserve()) return xml_attribute();
5656 xml_attribute a(impl::allocate_attribute(alloc));
5657 if (!a) return xml_attribute();
5659 impl::prepend_attribute(a._attr, _root);
5666 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
5668 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5669 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5671 impl::xml_allocator& alloc = impl::get_allocator(_root);
5672 if (!alloc.reserve()) return xml_attribute();
5674 xml_attribute a(impl::allocate_attribute(alloc));
5675 if (!a) return xml_attribute();
5677 impl::insert_attribute_after(a._attr, attr._attr, _root);
5684 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
5686 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5687 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5689 impl::xml_allocator& alloc = impl::get_allocator(_root);
5690 if (!alloc.reserve()) return xml_attribute();
5692 xml_attribute a(impl::allocate_attribute(alloc));
5693 if (!a) return xml_attribute();
5695 impl::insert_attribute_before(a._attr, attr._attr, _root);
5702 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
5704 if (!proto) return xml_attribute();
5705 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5707 impl::xml_allocator& alloc = impl::get_allocator(_root);
5708 if (!alloc.reserve()) return xml_attribute();
5710 xml_attribute a(impl::allocate_attribute(alloc));
5711 if (!a) return xml_attribute();
5713 impl::append_attribute(a._attr, _root);
5714 impl::node_copy_attribute(a._attr, proto._attr);
5719 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
5721 if (!proto) return xml_attribute();
5722 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5724 impl::xml_allocator& alloc = impl::get_allocator(_root);
5725 if (!alloc.reserve()) return xml_attribute();
5727 xml_attribute a(impl::allocate_attribute(alloc));
5728 if (!a) return xml_attribute();
5730 impl::prepend_attribute(a._attr, _root);
5731 impl::node_copy_attribute(a._attr, proto._attr);
5736 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
5738 if (!proto) return xml_attribute();
5739 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5740 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5742 impl::xml_allocator& alloc = impl::get_allocator(_root);
5743 if (!alloc.reserve()) return xml_attribute();
5745 xml_attribute a(impl::allocate_attribute(alloc));
5746 if (!a) return xml_attribute();
5748 impl::insert_attribute_after(a._attr, attr._attr, _root);
5749 impl::node_copy_attribute(a._attr, proto._attr);
5754 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
5756 if (!proto) return xml_attribute();
5757 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5758 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5760 impl::xml_allocator& alloc = impl::get_allocator(_root);
5761 if (!alloc.reserve()) return xml_attribute();
5763 xml_attribute a(impl::allocate_attribute(alloc));
5764 if (!a) return xml_attribute();
5766 impl::insert_attribute_before(a._attr, attr._attr, _root);
5767 impl::node_copy_attribute(a._attr, proto._attr);
5772 PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
5774 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5776 impl::xml_allocator& alloc = impl::get_allocator(_root);
5777 if (!alloc.reserve()) return xml_node();
5779 xml_node n(impl::allocate_node(alloc, type_));
5780 if (!n) return xml_node();
5782 impl::append_node(n._root, _root);
5784 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5789 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
5791 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5793 impl::xml_allocator& alloc = impl::get_allocator(_root);
5794 if (!alloc.reserve()) return xml_node();
5796 xml_node n(impl::allocate_node(alloc, type_));
5797 if (!n) return xml_node();
5799 impl::prepend_node(n._root, _root);
5801 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5806 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
5808 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5809 if (!node._root || node._root->parent != _root) return xml_node();
5811 impl::xml_allocator& alloc = impl::get_allocator(_root);
5812 if (!alloc.reserve()) return xml_node();
5814 xml_node n(impl::allocate_node(alloc, type_));
5815 if (!n) return xml_node();
5817 impl::insert_node_before(n._root, node._root);
5819 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5824 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
5826 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5827 if (!node._root || node._root->parent != _root) return xml_node();
5829 impl::xml_allocator& alloc = impl::get_allocator(_root);
5830 if (!alloc.reserve()) return xml_node();
5832 xml_node n(impl::allocate_node(alloc, type_));
5833 if (!n) return xml_node();
5835 impl::insert_node_after(n._root, node._root);
5837 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5842 PUGI__FN xml_node xml_node::append_child(const char_t* name_)
5844 xml_node result = append_child(node_element);
5846 result.set_name(name_);
5851 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
5853 xml_node result = prepend_child(node_element);
5855 result.set_name(name_);
5860 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
5862 xml_node result = insert_child_after(node_element, node);
5864 result.set_name(name_);
5869 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
5871 xml_node result = insert_child_before(node_element, node);
5873 result.set_name(name_);
5878 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
5880 xml_node_type type_ = proto.type();
5881 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5883 impl::xml_allocator& alloc = impl::get_allocator(_root);
5884 if (!alloc.reserve()) return xml_node();
5886 xml_node n(impl::allocate_node(alloc, type_));
5887 if (!n) return xml_node();
5889 impl::append_node(n._root, _root);
5890 impl::node_copy_tree(n._root, proto._root);
5895 PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
5897 xml_node_type type_ = proto.type();
5898 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5900 impl::xml_allocator& alloc = impl::get_allocator(_root);
5901 if (!alloc.reserve()) return xml_node();
5903 xml_node n(impl::allocate_node(alloc, type_));
5904 if (!n) return xml_node();
5906 impl::prepend_node(n._root, _root);
5907 impl::node_copy_tree(n._root, proto._root);
5912 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
5914 xml_node_type type_ = proto.type();
5915 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5916 if (!node._root || node._root->parent != _root) return xml_node();
5918 impl::xml_allocator& alloc = impl::get_allocator(_root);
5919 if (!alloc.reserve()) return xml_node();
5921 xml_node n(impl::allocate_node(alloc, type_));
5922 if (!n) return xml_node();
5924 impl::insert_node_after(n._root, node._root);
5925 impl::node_copy_tree(n._root, proto._root);
5930 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
5932 xml_node_type type_ = proto.type();
5933 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5934 if (!node._root || node._root->parent != _root) return xml_node();
5936 impl::xml_allocator& alloc = impl::get_allocator(_root);
5937 if (!alloc.reserve()) return xml_node();
5939 xml_node n(impl::allocate_node(alloc, type_));
5940 if (!n) return xml_node();
5942 impl::insert_node_before(n._root, node._root);
5943 impl::node_copy_tree(n._root, proto._root);
5948 PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
5950 if (!impl::allow_move(*this, moved)) return xml_node();
5952 impl::xml_allocator& alloc = impl::get_allocator(_root);
5953 if (!alloc.reserve()) return xml_node();
5955 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5956 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5958 impl::remove_node(moved._root);
5959 impl::append_node(moved._root, _root);
5964 PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
5966 if (!impl::allow_move(*this, moved)) return xml_node();
5968 impl::xml_allocator& alloc = impl::get_allocator(_root);
5969 if (!alloc.reserve()) return xml_node();
5971 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5972 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5974 impl::remove_node(moved._root);
5975 impl::prepend_node(moved._root, _root);
5980 PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
5982 if (!impl::allow_move(*this, moved)) return xml_node();
5983 if (!node._root || node._root->parent != _root) return xml_node();
5984 if (moved._root == node._root) return xml_node();
5986 impl::xml_allocator& alloc = impl::get_allocator(_root);
5987 if (!alloc.reserve()) return xml_node();
5989 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5990 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5992 impl::remove_node(moved._root);
5993 impl::insert_node_after(moved._root, node._root);
5998 PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
6000 if (!impl::allow_move(*this, moved)) return xml_node();
6001 if (!node._root || node._root->parent != _root) return xml_node();
6002 if (moved._root == node._root) return xml_node();
6004 impl::xml_allocator& alloc = impl::get_allocator(_root);
6005 if (!alloc.reserve()) return xml_node();
6007 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6008 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6010 impl::remove_node(moved._root);
6011 impl::insert_node_before(moved._root, node._root);
6016 PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
6018 return remove_attribute(attribute(name_));
6021 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
6023 if (!_root || !a._attr) return false;
6024 if (!impl::is_attribute_of(a._attr, _root)) return false;
6026 impl::xml_allocator& alloc = impl::get_allocator(_root);
6027 if (!alloc.reserve()) return false;
6029 impl::remove_attribute(a._attr, _root);
6030 impl::destroy_attribute(a._attr, alloc);
6035 PUGI__FN bool xml_node::remove_child(const char_t* name_)
6037 return remove_child(child(name_));
6040 PUGI__FN bool xml_node::remove_child(const xml_node& n)
6042 if (!_root || !n._root || n._root->parent != _root) return false;
6044 impl::xml_allocator& alloc = impl::get_allocator(_root);
6045 if (!alloc.reserve()) return false;
6047 impl::remove_node(n._root);
6048 impl::destroy_node(n._root, alloc);
6053 PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
6055 // append_buffer is only valid for elements/documents
6056 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
6058 // get document node
6059 impl::xml_document_struct* doc = &impl::get_document(_root);
6061 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
6062 doc->header |= impl::xml_memory_page_contents_shared_mask;
6064 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
6065 impl::xml_memory_page* page = 0;
6066 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page));
6069 if (!extra) return impl::make_parse_result(status_out_of_memory);
6071 // add extra buffer to the list
6073 extra->next = doc->extra_buffers;
6074 doc->extra_buffers = extra;
6076 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
6077 impl::name_null_sentry sentry(_root);
6079 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
6082 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
6084 if (!_root) return xml_node();
6086 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6087 if (i->name && impl::strequal(name_, i->name))
6089 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6090 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6097 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
6099 if (!_root) return xml_node();
6101 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6102 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6103 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6109 #ifndef PUGIXML_NO_STL
6110 PUGI__FN string_t xml_node::path(char_t delimiter) const
6112 if (!_root) return string_t();
6116 for (xml_node_struct* i = _root; i; i = i->parent)
6118 offset += (i != _root);
6119 offset += i->name ? impl::strlength(i->name) : 0;
6123 result.resize(offset);
6125 for (xml_node_struct* j = _root; j; j = j->parent)
6128 result[--offset] = delimiter;
6130 if (j->name && *j->name)
6132 size_t length = impl::strlength(j->name);
6135 memcpy(&result[offset], j->name, length * sizeof(char_t));
6139 assert(offset == 0);
6145 PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
6147 xml_node found = *this; // Current search context.
6149 if (!_root || !path_ || !path_[0]) return found;
6151 if (path_[0] == delimiter)
6153 // Absolute path; e.g. '/foo/bar'
6154 found = found.root();
6158 const char_t* path_segment = path_;
6160 while (*path_segment == delimiter) ++path_segment;
6162 const char_t* path_segment_end = path_segment;
6164 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
6166 if (path_segment == path_segment_end) return found;
6168 const char_t* next_segment = path_segment_end;
6170 while (*next_segment == delimiter) ++next_segment;
6172 if (*path_segment == '.' && path_segment + 1 == path_segment_end)
6173 return found.first_element_by_path(next_segment, delimiter);
6174 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
6175 return found.parent().first_element_by_path(next_segment, delimiter);
6179 for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
6181 if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
6183 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
6185 if (subsearch) return subsearch;
6194 PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
6198 xml_node arg_begin = *this;
6199 if (!walker.begin(arg_begin)) return false;
6201 xml_node cur = first_child();
6209 xml_node arg_for_each = cur;
6210 if (!walker.for_each(arg_for_each))
6213 if (cur.first_child())
6216 cur = cur.first_child();
6218 else if (cur.next_sibling())
6219 cur = cur.next_sibling();
6222 // Borland C++ workaround
6223 while (!cur.next_sibling() && cur != *this && !cur.parent().empty())
6230 cur = cur.next_sibling();
6233 while (cur && cur != *this);
6236 assert(walker._depth == -1);
6238 xml_node arg_end = *this;
6239 return walker.end(arg_end);
6242 PUGI__FN size_t xml_node::hash_value() const
6244 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
6247 PUGI__FN xml_node_struct* xml_node::internal_object() const
6252 PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6256 impl::xml_buffered_writer buffered_writer(writer, encoding);
6258 impl::node_output(buffered_writer, _root, indent, flags, depth);
6260 buffered_writer.flush();
6263 #ifndef PUGIXML_NO_STL
6264 PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6266 xml_writer_stream writer(stream);
6268 print(writer, indent, flags, encoding, depth);
6271 PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
6273 xml_writer_stream writer(stream);
6275 print(writer, indent, flags, encoding_wchar, depth);
6279 PUGI__FN ptrdiff_t xml_node::offset_debug() const
6281 if (!_root) return -1;
6283 impl::xml_document_struct& doc = impl::get_document(_root);
// we can determine the offset reliably only if there is exactly one parse buffer
6286 if (!doc.buffer || doc.extra_buffers) return -1;
6294 case node_declaration:
6296 return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
6302 return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
6310 PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
6312 return (bool)lhs && rhs;
6315 PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
6317 return (bool)lhs || rhs;
6321 PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
6325 PUGI__FN xml_node_struct* xml_text::_data() const
6327 if (!_root || impl::is_text_node(_root)) return _root;
6329 // element nodes can have value if parse_embed_pcdata was used
6330 if (PUGI__NODETYPE(_root) == node_element && _root->value)
6333 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
6334 if (impl::is_text_node(node))
6340 PUGI__FN xml_node_struct* xml_text::_data_new()
6342 xml_node_struct* d = _data();
6345 return xml_node(_root).append_child(node_pcdata).internal_object();
6348 PUGI__FN xml_text::xml_text(): _root(0)
6352 PUGI__FN static void unspecified_bool_xml_text(xml_text***)
6356 PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
6358 return _data() ? unspecified_bool_xml_text : 0;
6361 PUGI__FN bool xml_text::operator!() const
6366 PUGI__FN bool xml_text::empty() const
6368 return _data() == 0;
6371 PUGI__FN const char_t* xml_text::get() const
6373 xml_node_struct* d = _data();
6375 return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
6378 PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
6380 xml_node_struct* d = _data();
6382 return (d && d->value) ? d->value + 0 : def;
6385 PUGI__FN int xml_text::as_int(int def) const
6387 xml_node_struct* d = _data();
6389 return (d && d->value) ? impl::get_value_int(d->value) : def;
6392 PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
6394 xml_node_struct* d = _data();
6396 return (d && d->value) ? impl::get_value_uint(d->value) : def;
6399 PUGI__FN double xml_text::as_double(double def) const
6401 xml_node_struct* d = _data();
6403 return (d && d->value) ? impl::get_value_double(d->value) : def;
6406 PUGI__FN float xml_text::as_float(float def) const
6408 xml_node_struct* d = _data();
6410 return (d && d->value) ? impl::get_value_float(d->value) : def;
6413 PUGI__FN bool xml_text::as_bool(bool def) const
6415 xml_node_struct* d = _data();
6417 return (d && d->value) ? impl::get_value_bool(d->value) : def;
6420 #ifdef PUGIXML_HAS_LONG_LONG
6421 PUGI__FN long long xml_text::as_llong(long long def) const
6423 xml_node_struct* d = _data();
6425 return (d && d->value) ? impl::get_value_llong(d->value) : def;
6428 PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
6430 xml_node_struct* d = _data();
6432 return (d && d->value) ? impl::get_value_ullong(d->value) : def;
6436 PUGI__FN bool xml_text::set(const char_t* rhs)
6438 xml_node_struct* dn = _data_new();
6440 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
6443 PUGI__FN bool xml_text::set(int rhs)
6445 xml_node_struct* dn = _data_new();
6447 return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6450 PUGI__FN bool xml_text::set(unsigned int rhs)
6452 xml_node_struct* dn = _data_new();
6454 return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6457 PUGI__FN bool xml_text::set(long rhs)
6459 xml_node_struct* dn = _data_new();
6461 return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6464 PUGI__FN bool xml_text::set(unsigned long rhs)
6466 xml_node_struct* dn = _data_new();
6468 return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6471 PUGI__FN bool xml_text::set(float rhs)
6473 xml_node_struct* dn = _data_new();
6475 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6478 PUGI__FN bool xml_text::set(double rhs)
6480 xml_node_struct* dn = _data_new();
6482 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6485 PUGI__FN bool xml_text::set(bool rhs)
6487 xml_node_struct* dn = _data_new();
6489 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6492 #ifdef PUGIXML_HAS_LONG_LONG
6493 PUGI__FN bool xml_text::set(long long rhs)
6495 xml_node_struct* dn = _data_new();
6497 return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6500 PUGI__FN bool xml_text::set(unsigned long long rhs)
6502 xml_node_struct* dn = _data_new();
6504 return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6508 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
6514 PUGI__FN xml_text& xml_text::operator=(int rhs)
6520 PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
6526 PUGI__FN xml_text& xml_text::operator=(long rhs)
6532 PUGI__FN xml_text& xml_text::operator=(unsigned long rhs)
6538 PUGI__FN xml_text& xml_text::operator=(double rhs)
6544 PUGI__FN xml_text& xml_text::operator=(float rhs)
6550 PUGI__FN xml_text& xml_text::operator=(bool rhs)
6556 #ifdef PUGIXML_HAS_LONG_LONG
6557 PUGI__FN xml_text& xml_text::operator=(long long rhs)
6563 PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
6570 PUGI__FN xml_node xml_text::data() const
6572 return xml_node(_data());
6576 PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
6578 return (bool)lhs && rhs;
6581 PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
6583 return (bool)lhs || rhs;
6587 PUGI__FN xml_node_iterator::xml_node_iterator()
6591 PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
6595 PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6599 PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
6601 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6604 PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
6606 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6609 PUGI__FN xml_node& xml_node_iterator::operator*() const
6611 assert(_wrap._root);
6615 PUGI__FN xml_node* xml_node_iterator::operator->() const
6617 assert(_wrap._root);
6618 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6621 PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
6623 assert(_wrap._root);
6624 _wrap._root = _wrap._root->next_sibling;
6628 PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
6630 xml_node_iterator temp = *this;
6635 PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
6637 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
6641 PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
6643 xml_node_iterator temp = *this;
6648 PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
6652 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
6656 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6660 PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
6662 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
6665 PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
6667 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
6670 PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
6672 assert(_wrap._attr);
6676 PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
6678 assert(_wrap._attr);
6679 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
6682 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
6684 assert(_wrap._attr);
6685 _wrap._attr = _wrap._attr->next_attribute;
6689 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
6691 xml_attribute_iterator temp = *this;
6696 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
6698 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
6702 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
6704 xml_attribute_iterator temp = *this;
6709 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
6713 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
6717 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
6721 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
6723 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6726 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
6728 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6731 PUGI__FN xml_node& xml_named_node_iterator::operator*() const
6733 assert(_wrap._root);
6737 PUGI__FN xml_node* xml_named_node_iterator::operator->() const
6739 assert(_wrap._root);
6740 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6743 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
6745 assert(_wrap._root);
6746 _wrap = _wrap.next_sibling(_name);
6750 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
6752 xml_named_node_iterator temp = *this;
6757 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
6760 _wrap = _wrap.previous_sibling(_name);
6763 _wrap = _parent.last_child();
6765 if (!impl::strequal(_wrap.name(), _name))
6766 _wrap = _wrap.previous_sibling(_name);
6772 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
6774 xml_named_node_iterator temp = *this;
6779 PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
6783 PUGI__FN xml_parse_result::operator bool() const
6785 return status == status_ok;
6788 PUGI__FN const char* xml_parse_result::description() const
6792 case status_ok: return "No error";
6794 case status_file_not_found: return "File was not found";
6795 case status_io_error: return "Error reading from file/stream";
6796 case status_out_of_memory: return "Could not allocate memory";
6797 case status_internal_error: return "Internal error occurred";
6799 case status_unrecognized_tag: return "Could not determine tag type";
6801 case status_bad_pi: return "Error parsing document declaration/processing instruction";
6802 case status_bad_comment: return "Error parsing comment";
6803 case status_bad_cdata: return "Error parsing CDATA section";
6804 case status_bad_doctype: return "Error parsing document type declaration";
6805 case status_bad_pcdata: return "Error parsing PCDATA section";
6806 case status_bad_start_element: return "Error parsing start element tag";
6807 case status_bad_attribute: return "Error parsing element attribute";
6808 case status_bad_end_element: return "Error parsing end element tag";
6809 case status_end_element_mismatch: return "Start-end tags mismatch";
6811 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
6813 case status_no_document_element: return "No document element found";
6815 default: return "Unknown error";
6819 PUGI__FN xml_document::xml_document(): _buffer(0)
6824 PUGI__FN xml_document::~xml_document()
6829 PUGI__FN void xml_document::reset()
6835 PUGI__FN void xml_document::reset(const xml_document& proto)
6839 for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
6843 PUGI__FN void xml_document::create()
6847 #ifdef PUGIXML_COMPACT
6848 const size_t page_offset = sizeof(uint32_t);
6850 const size_t page_offset = 0;
6853 // initialize sentinel page
6854 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));
6856 // prepare page structure
6857 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
6860 page->busy_size = impl::xml_memory_page_size;
6862 // setup first page marker
6863 #ifdef PUGIXML_COMPACT
6864 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
6865 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
6866 *page->compact_page_marker = sizeof(impl::xml_memory_page);
6869 // allocate new root
6870 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
6871 _root->prev_sibling_c = _root;
6873 // setup sentinel page
6874 page->allocator = static_cast<impl::xml_document_struct*>(_root);
6876 // verify the document allocation
6877 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
6880 PUGI__FN void xml_document::destroy()
6884 // destroy static storage
6887 impl::xml_memory::deallocate(_buffer);
6891 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
6892 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
6894 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
6897 // destroy dynamic storage, leave sentinel page (it's in static memory)
6898 impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
6899 assert(root_page && !root_page->prev);
6900 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
6902 for (impl::xml_memory_page* page = root_page->next; page; )
6904 impl::xml_memory_page* next = page->next;
6906 impl::xml_allocator::deallocate_page(page);
6911 #ifdef PUGIXML_COMPACT
6912 // destroy hash table
6913 static_cast<impl::xml_document_struct*>(_root)->hash.clear();
6919 #ifndef PUGIXML_NO_STL
6920 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
6924 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
6927 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
6931 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
6935 PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
6937 // Force native encoding (skip autodetection)
6938 #ifdef PUGIXML_WCHAR_MODE
6939 xml_encoding encoding = encoding_wchar;
6941 xml_encoding encoding = encoding_utf8;
6944 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
6947 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
6949 return load_string(contents, options);
6952 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
6956 using impl::auto_deleter; // MSVC7 workaround
6957 auto_deleter<FILE> file(fopen(path_, "rb"), impl::close_file);
6959 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
6962 PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
6966 using impl::auto_deleter; // MSVC7 workaround
6967 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
6969 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
6972 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
6976 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
6979 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
6983 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
6986 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
6990 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
// Serializes the document through the supplied xml_writer.
//   writer   - sink that receives the output via a buffered writer
//   indent   - indentation string forwarded to impl::node_output
//   flags    - format_* bit flags; format_write_bom, format_no_declaration and format_raw are examined here
//   encoding - output encoding handed to the buffered writer
6993 PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
6995 impl::xml_buffered_writer buffered_writer(writer, encoding);
// a byte order mark is only emitted on request and never for Latin-1 output
6997 if ((flags & format_write_bom) && encoding != encoding_latin1)
6999 // BOM always represents the codepoint U+FEFF, so just write it in native encoding
7000 #ifdef PUGIXML_WCHAR_MODE
7001 unsigned int bom = 0xfeff;
7002 buffered_writer.write(static_cast<wchar_t>(bom));
// the three characters below are the UTF-8 encoding of U+FEFF
7004 buffered_writer.write('\xef', '\xbb', '\xbf');
// write an XML declaration unless it is suppressed by flag or impl::has_declaration reports one for the root
7008 if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
7010 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
7011 if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
7012 buffered_writer.write('?', '>');
// raw output gets no newline after the declaration
7013 if (!(flags & format_raw)) buffered_writer.write('\n');
// emit the tree starting at the document root, with 0 as the last argument
// NOTE(review): presumably the initial nesting depth - confirm against impl::node_output
7016 impl::node_output(buffered_writer, _root, indent, flags, 0);
// push any buffered output to the writer before returning
7018 buffered_writer.flush();
7021 #ifndef PUGIXML_NO_STL
7022 PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7024 xml_writer_stream writer(stream);
7026 save(writer, indent, flags, encoding);
7029 PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
7031 xml_writer_stream writer(stream);
7033 save(writer, indent, flags, encoding_wchar);
7037 PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7039 using impl::auto_deleter; // MSVC7 workaround
7040 auto_deleter<FILE> file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
7042 return impl::save_file_impl(*this, file.data, indent, flags, encoding);
// Saves the document to the file named by the wide-character path_.
// The file is opened through impl::open_file_wide and the actual writing is delegated to
// impl::save_file_impl, whose result is returned.
7045 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7047 using impl::auto_deleter; // MSVC7 workaround
// zero-initialised buffer for the mode string that is passed to open_file_wide below
7048 wchar_t openMode[3] = {L'\0'};
// the mode is chosen according to format_save_file_text
// NOTE(review): presumably L"w" for text and L"wb" otherwise, mirroring the narrow-path overload - confirm
7049 if (flags & format_save_file_text) {
// auto_deleter is handed impl::close_file together with the FILE handle
7055 auto_deleter<FILE> file(impl::open_file_wide(path_, openMode), impl::close_file);
7057 return impl::save_file_impl(*this, file.data, indent, flags, encoding);
7060 PUGI__FN xml_node xml_document::document_element() const
7064 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
7065 if (PUGI__NODETYPE(i) == node_element)
7071 #ifndef PUGIXML_NO_STL
7072 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
7076 return impl::as_utf8_impl(str, impl::strlength_wide(str));
7079 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
7081 return impl::as_utf8_impl(str.c_str(), str.size());
7084 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
7088 return impl::as_wide_impl(str, strlen(str));
7091 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
7093 return impl::as_wide_impl(str.c_str(), str.size());
7097 PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
7099 impl::xml_memory::allocate = allocate;
7100 impl::xml_memory::deallocate = deallocate;
7103 PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
7105 return impl::xml_memory::allocate;
7108 PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
7110 return impl::xml_memory::deallocate;
7114 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
7117 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
7118 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
7120 return std::bidirectional_iterator_tag();
7123 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
7125 return std::bidirectional_iterator_tag();
7128 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
7130 return std::bidirectional_iterator_tag();
7135 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
7138 // Workarounds for (non-standard) iterator category detection
7139 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
7141 return std::bidirectional_iterator_tag();
7144 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
7146 return std::bidirectional_iterator_tag();
7149 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
7151 return std::bidirectional_iterator_tag();
7156 #ifndef PUGIXML_NO_XPATH
// Comparison functors used as STL replacements. Each one forwards to the matching
// built-in operator of T and carries no state.

// Returns true when lhs == rhs.
struct equal_to
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		return lhs == rhs;
	}
};

// Returns true when lhs != rhs.
struct not_equal_to
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		return lhs != rhs;
	}
};

// Returns true when lhs < rhs.
struct less
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		return lhs < rhs;
	}
};

// Returns true when lhs <= rhs.
struct less_equal
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		return lhs <= rhs;
	}
};
// Exchanges the values of lhs and rhs through a temporary copy (STL replacement).
// T must be copy-constructible and copy-assignable.
template <typename T> void swap(T& lhs, T& rhs)
{
	T temp = lhs;
	lhs = rhs;
	rhs = temp;
}
// Returns an iterator to the first smallest element of [begin, end) according to pred
// (STL replacement). pred(a, b) must return true when a orders before b.
// For an empty range the function returns end.
template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
{
	// an empty range has no minimum; this also keeps `begin + 1` below from stepping past `end`
	if (begin == end) return end;

	I result = begin;

	for (I it = begin + 1; it != end; ++it)
		if (pred(*it, *result))
			result = it;

	return result;
}
// Reverses the order of the elements in [begin, end) in place (STL replacement).
// Requires random-access iterators (uses `end - begin`); ranges of length 0 or 1 are left untouched.
template <typename I> void reverse(I begin, I end)
{
	// swap the outermost pair and move both ends inwards until they meet
	while (end - begin > 1) swap(*begin++, *--end);
}
// Collapses every run of consecutive equal elements in [begin, end) to its first element
// (STL replacement). Elements are compared with operator!=.
// Returns the new past-the-end iterator; elements behind it are left in an unspecified state.
template <typename I> I unique(I begin, I end)
{
	// fast skip head: nothing has to move while neighbours differ
	while (end - begin > 1 && *begin != *(begin + 1)) begin++;

	if (begin == end) return begin;

	// last written element
	I write = begin++;

	// merge unique elements
	while (begin != end)
	{
		if (*begin != *write)
			*++write = *begin++;
		else
			begin++;
	}

	// past-the-end (write points to live element)
	return write + 1;
}
// Copies [begin, end) so that the copy ends at target, walking from the last element to the first
// (STL replacement). Because the copy runs back to front, the destination may overlap the source
// as long as target lies at or after end.
template <typename I> void copy_backwards(I begin, I end, I target)
{
	while (begin != end) *--target = *--end;
}
// Sorts the non-empty range [begin, end) in place with insertion sort.
//   pred - strict ordering; pred(a, b) is true when a must come before b
//   T*   - unused tag argument whose only purpose is to let T (the element type) be deduced
// The range must not be empty (asserted).
template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
{
	assert(begin != end);

	for (I it = begin + 1; it != end; ++it)
	{
		T val = *it;

		if (pred(val, *begin))
		{
			// move to front: shift the whole sorted prefix right by one
			copy_backwards(begin, it, it + 1);
			*begin = val;
		}
		else
		{
			I hole = it;

			// move hole backwards; *begin is not greater than val here, so the scan stops before leaving the range
			while (pred(val, *(hole - 1)))
			{
				*hole = *(hole - 1);
				hole--;
			}

			// fill hole with element
			*hole = val;
		}
	}
}
// std variant for elements with ==
// Three-way partition of [begin, end) around the pivot value found at middle.
// On return the range is arranged as [begin, *out_eqbeg) elements for which pred(elem, pivot) holds,
// [*out_eqbeg, *out_eqend) elements equal to the pivot (operator==), and
// [*out_eqend, end) elements for which pred(pivot, elem) holds.
//   pred - strict ordering predicate
//   out_eqbeg / out_eqend - receive the bounds of the equal range; must not be null
template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
{
	I eqbeg = middle, eqend = middle + 1;

	// expand equal range
	while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
	while (eqend != end && *eqend == *eqbeg) ++eqend;

	// process outer elements
	I ltend = eqbeg, gtbeg = eqend;

	for (;;)
	{
		// find the element from the right side that belongs to the left one
		for (; gtbeg != end; ++gtbeg)
			if (!pred(*eqbeg, *gtbeg))
			{
				if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);
				else break;
			}

		// find the element from the left side that belongs to the right one
		for (; ltend != begin; --ltend)
			if (!pred(*(ltend - 1), *eqbeg))
			{
				if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);
				else break;
			}

		// scanned all elements
		if (gtbeg == end && ltend == begin)
		{
			*out_eqbeg = eqbeg;
			*out_eqend = eqend;
			return;
		}

		// make room for elements by moving equal area
		if (gtbeg == end)
		{
			if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
			swap(*eqbeg, *--eqend);
		}
		else if (ltend == begin)
		{
			if (eqend != gtbeg) swap(*eqbeg, *eqend);
			++eqend;
			swap(*gtbeg++, *eqbeg++);
		}
		else swap(*gtbeg++, *--ltend);
	}
}
// Orders the three elements referenced by first, middle and last so that
// *first <= *middle <= *last with respect to pred; afterwards *middle holds their median.
template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
{
	if (pred(*middle, *first)) swap(*middle, *first);
	if (pred(*last, *middle)) swap(*last, *middle);
	// the second swap may have moved a smaller value into *middle, so re-check the first pair
	if (pred(*middle, *first)) swap(*middle, *first);
}
// Moves an approximate median of the range into *middle to serve as the partition pivot.
// first and last reference the first and the last element (last is inclusive, not past-the-end).
// Ranges spanning at most 40 positions use a plain median of three; larger ones use a
// median of three medians sampled across the range.
template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
{
	if (last - first <= 40)
	{
		// median of three for small chunks
		median3(first, middle, last, pred);
	}
	else
	{
		// median of nine
		size_t step = (last - first + 1) / 8;

		median3(first, first + step, first + 2 * step, pred);
		median3(middle - step, middle, middle + step, pred);
		median3(last - 2 * step, last - step, last, pred);
		median3(first + step, middle, last - step, pred);
	}
}
// Sorts [begin, end) in place according to pred (STL replacement).
// Ranges longer than 32 elements are split with a three-way partition around a median pivot;
// the smaller side is handled recursively and the larger one by the loop, which bounds the
// recursion depth. The remaining short range is finished with insertion sort.
template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
{
	// sort large chunks
	while (end - begin > 32)
	{
		// find median element
		I middle = begin + (end - begin) / 2;
		median(begin, middle, end - 1, pred);

		// partition in three chunks (< = >)
		I eqbeg, eqend;
		partition(begin, middle, end, pred, &eqbeg, &eqend);

		// loop on larger half
		if (eqbeg - begin > end - eqend)
		{
			sort(eqend, end, pred);
			end = eqbeg;
		}
		else
		{
			sort(begin, eqbeg, pred);
			begin = eqend;
		}
	}

	// insertion sort small chunk
	if (begin != end) insertion_sort(begin, end, pred, &*begin);
}
7384 // Allocator used for AST and evaluation stacks
7386 static const size_t xpath_memory_page_size =
7387 #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
7388 PUGIXML_MEMORY_XPATH_PAGE_SIZE
7394 static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*);
7396 struct xpath_memory_block
7398 xpath_memory_block* next;
7403 char data[xpath_memory_page_size];
7408 class xpath_allocator
7410 xpath_memory_block* _root;
7414 #ifdef PUGIXML_NO_EXCEPTIONS
7415 jmp_buf* error_handler;
7418 xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
7420 #ifdef PUGIXML_NO_EXCEPTIONS
7425 void* allocate_nothrow(size_t size)
7427 // round size up to block alignment boundary
7428 size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7430 if (_root_size + size <= _root->capacity)
7432 void* buf = &_root->data[0] + _root_size;
7438 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
7439 size_t block_capacity_base = sizeof(_root->data);
7440 size_t block_capacity_req = size + block_capacity_base / 4;
7441 size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
7443 size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
7445 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
7446 if (!block) return 0;
7448 block->next = _root;
7449 block->capacity = block_capacity;
7458 void* allocate(size_t size)
7460 void* result = allocate_nothrow(size);
7464 #ifdef PUGIXML_NO_EXCEPTIONS
7465 assert(error_handler);
7466 longjmp(*error_handler, 1);
7468 throw std::bad_alloc();
7475 void* reallocate(void* ptr, size_t old_size, size_t new_size)
7477 // round size up to block alignment boundary
7478 old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7479 new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7481 // we can only reallocate the last object
7482 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
7484 // adjust root size so that we have not allocated the object at all
7485 bool only_object = (_root_size == old_size);
7487 if (ptr) _root_size -= old_size;
7489 // allocate a new version (this will obviously reuse the memory if possible)
7490 void* result = allocate(new_size);
7493 // we have a new block
7494 if (result != ptr && ptr)
7497 assert(new_size >= old_size);
7498 memcpy(result, ptr, old_size);
7500 // free the previous page if it had no other objects
7503 assert(_root->data == result);
7504 assert(_root->next);
7506 xpath_memory_block* next = _root->next->next;
7510 // deallocate the whole page, unless it was the first one
7511 xml_memory::deallocate(_root->next);
7520 void revert(const xpath_allocator& state)
7522 // free all new pages
7523 xpath_memory_block* cur = _root;
7525 while (cur != state._root)
7527 xpath_memory_block* next = cur->next;
7529 xml_memory::deallocate(cur);
7535 _root = state._root;
7536 _root_size = state._root_size;
7541 xpath_memory_block* cur = _root;
7546 xpath_memory_block* next = cur->next;
7548 xml_memory::deallocate(cur);
7555 struct xpath_allocator_capture
7557 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
7561 ~xpath_allocator_capture()
7563 _target->revert(_state);
7566 xpath_allocator* _target;
7567 xpath_allocator _state;
7572 xpath_allocator* result;
7573 xpath_allocator* temp;
7576 struct xpath_stack_data
7578 xpath_memory_block blocks[2];
7579 xpath_allocator result;
7580 xpath_allocator temp;
7583 #ifdef PUGIXML_NO_EXCEPTIONS
7584 jmp_buf error_handler;
7587 xpath_stack_data(): result(blocks + 0), temp(blocks + 1)
7589 blocks[0].next = blocks[1].next = 0;
7590 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
7592 stack.result = &result;
7595 #ifdef PUGIXML_NO_EXCEPTIONS
7596 result.error_handler = temp.error_handler = &error_handler;
7612 const char_t* _buffer;
7614 size_t _length_heap;
7616 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
7618 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
7621 memcpy(result, string, length * sizeof(char_t));
7627 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
7632 static xpath_string from_const(const char_t* str)
7634 return xpath_string(str, false, 0);
7637 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
7639 assert(begin <= end && *end == 0);
7641 return xpath_string(begin, true, static_cast<size_t>(end - begin));
7644 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
7646 assert(begin <= end);
7648 size_t length = static_cast<size_t>(end - begin);
7650 return length == 0 ? xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length);
7653 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
7657 void append(const xpath_string& o, xpath_allocator* alloc)
7659 // skip empty sources
7660 if (!*o._buffer) return;
7662 // fast append for constant empty target and constant source
7663 if (!*_buffer && !_uses_heap && !o._uses_heap)
7665 _buffer = o._buffer;
7669 // need to make heap copy
7670 size_t target_length = length();
7671 size_t source_length = o.length();
7672 size_t result_length = target_length + source_length;
7674 // allocate new buffer
7675 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
7678 // append first string to the new buffer in case there was no reallocation
7679 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
7681 // append second string to the new buffer
7682 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
7683 result[result_length] = 0;
7688 _length_heap = result_length;
7692 const char_t* c_str() const
7697 size_t length() const
7699 return _uses_heap ? _length_heap : strlength(_buffer);
7702 char_t* data(xpath_allocator* alloc)
7704 // make private heap copy
7707 size_t length_ = strlength(_buffer);
7709 _buffer = duplicate_string(_buffer, length_, alloc);
7711 _length_heap = length_;
7714 return const_cast<char_t*>(_buffer);
7719 return *_buffer == 0;
7722 bool operator==(const xpath_string& o) const
7724 return strequal(_buffer, o._buffer);
7727 bool operator!=(const xpath_string& o) const
7729 return !strequal(_buffer, o._buffer);
7732 bool uses_heap() const
7740 PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
7742 while (*pattern && *string == *pattern)
7748 return *pattern == 0;
7751 PUGI__FN const char_t* find_char(const char_t* s, char_t c)
7753 #ifdef PUGIXML_WCHAR_MODE
7754 return wcschr(s, c);
7756 return strchr(s, c);
7760 PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
7762 #ifdef PUGIXML_WCHAR_MODE
7763 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
7764 return (*p == 0) ? s : wcsstr(s, p);
7766 return strstr(s, p);
7770 // Converts symbol to lower case, if it is an ASCII one
7771 PUGI__FN char_t tolower_ascii(char_t ch)
7773 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
7776 PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
7779 return xpath_string::from_const(na.attribute().value());
7782 xml_node n = na.node();
7790 return xpath_string::from_const(n.value());
7795 xpath_string result;
7797 // element nodes can have value if parse_embed_pcdata was used
7799 result.append(xpath_string::from_const(n.value()), alloc);
7801 xml_node cur = n.first_child();
7803 while (cur && cur != n)
7805 if (cur.type() == node_pcdata || cur.type() == node_cdata)
7806 result.append(xpath_string::from_const(cur.value()), alloc);
7808 if (cur.first_child())
7809 cur = cur.first_child();
7810 else if (cur.next_sibling())
7811 cur = cur.next_sibling();
7814 while (!cur.next_sibling() && cur != n)
7817 if (cur != n) cur = cur.next_sibling();
7825 return xpath_string();
7830 PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
7832 assert(ln->parent == rn->parent);
7834 // there is no common ancestor (the shared parent is null), nodes are from different documents
7835 if (!ln->parent) return ln < rn;
7837 // determine sibling order
7838 xml_node_struct* ls = ln;
7839 xml_node_struct* rs = rn;
7843 if (ls == rn) return true;
7844 if (rs == ln) return false;
7846 ls = ls->next_sibling;
7847 rs = rs->next_sibling;
7850 // if rn sibling chain ended ln must be before rn
7854 PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
7856 // find common ancestor at the same depth, if any
7857 xml_node_struct* lp = ln;
7858 xml_node_struct* rp = rn;
7860 while (lp && rp && lp->parent != rp->parent)
7866 // parents are the same!
7867 if (lp && rp) return node_is_before_sibling(lp, rp);
7869 // nodes are at different depths, need to normalize heights
7870 bool left_higher = !lp;
7884 // one node is the ancestor of the other
7885 if (ln == rn) return left_higher;
7887 // find common ancestor... again
7888 while (ln->parent != rn->parent)
7894 return node_is_before_sibling(ln, rn);
7897 PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
7899 while (node && node != parent) node = node->parent;
7901 return parent && node == parent;
7904 PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
7906 xml_node_struct* node = xnode.node().internal_object();
7910 if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
7912 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
7913 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
7919 xml_attribute_struct* attr = xnode.attribute().internal_object();
7923 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
7925 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
7926 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
7935 struct document_order_comparator
7937 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
7939 // optimized document order based check
7940 const void* lo = document_buffer_order(lhs);
7941 const void* ro = document_buffer_order(rhs);
7943 if (lo && ro) return lo < ro;
7946 xml_node ln = lhs.node(), rn = rhs.node();
7948 // compare attributes
7949 if (lhs.attribute() && rhs.attribute())
7952 if (lhs.parent() == rhs.parent())
7954 // determine sibling order
7955 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
7956 if (a == rhs.attribute())
7962 // compare attribute parents
7966 else if (lhs.attribute())
7968 // attributes go after the parent element
7969 if (lhs.parent() == rhs.node()) return false;
7973 else if (rhs.attribute())
7975 // attributes go after the parent element
7976 if (rhs.parent() == lhs.node()) return true;
7981 if (ln == rn) return false;
7983 if (!ln || !rn) return ln < rn;
7985 return node_is_before(ln.internal_object(), rn.internal_object());
7989 struct duplicate_comparator
7991 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
7993 if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
7994 else return rhs.attribute() ? false : lhs.node() < rhs.node();
7998 PUGI__FN double gen_nan()
8000 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
8001 PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
8002 typedef uint32_t UI; // BCC5 workaround
8003 union { float f; UI i; } u;
8008 const volatile double zero = 0.0;
8013 PUGI__FN bool is_nan(double value)
8015 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8016 return !!_isnan(value);
8017 #elif defined(fpclassify) && defined(FP_NAN)
8018 return fpclassify(value) == FP_NAN;
8021 const volatile double v = value;
8026 PUGI__FN const char_t* convert_number_to_string_special(double value)
8028 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8029 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
8030 if (_isnan(value)) return PUGIXML_TEXT("NaN");
8031 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8032 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
8033 switch (fpclassify(value))
8036 return PUGIXML_TEXT("NaN");
8039 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8042 return PUGIXML_TEXT("0");
8049 const volatile double v = value;
8051 if (v == 0) return PUGIXML_TEXT("0");
8052 if (v != v) return PUGIXML_TEXT("NaN");
8053 if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8058 PUGI__FN bool convert_number_to_boolean(double value)
8060 return (value != 0 && !is_nan(value));
8063 PUGI__FN void truncate_zeros(char* begin, char* end)
8065 while (begin != end && end[-1] == '0') end--;
8070 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
8071 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
8072 PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
8076 _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
8078 // truncate redundant zeros
8079 truncate_zeros(buffer, buffer + strlen(buffer));
8082 *out_mantissa = buffer;
8083 *out_exponent = exponent;
8086 PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
8088 // get a scientific notation value with IEEE DBL_DIG decimals
8089 sprintf(buffer, "%.*e", DBL_DIG, value);
8090 assert(strlen(buffer) < buffer_size);
8093 // get the exponent (possibly negative)
8094 char* exponent_string = strchr(buffer, 'e');
8095 assert(exponent_string);
8097 int exponent = atoi(exponent_string + 1);
8099 // extract mantissa string: skip sign
8100 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
8101 assert(mantissa[0] != '0' && mantissa[1] == '.');
8103 // divide mantissa by 10 to eliminate integer part
8104 mantissa[1] = mantissa[0];
8108 // remove extra mantissa digits and zero-terminate mantissa
8109 truncate_zeros(mantissa, exponent_string);
8112 *out_mantissa = mantissa;
8113 *out_exponent = exponent;
8117 PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
8119 // try special number conversion
8120 const char_t* special = convert_number_to_string_special(value);
8121 if (special) return xpath_string::from_const(special);
8123 // get mantissa + exponent form
8124 char mantissa_buffer[32] = {};
8125 char* mantissa = nullptr;
8128 convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
8130 // allocate a buffer of suitable length for the number
8131 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
8132 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
8139 if (value < 0) *s++ = '-';
8148 while (exponent > 0)
8150 assert(*mantissa == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*mantissa) - '0') <= 9);
8151 *s++ = *mantissa ? *mantissa++ : '0';
8162 // extra zeroes from negative exponent
8163 while (exponent < 0)
8169 // extra mantissa digits
8172 assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
8178 assert(s < result + result_size);
8181 return xpath_string::from_heap_preallocated(result, s);
// Checks whether a zero-terminated string has the numeric layout accepted here:
// optional leading whitespace, optional '-', then digits and/or a decimal part (a bare '.' must be
// followed by at least one digit), optional trailing whitespace, and nothing else.
// Returns true only when the whole string was consumed by that layout.
8184 PUGI__FN bool check_string_to_number_format(const char_t* string)
8186 // parse leading whitespace
8187 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
// optional sign
8190 if (*string == '-') ++string;
// nothing left after whitespace/sign: not a number
8192 if (!*string) return false;
8194 // if there is no integer part, there should be a decimal part with at least one digit
8195 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
8197 // parse integer part
8198 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8200 // parse decimal part
8205 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8208 // parse trailing whitespace
8209 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
// valid only if the terminator was reached
8211 return *string == 0;
// Converts a zero-terminated string to a double.
// Strings rejected by check_string_to_number_format() yield gen_nan(); accepted strings are parsed
// with wcstod when PUGIXML_WCHAR_MODE is defined and with strtod otherwise.
8214 PUGI__FN double convert_string_to_number(const char_t* string)
8216 // check string format
8217 if (!check_string_to_number_format(string)) return gen_nan();
8220 #ifdef PUGIXML_WCHAR_MODE
8221 return wcstod(string, 0);
8223 return strtod(string, 0);
// Converts the character range [begin, end) to a number and stores it in *out_result.
// The range is first copied into a zero-terminated scratch string: the caller's 32-element buffer is
// used when the range plus terminator fits, otherwise a temporary block from xml_memory::allocate.
// Returns false when that temporary allocation fails.
8227 PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
8229 size_t length = static_cast<size_t>(end - begin);
8230 char_t* scratch = buffer;
// ">=" keeps one element free for the terminator
8232 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8234 // need to make dummy on-heap copy
8235 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8236 if (!scratch) return false;
8239 // copy string to zero-terminated buffer and perform conversion
8240 memcpy(scratch, begin, length * sizeof(char_t));
8241 scratch[length] = 0;
8243 *out_result = convert_string_to_number(scratch);
8245 // free dummy buffer
8246 if (scratch != buffer) xml_memory::deallocate(scratch);
// Rounds value to an integral double by taking floor(value + 0.5), so exact halves round upwards.
8251 PUGI__FN double round_nearest(double value)
8253 return floor(value + 0.5);
// Variant of round_nearest that uses ceil(value) for inputs in [-0.5, 0] and floor(value + 0.5) elsewhere.
8256 PUGI__FN double round_nearest_nzero(double value)
8258 // same as round_nearest, but returns -0 for [-0.5, -0]
8259 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
8260 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
// Returns the name of the attribute held by node when it has one, otherwise the name of its XML node.
8263 PUGI__FN const char_t* qualified_name(const xpath_node& node)
8265 return node.attribute() ? node.attribute().name() : node.node().name();
// Returns the part of the qualified name that follows the ':' located by find_char,
// or the whole qualified name when it contains no ':'.
8268 PUGI__FN const char_t* local_name(const xpath_node& node)
8270 const char_t* name = qualified_name(node);
8271 const char_t* p = find_char(name, ':');
8273 return p ? p + 1 : name;
// Attribute predicate that recognises the namespace declaration matching a given qualified name.
// Built from a name: when the name contains ':', the text before it is remembered as the prefix.
// Applied to an attribute: matches "xmlns:<prefix>" when a prefix was found, plain "xmlns" otherwise.
8276 struct namespace_uri_predicate
// start of the prefix inside the original name (not zero-terminated), or 0 when the name has no prefix
8278 const char_t* prefix;
8279 size_t prefix_length;
8281 namespace_uri_predicate(const char_t* name)
8283 const char_t* pos = find_char(name, ':');
8285 prefix = pos ? name : 0;
8286 prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
8289 bool operator()(xml_attribute a) const
8291 const char_t* name = a.name();
8293 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
// name[5] is the character right after "xmlns": ':' introduces a prefixed declaration, 0 ends the default one
8295 return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
// Returns the value of the xmlns declaration that matches the prefix of node's name
// (or the default declaration when the name has no prefix); returns an empty string if none is found.
// NOTE(review): the declaration of p and the loop around the lookup are not visible in this listing;
// presumably the search walks from node up through its ancestors - confirm.
8299 PUGI__FN const char_t* namespace_uri(xml_node node)
8301 namespace_uri_predicate pred = node.name();
8307 xml_attribute a = p.find_attribute(pred);
8309 if (a) return a.value();
8314 return PUGIXML_TEXT("");
// Returns the value of the xmlns declaration that matches the prefix of attr's name, searching from parent.
// An attribute without a prefix yields an empty string immediately; an empty string is also returned
// when no matching declaration is found.
// NOTE(review): the loop that advances p is not visible in this listing; presumably it climbs the ancestors - confirm.
8317 PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
8319 namespace_uri_predicate pred = attr.name();
8321 // Default namespace does not apply to attributes
8322 if (!pred.prefix) return PUGIXML_TEXT("");
8324 xml_node p = parent;
8328 xml_attribute a = p.find_attribute(pred);
8330 if (a) return a.value();
8335 return PUGIXML_TEXT("");
// Dispatches to the attribute overload (passing the node's parent) when node holds an attribute,
// and to the xml_node overload otherwise.
8338 PUGI__FN const char_t* namespace_uri(const xpath_node& node)
8340 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
// Normalizes whitespace of a zero-terminated string in place.
// Reads through `it` and writes through `write` into the same buffer: each run of whitespace becomes
// a single ' ', whitespace at the very start is not written, and one trailing space is removed.
8343 PUGI__FN char_t* normalize_space(char_t* buffer)
8345 char_t* write = buffer;
8347 for (char_t* it = buffer; *it; )
8351 if (PUGI__IS_CHARTYPE(ch, ct_space))
8353 // replace whitespace sequence with single space
8354 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
8356 // avoid leading spaces
8357 if (write != buffer) *write++ = ' ';
8362 // remove trailing space
8363 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
// Rewrites buffer in place, mapping characters found in `from` to the character at the same index in `to`.
// Characters not present in `from` are copied unchanged; a character whose index in `from` is at or
// beyond to_length has no replacement (no write is visible for that case in this listing).
8371 PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
8373 char_t* write = buffer;
8377 PUGI__DMC_VOLATILE char_t ch = *buffer++;
8379 const char_t* pos = find_char(from, ch);
8382 *write++ = ch; // do not process
8383 else if (static_cast<size_t>(pos - from) < to_length)
8384 *write++ = to[pos - from]; // replace
// Builds a 128-entry lookup table for translating characters with codes below 128 and returns a copy
// of it allocated from alloc via allocate_nothrow.
// Entry meaning: 0..127 = replacement character code, 128 = "skip this character".
8393 PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
8395 unsigned char table[128] = {0};
8399 unsigned int fc = static_cast<unsigned int>(*from);
8400 unsigned int tc = static_cast<unsigned int>(*to);
// character codes outside the table range get special handling (the handling itself is not visible in this listing)
8402 if (fc >= 128 || tc >= 128)
8405 // code=128 means "skip character"
// a `from` character with no counterpart in `to` (tc == 0) is marked as skipped
8407 table[fc] = static_cast<unsigned char>(tc ? tc : 128);
8413 for (int i = 0; i < 128; ++i)
8415 table[i] = static_cast<unsigned char>(i);
8417 void* result = alloc->allocate_nothrow(sizeof(table));
8421 memcpy(result, table, sizeof(table));
8424 return static_cast<unsigned char*>(result);
// Rewrites buffer in place using a table of the layout produced by translate_table_generate:
// each looked-up character is replaced by its table code, and code 128 drops the character.
8427 PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
8429 char_t* write = buffer;
8433 char_t ch = *buffer++;
8434 unsigned int index = static_cast<unsigned int>(ch);
8438 unsigned char code = table[index];
8440 // code=128 means "skip character" (table size is 128 so 128 can be a special value)
8441 // this code skips these characters without extra branches
8442 *write = static_cast<char_t>(code);
// (code >> 7) is 1 only for code 128, so the write cursor stays put and the slot is overwritten next time
8443 write += 1 - (code >> 7);
// Returns false for namespace declarations (names equal to "xmlns" or starting with "xmlns:")
// and true for every other attribute name.
8457 inline bool is_xpath_attribute(const char_t* name)
8459 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
// xpath_variable tagged xpath_type_boolean; its value member is initialised to false.
8462 struct xpath_variable_boolean: xpath_variable
8464 xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
// xpath_variable tagged xpath_type_number; its value member is initialised to 0.
8472 struct xpath_variable_number: xpath_variable
8474 xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
// xpath_variable tagged xpath_type_string; its value pointer starts out null and, when set,
// is released through xml_memory::deallocate by the destructor.
8482 struct xpath_variable_string: xpath_variable
8484 xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
8488 ~xpath_variable_string()
8490 if (value) xml_memory::deallocate(value);
// xpath_variable tagged xpath_type_node_set; holds its node set by value.
8497 struct xpath_variable_node_set: xpath_variable
8499 xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
8503 xpath_node_set value;
8507 static const xpath_node_set dummy_node_set;
// Computes an unsigned hash of a string by mixing in one character at a time
// (the add/shift/xor steps below), followed by a final three-step scramble.
8509 PUGI__FN unsigned int hash_string(const char_t* str)
8511 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
8512 unsigned int result = 0;
// per-character mixing
8516 result += static_cast<unsigned int>(*str++);
8517 result += result << 10;
8518 result ^= result >> 6;
// final scramble
8521 result += result << 3;
8522 result ^= result >> 11;
8523 result += result << 15;
// Allocates and constructs a variable object of type T with its name stored inline.
// Returns 0 for an empty name or when xml_memory::allocate fails.
// The block is sized sizeof(T) plus `length` extra characters, T is placement-constructed into it,
// and the name including its terminator is copied into result->name.
8528 template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
8530 size_t length = strlength(name);
8531 if (length == 0) return 0; // empty variable names are invalid
8533 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
8534 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
8535 if (!memory) return 0;
8537 T* result = new (memory) T();
// copies length + 1 characters, i.e. the terminator too
// (assumes T::name already provides room for one character - TODO confirm against the xpath_variable declaration)
8539 memcpy(result->name, name, (length + 1) * sizeof(char_t));
// Creates a named variable of the requested value type by forwarding to the matching
// new_xpath_variable<T> instantiation (node set, number, string or boolean).
8544 PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
8548 case xpath_type_node_set:
8549 return new_xpath_variable<xpath_variable_node_set>(name);
8551 case xpath_type_number:
8552 return new_xpath_variable<xpath_variable_number>(name);
8554 case xpath_type_string:
8555 return new_xpath_variable<xpath_variable_string>(name);
8557 case xpath_type_boolean:
8558 return new_xpath_variable<xpath_variable_boolean>(name);
// Releases a variable object of concrete type T; its storage is returned through xml_memory::deallocate.
8565 template <typename T> PUGI__FN void delete_xpath_variable(T* var)
8568 xml_memory::deallocate(var);
// Releases a variable given its value type: var is downcast to the concrete variable struct for
// that type and passed to the typed delete_xpath_variable overload.
// Any other type value trips the "Invalid variable type" assert.
8571 PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
8575 case xpath_type_node_set:
8576 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
8579 case xpath_type_number:
8580 delete_xpath_variable(static_cast<xpath_variable_number*>(var));
8583 case xpath_type_string:
8584 delete_xpath_variable(static_cast<xpath_variable_string*>(var));
8587 case xpath_type_boolean:
8588 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
8592 assert(false && "Invalid variable type");
// Copies the value stored in rhs into lhs by calling lhs->set() with rhs's value, selected by
// rhs->type(). Returns the result of that set() call.
// Any other type value trips the "Invalid variable type" assert.
8596 PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
8598 switch (rhs->type())
8600 case xpath_type_node_set:
8601 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
8603 case xpath_type_number:
8604 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
8606 case xpath_type_string:
8607 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
8609 case xpath_type_boolean:
8610 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
8613 assert(false && "Invalid variable type");
// Looks up the variable whose name is the character range [begin, end) in `set` and stores the
// result of set->get() in *out_result.
// The name is first copied into a zero-terminated scratch string: the caller's 32-element buffer is
// used when the name plus terminator fits, otherwise a temporary block from xml_memory::allocate.
// Returns false when that temporary allocation fails.
8618 PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
8620 size_t length = static_cast<size_t>(end - begin);
8621 char_t* scratch = buffer;
// ">=" keeps one element free for the terminator
8623 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8625 // need to make dummy on-heap copy
8626 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8627 if (!scratch) return false;
8630 // copy string to zero-terminated buffer and perform lookup
8631 memcpy(scratch, begin, length * sizeof(char_t));
8632 scratch[length] = 0;
8634 *out_result = set->get(scratch);
8636 // free dummy buffer
8637 if (scratch != buffer) xml_memory::deallocate(scratch);
8643 // Internal node set class
// Classifies the order of the nodes in [begin, end) using document_order_comparator.
// Fewer than two nodes count as type_sorted. Otherwise the comparison result of the first pair must
// repeat for every adjacent pair: if it does not, the range is type_unsorted; if it does, the range
// is type_sorted when that result is true and type_sorted_reverse when it is false.
8645 PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
8647 if (end - begin < 2)
8648 return xpath_node_set::type_sorted;
8650 document_order_comparator cmp;
8652 bool first = cmp(begin[0], begin[1]);
8654 for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
8655 if (cmp(it[0], it[1]) != first)
8656 return xpath_node_set::type_unsorted;
8658 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
// Brings [begin, end) into the order selected by rev: type_sorted when false, type_sorted_reverse when true.
// A range tagged type_unsorted is first probed with xpath_get_order and only run through sort()
// when the probe also reports it unsorted; the range is reversed when its order differs from the requested one.
8661 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
8663 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
8665 if (type == xpath_node_set::type_unsorted)
8667 xpath_node_set::type_t sorted = xpath_get_order(begin, end);
8669 if (sorted == xpath_node_set::type_unsorted)
8671 sort(begin, end, document_order_comparator());
8673 type = xpath_node_set::type_sorted;
8679 if (type != order) reverse(begin, end);
// Returns the first node in document order of [begin, end), given how the range is ordered.
// An empty range yields a default-constructed xpath_node; an unsorted range is searched with
// min_element. The results for the two sorted cases are on lines not visible in this listing.
// An unknown type trips the "Invalid node set type" assert and yields a default-constructed xpath_node.
8684 PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
8686 if (begin == end) return xpath_node();
8690 case xpath_node_set::type_sorted:
8693 case xpath_node_set::type_sorted_reverse:
8696 case xpath_node_set::type_unsorted:
8697 return *min_element(begin, end, document_order_comparator());
8700 assert(false && "Invalid node set type");
8701 return xpath_node();
// Growable array of xpath_node values whose storage comes from an xpath_allocator supplied on each call.
// State: an order tag (_type) and three pointers, _begin / _end (used range) and _eos (end of storage).
8705 class xpath_node_set_raw
8707 xpath_node_set::type_t _type;
// starts empty (all pointers null) and tagged type_unsorted
8714 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
8718 xpath_node* begin() const
8723 xpath_node* end() const
// emptiness test: the used range is empty
8730 return _begin == _end;
// number of stored nodes
8735 return static_cast<size_t>(_end - _begin);
// first node in document order according to the current order tag
8738 xpath_node first() const
8740 return xpath_first(_begin, _end, _type);
// out-of-line path of push_back (defined outside the class) that enlarges the storage
8743 void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
// appends one node; push_back_grow is the call visible here
// (presumably only taken when the storage is full - the condition is not visible in this listing)
8745 void push_back(const xpath_node& node, xpath_allocator* alloc)
8750 push_back_grow(node, alloc);
// appends the range [begin_, end_), growing the storage to exactly size + count elements when it does not fit
8753 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
8755 if (begin_ == end_) return;
8757 size_t size_ = static_cast<size_t>(_end - _begin);
8758 size_t capacity = static_cast<size_t>(_eos - _begin);
8759 size_t count = static_cast<size_t>(end_ - begin_);
8761 if (size_ + count > capacity)
8763 // reallocate the old array or allocate a new one
8764 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
8769 _end = data + size_;
8770 _eos = data + size_ + count;
8773 memcpy(_end, begin_, count * sizeof(xpath_node));
// (member whose header is not visible in this listing) sorts the stored range into forward document order and records the new tag
8779 _type = xpath_sort(_begin, _end, _type, false);
// pos must lie inside the used range
8782 void truncate(xpath_node* pos)
8784 assert(_begin <= pos && pos <= _end);
// sorts with duplicate_comparator when the set is unsorted, then drops adjacent duplicates with unique()
8789 void remove_duplicates()
8791 if (_type == xpath_node_set::type_unsorted)
8792 sort(_begin, _end, duplicate_comparator());
8794 _end = unique(_begin, _end);
8797 xpath_node_set::type_t type() const
8802 void set_type(xpath_node_set::type_t value)
// Out-of-line growth path for xpath_node_set_raw: enlarges the storage through alloc->reallocate
// and re-seats the range pointers on the new block.
// The new capacity is capacity + capacity / 2 + 1, so it always grows by at least one element.
8808 PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
8810 size_t capacity = static_cast<size_t>(_eos - _begin);
8812 // get new capacity (1.5x rule)
8813 size_t new_capacity = capacity + capacity / 2 + 1;
8815 // reallocate the old array or allocate a new one
8816 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
// _end is placed at the old capacity
// (presumably this path only runs when the array was full, so size == capacity - confirm at the call site)
8821 _end = data + capacity;
8822 _eos = data + new_capacity;
// Evaluation context passed to expression evaluation: a context node (n) together with a
// position and a size, all set by the constructor.
8830 struct xpath_context
8833 size_t position, size;
8835 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
8848 lex_greater_or_equal,
8860 lex_open_square_brace,
8861 lex_close_square_brace,
// Non-owning view of a lexeme's text as a [begin, end) character range; default-constructed as an empty null range.
8871 struct xpath_lexer_string
8873 const char_t* begin;
8876 xpath_lexer_string(): begin(0), end(0)
// compares the range against a string using strequalrange over (end - begin) characters
8880 bool operator==(const char_t* other) const
8882 size_t length = static_cast<size_t>(end - begin);
8884 return strequalrange(other, begin, length);
// Lexer state and scanning logic for XPath query text.
// NOTE(review): the class header, the scanning method's header and its case labels are not visible in this listing.
// start of the current lexeme, kept for error reporting
8891 const char_t* _cur_lexeme_pos;
// text range of the current lexeme (filled for variable references, numbers, names and quoted strings)
8892 xpath_lexer_string _cur_lexeme_contents;
// kind of the current lexeme
8894 lexeme_t _cur_lexeme;
// starts scanning at the beginning of `query`
8897 explicit xpath_lexer(const char_t* query): _cur(query)
8902 const char_t* state() const
// --- scanning: skip whitespace, remember the lexeme start, then classify by the current character ---
8909 const char_t* cur = _cur;
8911 while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
8913 // save lexeme position for error reporting
8914 _cur_lexeme_pos = cur;
8919 _cur_lexeme = lex_eof;
// a following '=' selects the two-character comparison lexeme
8923 if (*(cur+1) == '=')
8926 _cur_lexeme = lex_greater_or_equal;
8931 _cur_lexeme = lex_greater;
8936 if (*(cur+1) == '=')
8939 _cur_lexeme = lex_less_or_equal;
8944 _cur_lexeme = lex_less;
8949 if (*(cur+1) == '=')
8952 _cur_lexeme = lex_not_equal;
8956 _cur_lexeme = lex_none;
8962 _cur_lexeme = lex_equal;
8968 _cur_lexeme = lex_plus;
8974 _cur_lexeme = lex_minus;
8980 _cur_lexeme = lex_multiply;
8986 _cur_lexeme = lex_union;
// variable reference: a start symbol, further symbols, and optionally ':' plus more symbols (qname)
8993 if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
8995 _cur_lexeme_contents.begin = cur;
8997 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
8999 if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
9003 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9006 _cur_lexeme_contents.end = cur;
9008 _cur_lexeme = lex_var_ref;
9012 _cur_lexeme = lex_none;
9019 _cur_lexeme = lex_open_brace;
9025 _cur_lexeme = lex_close_brace;
9031 _cur_lexeme = lex_open_square_brace;
9037 _cur_lexeme = lex_close_square_brace;
9043 _cur_lexeme = lex_comma;
9048 if (*(cur+1) == '/')
9051 _cur_lexeme = lex_double_slash;
9056 _cur_lexeme = lex_slash;
// '.' handling: "..", a number that starts with '.', or a single dot
9061 if (*(cur+1) == '.')
9064 _cur_lexeme = lex_double_dot;
9066 else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
9068 _cur_lexeme_contents.begin = cur; // .
9072 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9074 _cur_lexeme_contents.end = cur;
9076 _cur_lexeme = lex_number;
9081 _cur_lexeme = lex_dot;
9087 _cur_lexeme = lex_axis_attribute;
// quoted string: contents run up to the matching terminator character; lex_none is assigned on the failure path
9094 char_t terminator = *cur;
9098 _cur_lexeme_contents.begin = cur;
9099 while (*cur && *cur != terminator) cur++;
9100 _cur_lexeme_contents.end = cur;
9103 _cur_lexeme = lex_none;
9107 _cur_lexeme = lex_quoted_string;
9114 if (*(cur+1) == ':')
9117 _cur_lexeme = lex_double_colon;
9121 _cur_lexeme = lex_none;
// remaining characters: a number (digits, with a second digit run scanned afterwards) or a name
9126 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
9128 _cur_lexeme_contents.begin = cur;
9130 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9136 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9139 _cur_lexeme_contents.end = cur;
9141 _cur_lexeme = lex_number;
9143 else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
9145 _cur_lexeme_contents.begin = cur;
9147 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9151 if (cur[1] == '*') // namespace test ncname:*
9155 else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
9159 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9163 _cur_lexeme_contents.end = cur;
9165 _cur_lexeme = lex_string;
9169 _cur_lexeme = lex_none;
// --- accessors ---
9176 lexeme_t current() const
// start position of the current lexeme
9181 const char_t* current_pos() const
9183 return _cur_lexeme_pos;
// text of the current lexeme; only valid for the lexeme kinds listed in the assert
9186 const xpath_lexer_string& contents() const
9188 assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
9190 return _cur_lexeme_contents;
// AST node kinds: operators, predicates/filters, constants, variables, XPath core functions
// (the _0/_1/_2/_3 suffix is the argument count), location steps, and optimizer-introduced forms.
9197 ast_op_or, // left or right
9198 ast_op_and, // left and right
9199 ast_op_equal, // left = right
9200 ast_op_not_equal, // left != right
9201 ast_op_less, // left < right
9202 ast_op_greater, // left > right
9203 ast_op_less_or_equal, // left <= right
9204 ast_op_greater_or_equal, // left >= right
9205 ast_op_add, // left + right
9206 ast_op_subtract, // left - right
9207 ast_op_multiply, // left * right
9208 ast_op_divide, // left / right
9209 ast_op_mod, // left % right
9210 ast_op_negate, // - left (unary minus)
9211 ast_op_union, // left | right
9212 ast_predicate, // apply predicate to set; next points to next predicate
9213 ast_filter, // select * from left where right
9214 ast_string_constant, // string constant
9215 ast_number_constant, // number constant
9216 ast_variable, // variable
9217 ast_func_last, // last()
9218 ast_func_position, // position()
9219 ast_func_count, // count(left)
9220 ast_func_id, // id(left)
9221 ast_func_local_name_0, // local-name()
9222 ast_func_local_name_1, // local-name(left)
9223 ast_func_namespace_uri_0, // namespace-uri()
9224 ast_func_namespace_uri_1, // namespace-uri(left)
9225 ast_func_name_0, // name()
9226 ast_func_name_1, // name(left)
9227 ast_func_string_0, // string()
9228 ast_func_string_1, // string(left)
9229 ast_func_concat, // concat(left, right, siblings)
9230 ast_func_starts_with, // starts-with(left, right)
9231 ast_func_contains, // contains(left, right)
9232 ast_func_substring_before, // substring-before(left, right)
9233 ast_func_substring_after, // substring-after(left, right)
9234 ast_func_substring_2, // substring(left, right)
9235 ast_func_substring_3, // substring(left, right, third)
9236 ast_func_string_length_0, // string-length()
9237 ast_func_string_length_1, // string-length(left)
9238 ast_func_normalize_space_0, // normalize-space()
9239 ast_func_normalize_space_1, // normalize-space(left)
9240 ast_func_translate, // translate(left, right, third)
9241 ast_func_boolean, // boolean(left)
9242 ast_func_not, // not(left)
9243 ast_func_true, // true()
9244 ast_func_false, // false()
9245 ast_func_lang, // lang(left)
9246 ast_func_number_0, // number()
9247 ast_func_number_1, // number(left)
9248 ast_func_sum, // sum(left)
9249 ast_func_floor, // floor(left)
9250 ast_func_ceiling, // ceiling(left)
9251 ast_func_round, // round(left)
9252 ast_step, // process set left with step
9253 ast_step_root, // select root node
9255 ast_opt_translate_table, // translate(left, right, third) where right/third are constants
9256 ast_opt_compare_attribute // @name = 'string'
9262 axis_ancestor_or_self,
9266 axis_descendant_or_self,
9268 axis_following_sibling,
9272 axis_preceding_sibling,
9281 nodetest_type_comment,
9286 nodetest_all_in_namespace
9294 predicate_constant_one
// Wraps an axis_t value in a type: axis_to_type<N>::axis equals N, so an axis can be passed as a
// template/type argument and read back as a constant (see the T::axis uses in step_fill/step_do).
9304 template <axis_t N> struct axis_to_type
9306 static const axis_t axis;
// out-of-class definition of the static member
9309 template <axis_t N> const axis_t axis_to_type<N>::axis = N;
9311 class xpath_ast_node
9321 // for ast_step/ast_predicate/ast_filter
9324 // tree node structure
9325 xpath_ast_node* _left;
9326 xpath_ast_node* _right;
9327 xpath_ast_node* _next;
9331 // value for ast_string_constant
9332 const char_t* string;
9333 // value for ast_number_constant
9335 // variable for ast_variable
9336 xpath_variable* variable;
9337 // node test for ast_step (node name/namespace/node type/pi target)
9338 const char_t* nodetest;
9339 // table for ast_opt_translate_table
9340 const unsigned char* table;
9343 xpath_ast_node(const xpath_ast_node&);
9344 xpath_ast_node& operator=(const xpath_ast_node&);
// Evaluates an equality-style comparison of two sub-expressions with comparator `comp`,
// choosing the operand conversion from their static return types:
//  - neither is a node set: compare as booleans if either is boolean, else as numbers if either is
//    a number, else as strings if either is a string;
//  - both are node sets: apply comp to the string values of each (left node, right node) pair;
//  - exactly one is a node set: compare the other operand (as boolean, number or string, by its type)
//    against the node set; for numbers each node's string value is converted with convert_string_to_number.
// xpath_allocator_capture objects scope the temporary allocations made on stack.result.
// NOTE(review): the statements executed when comp succeeds, and the handling right after the
// `lt == xpath_type_node_set` check, are not visible in this listing.
9346 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9348 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9350 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9352 if (lt == xpath_type_boolean || rt == xpath_type_boolean)
9353 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9354 else if (lt == xpath_type_number || rt == xpath_type_number)
9355 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9356 else if (lt == xpath_type_string || rt == xpath_type_string)
9358 xpath_allocator_capture cr(stack.result);
9360 xpath_string ls = lhs->eval_string(c, stack);
9361 xpath_string rs = rhs->eval_string(c, stack);
9363 return comp(ls, rs);
9366 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9368 xpath_allocator_capture cr(stack.result);
9370 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9371 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
// pairwise comparison of node string values
9373 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9374 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9376 xpath_allocator_capture cri(stack.result);
9378 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
// mixed case: the code below treats lhs as the non-node-set operand and rhs as the node set
9386 if (lt == xpath_type_node_set)
9392 if (lt == xpath_type_boolean)
9393 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9394 else if (lt == xpath_type_number)
9396 xpath_allocator_capture cr(stack.result);
9398 double l = lhs->eval_number(c, stack);
9399 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9401 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9403 xpath_allocator_capture cri(stack.result);
9405 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9411 else if (lt == xpath_type_string)
9413 xpath_allocator_capture cr(stack.result);
9415 xpath_string l = lhs->eval_string(c, stack);
9416 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9418 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9420 xpath_allocator_capture cri(stack.result);
9422 if (comp(l, string_value(*ri, stack.result)))
9430 assert(false && "Wrong types");
// Tells whether evaluation may stop after one node for the given set order and evaluation mode:
// for a type_sorted set, any mode other than nodeset_eval_all qualifies; for every other order,
// only nodeset_eval_any does.
9434 static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
9436 return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
// Evaluates a relational comparison of two sub-expressions with comparator `comp`; all operands
// are compared as numbers.
//  - neither is a node set: compare the two eval_number results directly;
//  - both are node sets: compare the numeric value of each left node with that of each right node;
//  - exactly one is a node set: compare the other operand's number with each node's numeric value,
//    keeping the original left/right argument order for comp.
// A node's numeric value is convert_string_to_number applied to its string value.
// NOTE(review): the statements executed when comp succeeds are not visible in this listing.
9439 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9441 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9443 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9444 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9445 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9447 xpath_allocator_capture cr(stack.result);
9449 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9450 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9452 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9454 xpath_allocator_capture cri(stack.result);
// the left value is converted once per left node, outside the inner loop
9456 double l = convert_string_to_number(string_value(*li, stack.result).c_str());
9458 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9460 xpath_allocator_capture crii(stack.result);
9462 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9469 else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
9471 xpath_allocator_capture cr(stack.result);
9473 double l = lhs->eval_number(c, stack);
9474 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9476 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9478 xpath_allocator_capture cri(stack.result);
9480 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9486 else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
9488 xpath_allocator_capture cr(stack.result);
9490 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9491 double r = rhs->eval_number(c, stack);
9493 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9495 xpath_allocator_capture cri(stack.result);
9497 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
9505 assert(false && "Wrong types");
// Applies a non-numeric predicate expression to the nodes of ns starting at index `first`.
// Each node is evaluated with a context made of the node, a running position i and
// size = ns.size() - first; expr is evaluated as a boolean.
// NOTE(review): the initialisation of i, what is done with matching nodes, and the use of `once`
// are on lines not visible in this listing.
9510 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9512 assert(ns.size() >= first);
9513 assert(expr->rettype() != xpath_type_number);
9516 size_t size = ns.size() - first;
9518 xpath_node* last = ns.begin() + first;
9520 // remove_if... or well, sort of
9521 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9523 xpath_context c(*it, i, size);
9525 if (expr->eval_boolean(c, stack))
// Applies a numeric predicate expression to the nodes of ns starting at index `first`.
// Each node is evaluated with a context made of the node, a running position i and
// size = ns.size() - first; a node matches when expr's numeric value equals its position i.
// NOTE(review): the initialisation of i, what is done with matching nodes, and the use of `once`
// are on lines not visible in this listing.
9536 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9538 assert(ns.size() >= first);
9539 assert(expr->rettype() == xpath_type_number);
9542 size_t size = ns.size() - first;
9544 xpath_node* last = ns.begin() + first;
9546 // remove_if... or well, sort of
9547 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9549 xpath_context c(*it, i, size);
9551 if (expr->eval_number(c, stack) == i)
// Applies a numeric predicate by evaluating expr a single time rather than once per node:
// the context uses an empty node, position 1 and size = ns.size() - first.
// When the result er lies in [1, size], it is converted to an index and the node at that 1-based
// position (counted from `first`) is picked.
// NOTE(review): how the picked node is stored back and how ns is truncated is not visible in this listing.
9562 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
9564 assert(ns.size() >= first);
9565 assert(expr->rettype() == xpath_type_number);
9567 size_t size = ns.size() - first;
9569 xpath_node* last = ns.begin() + first;
9571 xpath_context c(xpath_node(), 1, size);
9573 double er = expr->eval_number(c, stack);
9575 if (er >= 1.0 && er <= size)
9577 size_t eri = static_cast<size_t>(er);
9581 xpath_node r = last[eri - 1];
// Applies this predicate/filter node's expression (_right) to the nodes of ns from index `first` on.
// Does nothing when there are no nodes past `first`. Dispatch:
//  - _test is predicate_constant or predicate_constant_one: apply_predicate_number_const;
//  - _right returns a number: apply_predicate_number;
//  - otherwise: apply_predicate_boolean.
9590 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
9592 if (ns.size() == first) return;
9594 assert(_type == ast_filter || _type == ast_predicate);
9596 if (_test == predicate_constant || _test == predicate_constant_one)
9597 apply_predicate_number_const(ns, first, _right, stack);
9598 else if (_right->rettype() == xpath_type_number)
9599 apply_predicate_number(ns, first, _right, stack, once);
9601 apply_predicate_boolean(ns, first, _right, stack, once);
// Applies the chain of predicates that starts at _right and is linked through _next to the nodes
// of ns from index `first` on. Does nothing when there are no nodes past `first`.
// `once` is passed as true only to the last predicate of the chain, and only if eval_once() allows
// it for the set's order and the requested evaluation mode.
9604 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
9606 if (ns.size() == first) return;
9608 bool last_once = eval_once(ns.type(), eval);
9610 for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
9611 pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
// Tests attribute `a` of `parent` against this step's node test and appends it to ns when it passes.
// Namespace declarations (see is_xpath_attribute) are never pushed. Visible tests:
//  - exact name match against _data.nodetest;
//  - nodetest_type_node: any attribute;
//  - nodetest_all_in_namespace: name starts with _data.nodetest.
// NOTE(review): the switch header, some case labels and the return statements are not visible in
// this listing; callers combine the bool result with `once`, so it presumably reports whether a node was pushed.
9614 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
// a null name is treated as an empty string
9618 const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
9623 if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
9625 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9630 case nodetest_type_node:
9632 if (is_xpath_attribute(name))
9634 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9639 case nodetest_all_in_namespace:
9640 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
9642 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
// Tests node `n` against this step's node test and appends it to ns when it passes. Visible tests:
//  - element whose name equals _data.nodetest;
//  - nodetest_type_node: any node;
//  - nodetest_type_comment: comment nodes;
//  - nodetest_type_text: pcdata and cdata nodes;
//  - nodetest_type_pi: processing instructions (a further test also requires the name to equal _data.nodetest);
//  - any element;
//  - nodetest_all_in_namespace: element whose name starts with _data.nodetest.
// NOTE(review): the switch header, some case labels and the return statements are not visible in
// this listing. The assert text says "Unknown axis" although the visible case labels are node tests.
9654 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
9658 xml_node_type type = PUGI__NODETYPE(n);
9663 if (type == node_element && n->name && strequal(n->name, _data.nodetest))
9665 ns.push_back(xml_node(n), alloc);
9670 case nodetest_type_node:
9671 ns.push_back(xml_node(n), alloc);
9674 case nodetest_type_comment:
9675 if (type == node_comment)
9677 ns.push_back(xml_node(n), alloc);
9682 case nodetest_type_text:
9683 if (type == node_pcdata || type == node_cdata)
9685 ns.push_back(xml_node(n), alloc);
9690 case nodetest_type_pi:
9691 if (type == node_pi)
9693 ns.push_back(xml_node(n), alloc);
9699 if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
9701 ns.push_back(xml_node(n), alloc);
9707 if (type == node_element)
9709 ns.push_back(xml_node(n), alloc);
9714 case nodetest_all_in_namespace:
9715 if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
9717 ns.push_back(xml_node(n), alloc);
9723 assert(false && "Unknown axis");
// Collects into ns the nodes reached from node `n` along the axis encoded in T (T::axis), offering
// every candidate to step_push, which applies the node test.
// `once` is combined with each step_push result (`step_push(...) & once`); presumably traversal
// stops after the first pushed node when it is set - the statements after those checks are not
// visible in this listing, nor are the switch header and some case labels.
9729 template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
9731 const axis_t axis = T::axis;
// attribute axis: every attribute of n
9735 case axis_attribute:
9737 for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
9738 if (step_push(ns, a, n, alloc) & once)
// direct children of n, in sibling order
9746 for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
9747 if (step_push(ns, c, alloc) & once)
// descendant axes: walk of the subtree below n (descend first, then next sibling, climbing when siblings run out)
9753 case axis_descendant:
9754 case axis_descendant_or_self:
9756 if (axis == axis_descendant_or_self)
9757 if (step_push(ns, n, alloc) & once)
9760 xml_node_struct* cur = n->first_child;
9764 if (step_push(ns, cur, alloc) & once)
9767 if (cur->first_child)
9768 cur = cur->first_child;
9771 while (!cur->next_sibling)
// back at the starting node: the subtree is exhausted
9775 if (cur == n) return;
9778 cur = cur->next_sibling;
9785 case axis_following_sibling:
9787 for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
9788 if (step_push(ns, c, alloc) & once)
9794 case axis_preceding_sibling:
// walks backwards through prev_sibling_c and stops at a node that has no next_sibling
// (presumably prev_sibling_c wraps from the first sibling to the last one - confirm)
9796 for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
9797 if (step_push(ns, c, alloc) & once)
9803 case axis_following:
9805 xml_node_struct* cur = n;
9807 // exit from this node so that we don't include descendants
9808 while (!cur->next_sibling)
9815 cur = cur->next_sibling;
9819 if (step_push(ns, cur, alloc) & once)
9822 if (cur->first_child)
9823 cur = cur->first_child;
9826 while (!cur->next_sibling)
9833 cur = cur->next_sibling;
9840 case axis_preceding:
9842 xml_node_struct* cur = n;
9844 // exit from this node so that we don't include descendants
9845 while (!cur->prev_sibling_c->next_sibling)
9852 cur = cur->prev_sibling_c;
// descend to the last child (reached through first_child->prev_sibling_c) before visiting the node itself
9856 if (cur->first_child)
9857 cur = cur->first_child->prev_sibling_c;
9860 // leaf node, can't be ancestor
9861 if (step_push(ns, cur, alloc) & once)
9864 while (!cur->prev_sibling_c->next_sibling)
// ancestors of n are not offered to step_push on this axis
9870 if (!node_is_ancestor(cur, n))
9871 if (step_push(ns, cur, alloc) & once)
9875 cur = cur->prev_sibling_c;
// ancestor axes: optionally n itself, then the parent chain starting at n->parent
9883 case axis_ancestor_or_self:
9885 if (axis == axis_ancestor_or_self)
9886 if (step_push(ns, n, alloc) & once)
9889 xml_node_struct* cur = n->parent;
9893 if (step_push(ns, cur, alloc) & once)
// the node itself
9904 step_push(ns, n, alloc);
// the parent of the node
9912 step_push(ns, n->parent, alloc);
9918 assert(false && "Unimplemented axis");
// Collects into ns the nodes reached from attribute `a` (owned by node `p`) along the axis encoded
// in T (T::axis), offering every candidate to step_push.
// The attribute itself is only offered when the node test is nodetest_type_node.
// NOTE(review): the switch header, some case labels and the statements that follow the
// `step_push(...) & once` checks are not visible in this listing.
9922 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
9924 const axis_t axis = T::axis;
// ancestor axes: optionally the attribute itself, then the chain starting at the owning node p
9929 case axis_ancestor_or_self:
9931 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
9932 if (step_push(ns, a, p, alloc) & once)
9935 xml_node_struct* cur = p;
9939 if (step_push(ns, cur, alloc) & once)
9948 case axis_descendant_or_self:
9951 if (_test == nodetest_type_node) // reject attributes based on principal node type test
9952 step_push(ns, a, p, alloc);
// following axis: traversal that starts from the owning node p
9957 case axis_following:
9959 xml_node_struct* cur = p;
9963 if (cur->first_child)
9964 cur = cur->first_child;
9967 while (!cur->next_sibling)
9974 cur = cur->next_sibling;
9977 if (step_push(ns, cur, alloc) & once)
// the owning node p is offered directly
9986 step_push(ns, p, alloc);
9991 case axis_preceding:
9993 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
9994 step_fill(ns, p, alloc, once, v);
9999 assert(false && "Unimplemented axis");
// xpath_node overload of step_fill: dispatches to the node overload or the
// attribute overload depending on what kind of item `xn` holds.
10003 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
10005 const axis_t axis = T::axis;
// axes for which the attribute overload is invoked; for every other axis an attribute context contributes nothing here
10006 const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
10009 step_fill(ns, xn.node().internal_object(), alloc, once, v);
// an attribute context is only processed when it also has a parent node
10010 else if (axis_has_attributes && xn.attribute() && xn.parent())
10011 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
// Evaluates a location step for the compile-time axis T::axis.
// Candidate nodes are gathered with step_fill, either from every node produced by
// _left or, when used without _left, from the context node c.n; predicates hanging
// off _right are applied to each freshly added batch. Returns the resulting raw node set.
10014 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
10016 const axis_t axis = T::axis;
// reverse axes produce nodes in reverse document order, so the set is tagged accordingly
10017 const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
10018 const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
// conditions under which step_fill may stop after the first node it pushes:
// a named attribute lookup, no predicates and eval_once() agreeing, or a single predicate classified as constant one
10021 (axis == axis_attribute && _test == nodetest_name) ||
10022 (!_right && eval_once(axis_type, eval)) ||
10023 (_right && !_right->_next && _right->_test == predicate_constant_one);
10025 xpath_node_set_raw ns;
10026 ns.set_type(axis_type);
// the input set is always evaluated in full (nodeset_eval_all), regardless of `eval`
10030 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
10032 // self axis preserves the original order
10033 if (axis == axis_self) ns.set_type(s.type());
10035 for (const xpath_node* it = s.begin(); it != s.end(); ++it)
// remember where this context node's results start so predicates only see the new batch
10037 size_t size = ns.size();
10039 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
10040 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
10042 step_fill(ns, *it, stack.result, once, v);
10043 if (_right) apply_predicates(ns, size, stack, eval);
// single context node: the whole set is one batch starting at index 0
10048 step_fill(ns, c.n, stack.result, once, v);
10049 if (_right) apply_predicates(ns, 0, stack, eval);
10052 // child, attribute and self axes always generate unique set of nodes
10053 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
10054 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
10055 ns.remove_duplicates();
// Constructs a string-constant node. Only the pointer `value` is stored (no copy is
// made here), so the pointed-to string must outlive the node.
10061 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
10062 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
// this overload is reserved for ast_string_constant
10064 assert(type == ast_string_constant);
10065 _data.string = value;
// Constructs a number-constant node holding `value` in _data.number.
10068 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
10069 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
// this overload is reserved for ast_number_constant
10071 assert(type == ast_number_constant);
10072 _data.number = value;
// Constructs a variable-reference node. The node keeps the xpath_variable pointer;
// the variable's value is read later, when the node is evaluated.
10075 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
10076 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
// this overload is reserved for ast_variable
10078 assert(type == ast_variable);
10079 _data.variable = value;
// General-purpose constructor for operators and function calls: stores the node type,
// the static return type and up to two operands (both default to null).
10082 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
10083 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
// Constructs a location-step node: `left` is the preceding part of the path, `axis`
// and `test` select nodes, and `contents` is stored in _data.nodetest.
// The return type is always a node set; predicates (_right) start out empty.
10087 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
10088 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
// this overload is reserved for ast_step
10090 assert(type == ast_step);
10091 _data.nodetest = contents;
// Constructs a filter or predicate node; `test` (a predicate_t classification) is
// stored in _test. The return type is always a node set.
10094 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
10095 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
// this overload is reserved for ast_filter and ast_predicate
10097 assert(type == ast_filter || type == ast_predicate);
// NOTE(review): presumably assigns `value` to _next (the sibling link walked via ->_next
// elsewhere in this class) - confirm against the body.
10100 void set_next(xpath_ast_node* value)
// NOTE(review): presumably assigns `value` to _right (right operand / predicate chain) -
// confirm against the body.
10105 void set_right(xpath_ast_node* value)
// Evaluates this node as a boolean in context `c`.
// Logical/comparison operators and boolean-valued functions are computed directly;
// a variable of boolean type yields its value; other node kinds are evaluated in
// their own return type (number, string or node set) and converted to boolean.
// NOTE(review): xpath_allocator_capture objects appear to scope temporary allocations
// made on stack.result - confirm against its definition.
10110 bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
// logical or: || short-circuits, so _right is not evaluated when _left is true
10115 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
// logical and: && short-circuits, so _right is not evaluated when _left is false
10118 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
10121 return compare_eq(_left, _right, c, stack, equal_to());
10123 case ast_op_not_equal:
10124 return compare_eq(_left, _right, c, stack, not_equal_to());
10127 return compare_rel(_left, _right, c, stack, less());
10129 case ast_op_greater:
// a > b is computed as b < a by swapping the operands
10130 return compare_rel(_right, _left, c, stack, less());
10132 case ast_op_less_or_equal:
10133 return compare_rel(_left, _right, c, stack, less_equal());
10135 case ast_op_greater_or_equal:
// a >= b is computed as b <= a by swapping the operands
10136 return compare_rel(_right, _left, c, stack, less_equal());
10138 case ast_func_starts_with:
10140 xpath_allocator_capture cr(stack.result);
10142 xpath_string lr = _left->eval_string(c, stack);
10143 xpath_string rr = _right->eval_string(c, stack);
10145 return starts_with(lr.c_str(), rr.c_str());
10148 case ast_func_contains:
10150 xpath_allocator_capture cr(stack.result);
10152 xpath_string lr = _left->eval_string(c, stack);
10153 xpath_string rr = _right->eval_string(c, stack);
// contains() holds when the second string occurs somewhere in the first
10155 return find_substring(lr.c_str(), rr.c_str()) != 0;
10158 case ast_func_boolean:
10159 return _left->eval_boolean(c, stack);
10162 return !_left->eval_boolean(c, stack);
10164 case ast_func_true:
10167 case ast_func_false:
10170 case ast_func_lang:
// lang() is false whenever the context item is an attribute
10172 if (c.n.attribute()) return false;
10174 xpath_allocator_capture cr(stack.result);
10176 xpath_string lang = _left->eval_string(c, stack);
// search the context node and then its ancestors for an xml:lang attribute
10178 for (xml_node n = c.n.node(); n; n = n.parent())
10180 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
10184 const char_t* value = a.value();
10186 // strnicmp / strncasecmp is not portable
// compare the requested language with the attribute value, ignoring ASCII case
10187 for (const char_t* lit = lang.c_str(); *lit; ++lit)
10189 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
// accept an exact match, or a value that continues with a '-' separated suffix
10193 return *value == 0 || *value == '-';
10200 case ast_opt_compare_attribute:
// the right operand is either a string constant or a variable; fetch the string from whichever it is
10202 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
// look the attribute up by the name stored on the left-hand step
10204 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
// true only if the attribute exists, its value equals `value`, and is_xpath_attribute accepts its name
10206 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
// the variable's dynamic type has to agree with the type recorded at parse time
10211 assert(_rettype == _data.variable->type());
10213 if (_rettype == xpath_type_boolean)
10214 return _data.variable->get_boolean();
10216 // fallthrough to type conversion
10223 case xpath_type_number:
10224 return convert_number_to_boolean(eval_number(c, stack));
10226 case xpath_type_string:
10228 xpath_allocator_capture cr(stack.result);
// a string converts to true exactly when it is non-empty
10230 return !eval_string(c, stack).empty();
10233 case xpath_type_node_set:
10235 xpath_allocator_capture cr(stack.result);
// a node set converts to true exactly when it is non-empty; only emptiness is inspected, hence nodeset_eval_any
10237 return !eval_node_set(c, stack, nodeset_eval_any).empty();
10241 assert(false && "Wrong expression for return type boolean");
// Evaluates this node as a number (double) in context `c`.
// Arithmetic operators and number-valued functions are computed directly; a variable
// of number type yields its value; other node kinds are evaluated in their own
// return type (boolean, string or node set) and converted to a number.
10248 double eval_number(const xpath_context& c, const xpath_stack& stack)
10253 return _left->eval_number(c, stack) + _right->eval_number(c, stack);
10255 case ast_op_subtract:
10256 return _left->eval_number(c, stack) - _right->eval_number(c, stack);
10258 case ast_op_multiply:
10259 return _left->eval_number(c, stack) * _right->eval_number(c, stack);
10261 case ast_op_divide:
// plain floating-point division; no special handling of a zero divisor is done here
10262 return _left->eval_number(c, stack) / _right->eval_number(c, stack);
// remainder is computed with fmod on the two operand values
10265 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
10267 case ast_op_negate:
10268 return -_left->eval_number(c, stack);
10270 case ast_number_constant:
10271 return _data.number;
10273 case ast_func_last:
// last() is the context size
10274 return static_cast<double>(c.size);
10276 case ast_func_position:
// position() is the context position
10277 return static_cast<double>(c.position);
10279 case ast_func_count:
10281 xpath_allocator_capture cr(stack.result);
// the whole set has to be materialized (nodeset_eval_all) to count it
10283 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
10286 case ast_func_string_length_0:
10288 xpath_allocator_capture cr(stack.result);
// zero-argument form: measures the string value of the context node
10290 return static_cast<double>(string_value(c.n, stack.result).length());
10293 case ast_func_string_length_1:
10295 xpath_allocator_capture cr(stack.result);
10297 return static_cast<double>(_left->eval_string(c, stack).length());
10300 case ast_func_number_0:
10302 xpath_allocator_capture cr(stack.result);
// zero-argument form: converts the string value of the context node
10304 return convert_string_to_number(string_value(c.n, stack.result).c_str());
10307 case ast_func_number_1:
10308 return _left->eval_number(c, stack);
10312 xpath_allocator_capture cr(stack.result);
10316 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
// accumulate the numeric value of every node's string value
10318 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
// separate capture per iteration, scoped to this node's string value
10320 xpath_allocator_capture cri(stack.result);
10322 r += convert_string_to_number(string_value(*it, stack.result).c_str());
10328 case ast_func_floor:
10330 double r = _left->eval_number(c, stack);
// r == r is false only for NaN; NaN is returned unchanged without calling floor
10332 return r == r ? floor(r) : r;
10335 case ast_func_ceiling:
10337 double r = _left->eval_number(c, stack);
// r == r is false only for NaN; NaN is returned unchanged without calling ceil
10339 return r == r ? ceil(r) : r;
10342 case ast_func_round:
10343 return round_nearest_nzero(_left->eval_number(c, stack));
// the variable's dynamic type has to agree with the type recorded at parse time
10347 assert(_rettype == _data.variable->type());
10349 if (_rettype == xpath_type_number)
10350 return _data.variable->get_number();
10352 // fallthrough to type conversion
10359 case xpath_type_boolean:
// true converts to 1, false to 0
10360 return eval_boolean(c, stack) ? 1 : 0;
10362 case xpath_type_string:
10364 xpath_allocator_capture cr(stack.result);
10366 return convert_string_to_number(eval_string(c, stack).c_str());
10369 case xpath_type_node_set:
10371 xpath_allocator_capture cr(stack.result);
// a node set is converted through its string form (eval_string), then parsed as a number
10373 return convert_string_to_number(eval_string(c, stack).c_str());
10377 assert(false && "Wrong expression for return type number");
// Evaluates a concat() call: _left is the first argument and the remaining arguments
// form the chain _right, _right->_next, ... Each argument is evaluated to a string,
// the lengths are summed, and the pieces are copied into one buffer allocated from
// stack.result, which is returned as the final string.
10385 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
10387 assert(_type == ast_func_concat);
10389 xpath_allocator_capture ct(stack.temp);
10391 // count the string number
10393 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
10395 // gather all strings
// up to four arguments fit in this on-stack array
10396 xpath_string static_buffer[4];
10397 xpath_string* buffer = static_buffer;
10399 // allocate on-heap for large concats
10400 if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
// NOTE(review): this storage is raw memory from the temp allocator, used via assignment below without constructing elements - confirm xpath_string tolerates that
10402 buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
10406 // evaluate all strings to temporary stack
// NOTE(review): assumes xpath_stack's members are ordered {result, temp}, so this swaps the two allocators
10407 xpath_stack swapped_stack = {stack.temp, stack.result};
10409 buffer[0] = _left->eval_string(c, swapped_stack);
10412 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
10413 assert(pos == count);
10415 // get total length
10417 for (size_t i = 0; i < count; ++i) length += buffer[i].length();
10419 // create final string
// one extra character is reserved beyond the combined length
10420 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
// ri is the write cursor into result
10423 char_t* ri = result;
10425 for (size_t j = 0; j < count; ++j)
10426 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
10431 return xpath_string::from_heap_preallocated(result, ri);
// Evaluates this node as a string in context `c`.
// String constants and string-valued functions are computed directly; a variable of
// string type yields its value; other node kinds are evaluated in their own return
// type (boolean, number or node set) and converted to a string.
// Several cases evaluate arguments on a "swapped" stack so that intermediate strings
// are allocated from stack.temp while the final result is allocated from stack.result.
// NOTE(review): that reading assumes xpath_stack's members are ordered {result, temp}.
10434 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
10438 case ast_string_constant:
10439 return xpath_string::from_const(_data.string);
10441 case ast_func_local_name_0:
// zero-argument form: operates on the context node
10443 xpath_node na = c.n;
10445 return xpath_string::from_const(local_name(na));
10448 case ast_func_local_name_1:
10450 xpath_allocator_capture cr(stack.result);
// only the first node of the argument set is needed, hence nodeset_eval_first
10452 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10453 xpath_node na = ns.first();
10455 return xpath_string::from_const(local_name(na));
10458 case ast_func_name_0:
// zero-argument form: operates on the context node
10460 xpath_node na = c.n;
10462 return xpath_string::from_const(qualified_name(na));
10465 case ast_func_name_1:
10467 xpath_allocator_capture cr(stack.result);
// only the first node of the argument set is needed, hence nodeset_eval_first
10469 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10470 xpath_node na = ns.first();
10472 return xpath_string::from_const(qualified_name(na));
10475 case ast_func_namespace_uri_0:
// zero-argument form: operates on the context node
10477 xpath_node na = c.n;
10479 return xpath_string::from_const(namespace_uri(na));
10482 case ast_func_namespace_uri_1:
10484 xpath_allocator_capture cr(stack.result);
// only the first node of the argument set is needed, hence nodeset_eval_first
10486 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10487 xpath_node na = ns.first();
10489 return xpath_string::from_const(namespace_uri(na));
10492 case ast_func_string_0:
10493 return string_value(c.n, stack.result);
10495 case ast_func_string_1:
10496 return _left->eval_string(c, stack);
10498 case ast_func_concat:
10499 return eval_string_concat(c, stack);
10501 case ast_func_substring_before:
10503 xpath_allocator_capture cr(stack.temp);
10505 xpath_stack swapped_stack = {stack.temp, stack.result};
10507 xpath_string s = _left->eval_string(c, swapped_stack);
10508 xpath_string p = _right->eval_string(c, swapped_stack);
10510 const char_t* pos = find_substring(s.c_str(), p.c_str());
// when p is found, copy the part of s that precedes it into stack.result; otherwise the result is empty
10512 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
10515 case ast_func_substring_after:
10517 xpath_allocator_capture cr(stack.temp);
10519 xpath_stack swapped_stack = {stack.temp, stack.result};
10521 xpath_string s = _left->eval_string(c, swapped_stack);
10522 xpath_string p = _right->eval_string(c, swapped_stack);
10524 const char_t* pos = find_substring(s.c_str(), p.c_str());
10525 if (!pos) return xpath_string();
// the result is everything in s after the matched occurrence of p
10527 const char_t* rbegin = pos + p.length();
10528 const char_t* rend = s.c_str() + s.length();
// a heap-backed s is copied into stack.result; otherwise the tail of s is referenced in place
10530 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10533 case ast_func_substring_2:
10535 xpath_allocator_capture cr(stack.temp);
10537 xpath_stack swapped_stack = {stack.temp, stack.result};
10539 xpath_string s = _left->eval_string(c, swapped_stack);
10540 size_t s_length = s.length();
// the start position is rounded to the nearest integer before use
10542 double first = round_nearest(_right->eval_number(c, stack));
10544 if (is_nan(first)) return xpath_string(); // NaN
// a start position past the end of the string selects nothing
10545 else if (first >= s_length + 1) return xpath_string();
// positions are 1-based; anything below 1 is clamped to the first character
10547 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10548 assert(1 <= pos && pos <= s_length + 1);
10550 const char_t* rbegin = s.c_str() + (pos - 1);
10551 const char_t* rend = s.c_str() + s.length();
// a heap-backed s is copied into stack.result; otherwise the tail of s is referenced in place
10553 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10556 case ast_func_substring_3:
10558 xpath_allocator_capture cr(stack.temp);
10560 xpath_stack swapped_stack = {stack.temp, stack.result};
10562 xpath_string s = _left->eval_string(c, swapped_stack);
10563 size_t s_length = s.length();
// `first` is the rounded start position; `last` is start + rounded length, i.e. one past the final selected position
10565 double first = round_nearest(_right->eval_number(c, stack));
10566 double last = first + round_nearest(_right->_next->eval_number(c, stack));
// every degenerate range (NaN bounds, start past the end, empty or inverted range, range ending before position 1) yields an empty string
10568 if (is_nan(first) || is_nan(last)) return xpath_string();
10569 else if (first >= s_length + 1) return xpath_string();
10570 else if (first >= last) return xpath_string();
10571 else if (last < 1) return xpath_string();
// clamp both bounds into [1, s_length + 1]
10573 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10574 size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
10576 assert(1 <= pos && pos <= end && end <= s_length + 1);
10577 const char_t* rbegin = s.c_str() + (pos - 1);
10578 const char_t* rend = s.c_str() + (end - 1);
// s is referenced in place only when the range runs to the end of s and s is not heap-backed; otherwise the range is copied into stack.result
10580 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
10583 case ast_func_normalize_space_0:
// zero-argument form: normalizes the string value of the context node
10585 xpath_string s = string_value(c.n, stack.result);
// NOTE(review): s.data(stack.result) presumably yields a writable buffer (copying into stack.result if needed) - confirm
10587 char_t* begin = s.data(stack.result);
// normalize_space is given the buffer start and returns the new end of the string
10588 char_t* end = normalize_space(begin);
10590 return xpath_string::from_heap_preallocated(begin, end);
10593 case ast_func_normalize_space_1:
10595 xpath_string s = _left->eval_string(c, stack);
10597 char_t* begin = s.data(stack.result);
10598 char_t* end = normalize_space(begin);
10600 return xpath_string::from_heap_preallocated(begin, end);
10603 case ast_func_translate:
10605 xpath_allocator_capture cr(stack.temp);
10607 xpath_stack swapped_stack = {stack.temp, stack.result};
// the subject string is evaluated on the regular stack because its buffer becomes the result;
// the from/to maps are only needed during the call, so they go on the swapped stack
10609 xpath_string s = _left->eval_string(c, stack);
10610 xpath_string from = _right->eval_string(c, swapped_stack);
10611 xpath_string to = _right->_next->eval_string(c, swapped_stack);
10613 char_t* begin = s.data(stack.result);
10614 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
10616 return xpath_string::from_heap_preallocated(begin, end);
10619 case ast_opt_translate_table:
10621 xpath_string s = _left->eval_string(c, stack);
10623 char_t* begin = s.data(stack.result);
// uses the lookup table stored in _data.table (installed by optimize_self for constant translate() arguments)
10624 char_t* end = translate_table(begin, _data.table);
10626 return xpath_string::from_heap_preallocated(begin, end);
// the variable's dynamic type has to agree with the type recorded at parse time
10631 assert(_rettype == _data.variable->type());
10633 if (_rettype == xpath_type_string)
10634 return xpath_string::from_const(_data.variable->get_string());
10636 // fallthrough to type conversion
10643 case xpath_type_boolean:
10644 return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
10646 case xpath_type_number:
10647 return convert_number_to_string(eval_number(c, stack), stack.result);
10649 case xpath_type_node_set:
10651 xpath_allocator_capture cr(stack.temp);
10653 xpath_stack swapped_stack = {stack.temp, stack.result};
// a node set converts to the string value of its first node, or to an empty string when the set is empty
10655 xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
10656 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
10660 assert(false && "Wrong expression for return type string");
10661 return xpath_string();
// Evaluates this node as a node set in context `c`.
// Handles union, filter expressions, location steps (dispatched per axis to step_do),
// the root step and node-set variables. `eval` is forwarded to sub-evaluations and to
// eval_once / step_do; its exact meaning is defined by nodeset_eval_t.
10667 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
10673 xpath_allocator_capture cr(stack.temp);
// NOTE(review): assumes xpath_stack's members are ordered {result, temp}; the left operand is then built in stack.temp
10675 xpath_stack swapped_stack = {stack.temp, stack.result};
10677 xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval);
10678 xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval);
10680 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
10681 rs.set_type(xpath_node_set::type_unsorted);
// the left operand's nodes are appended to the right operand's set, then duplicates are dropped
10683 rs.append(ls.begin(), ls.end(), stack.result);
10684 rs.remove_duplicates();
// a predicate classified as constant one only needs the first node of the input set
10691 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
10693 // either expression is a number or it contains position() call; sort by document order
10694 if (_test != predicate_posinv) set.sort_do();
10696 bool once = eval_once(set.type(), eval);
10698 apply_predicate(set, 0, stack, once);
10704 return xpath_node_set_raw();
// each axis gets its own step_do instantiation, selected through the axis_to_type tag
10710 case axis_ancestor:
10711 return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
10713 case axis_ancestor_or_self:
10714 return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
10716 case axis_attribute:
10717 return step_do(c, stack, eval, axis_to_type<axis_attribute>());
10720 return step_do(c, stack, eval, axis_to_type<axis_child>());
10722 case axis_descendant:
10723 return step_do(c, stack, eval, axis_to_type<axis_descendant>());
10725 case axis_descendant_or_self:
10726 return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
10728 case axis_following:
10729 return step_do(c, stack, eval, axis_to_type<axis_following>());
10731 case axis_following_sibling:
10732 return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
10734 case axis_namespace:
10735 // namespaced axis is not supported
10736 return xpath_node_set_raw();
10739 return step_do(c, stack, eval, axis_to_type<axis_parent>());
10741 case axis_preceding:
10742 return step_do(c, stack, eval, axis_to_type<axis_preceding>());
10744 case axis_preceding_sibling:
10745 return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
10748 return step_do(c, stack, eval, axis_to_type<axis_self>());
10751 assert(false && "Unknown axis");
10752 return xpath_node_set_raw();
10756 case ast_step_root:
10758 assert(!_right); // root step can't have any predicates
10760 xpath_node_set_raw ns;
10762 ns.set_type(xpath_node_set::type_sorted);
// the root is taken from the context node, or from the attribute's parent when the context is an attribute
10764 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
10765 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
// the variable's dynamic type has to agree with the type recorded at parse time
10772 assert(_rettype == _data.variable->type());
10774 if (_rettype == xpath_type_node_set)
10776 const xpath_node_set& s = _data.variable->get_node_set();
// the variable's nodes are copied into a fresh raw set that keeps the same ordering type
10778 xpath_node_set_raw ns;
10780 ns.set_type(s.type());
10781 ns.append(s.begin(), s.end(), stack.result);
10786 // fallthrough to type conversion
10790 assert(false && "Wrong expression for return type node set");
10791 return xpath_node_set_raw();
// Optimizes the whole subtree rooted at this node: first recurses into the operands
// (_left, _right) and the sibling chain (_next), then rewrites this node itself, so
// optimize_self always sees already-optimized children.
10795 void optimize(xpath_allocator* alloc)
10797 if (_left) _left->optimize(alloc);
10798 if (_right) _right->optimize(alloc);
10799 if (_next) _next->optimize(alloc);
10801 optimize_self(alloc);
// Applies local rewrites to this node only (children are handled by optimize):
// predicate simplification and classification, collapsing of //-style step pairs,
// table-driven translate(), and a fast path for attribute/string comparison.
// `alloc` is only used to build the translate lookup table.
10804 void optimize_self(xpath_allocator* alloc)
10806 // Rewrite [position()=expr] with [expr]
10807 // Note that this step has to go before classification to recognize [position()=1]
10808 if ((_type == ast_filter || _type == ast_predicate) &&
10809 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
// keep only the numeric right-hand side of the comparison as the predicate expression
10811 _right = _right->_right;
10814 // Classify filter/predicate ops to perform various optimizations during evaluation
10815 if (_type == ast_filter || _type == ast_predicate)
10817 assert(_test == predicate_default);
// literal 1 -> predicate_constant_one
10819 if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
10820 _test = predicate_constant_one;
// numeric constant, numeric variable or last() -> predicate_constant
10821 else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
10822 _test = predicate_constant;
// non-numeric expression for which is_posinv_expr() holds -> predicate_posinv
10823 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
10824 _test = predicate_posinv;
10827 // Rewrite descendant-or-self::node()/child::foo with descendant::foo
10828 // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
10829 // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
10830 // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
10831 if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left &&
10832 _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
// child and descendant collapse to descendant; the remaining axes (self, descendant-or-self) collapse to descendant-or-self
10835 if (_axis == axis_child || _axis == axis_descendant)
10836 _axis = axis_descendant;
10838 _axis = axis_descendant_or_self;
// splice out the now redundant descendant-or-self::node() step
10840 _left = _left->_left;
10843 // Use optimized lookup table implementation for translate() with constant arguments
10844 if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
10846 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
// the node becomes a table-driven translate and keeps the generated table in _data
10850 _type = ast_opt_translate_table;
10851 _data.table = table;
10855 // Use optimized path for @attr = 'value' or @attr = $value
10856 if (_type == ast_op_equal &&
10857 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
10858 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
10860 _type = ast_opt_compare_attribute;
// Recursive check over the expression tree; its result is used by optimize_self to
// classify predicates as predicate_posinv.
// NOTE(review): presumably "position invariant" means the value does not depend on
// position()/last() - confirm against the return statements of each case group.
10864 bool is_posinv_expr() const
10868 case ast_func_position:
10869 case ast_func_last:
10872 case ast_string_constant:
10873 case ast_number_constant:
10878 case ast_step_root:
10881 case ast_predicate:
// a compound expression fails the check as soon as its left operand or any node in the _right/_next chain fails it
10886 if (_left && !_left->is_posinv_expr()) return false;
10888 for (xpath_ast_node* n = _right; n; n = n->_next)
10889 if (!n->is_posinv_expr()) return false;
// Inspects the predicates attached to a step (the chain starting at _right); only
// valid on ast_step nodes.
// NOTE(review): presumably returns true only when every predicate is classified as
// predicate_posinv - confirm against the return statements.
10895 bool is_posinv_step() const
10897 assert(_type == ast_step);
10899 for (xpath_ast_node* n = _right; n; n = n->_next)
// everything chained off a step's _right is expected to be a predicate node
10901 assert(n->_type == ast_predicate);
10903 if (n->_test != predicate_posinv)
// Returns the node's static return type; _rettype is stored as a char (see the
// constructors) and is converted back to the enum here.
10910 xpath_value_type rettype() const
10912 return static_cast<xpath_value_type>(_rettype);
10916 struct xpath_parser
// allocator that backs AST nodes and copied strings (used via allocate_nothrow)
10918 xpath_allocator* _alloc;
// token source for the parse_* functions
10919 xpath_lexer _lexer;
// start of the query text; throw_error subtracts it from the lexer position to get an error offset
10921 const char_t* _query;
// variable set consulted when a variable reference is parsed; may be null
10922 xpath_variable_set* _variables;
// receives the error message and offset when parsing fails
10924 xpath_parse_result* _result;
// small scratch buffer handed to get_variable_scratch / convert_string_to_number_scratch
10926 char_t _scratch[32];
// longjmp target used by throw_error when exceptions are disabled
10928 #ifdef PUGIXML_NO_EXCEPTIONS
10929 jmp_buf _error_handler;
// Reports a parse error: stores `message` and the current lexer offset (relative to
// the start of the query) in *_result, then leaves the parser non-locally - through
// longjmp to _error_handler when PUGIXML_NO_EXCEPTIONS is defined, otherwise by
// throwing xpath_exception. Only the `message` pointer is stored, not a copy.
10932 void throw_error(const char* message)
10934 _result->error = message;
10935 _result->offset = _lexer.current_pos() - _query;
10937 #ifdef PUGIXML_NO_EXCEPTIONS
10938 longjmp(_error_handler, 1);
10940 throw xpath_exception(*_result);
// Reports an allocation failure: with PUGIXML_NO_EXCEPTIONS it is routed through
// throw_error as an "Out of memory" parse error, otherwise std::bad_alloc is thrown.
10944 void throw_error_oom()
10946 #ifdef PUGIXML_NO_EXCEPTIONS
10947 throw_error("Out of memory");
10949 throw std::bad_alloc();
// reserve raw storage for one xpath_ast_node from the parser's allocator
// NOTE(review): presumably the body of alloc_node(), whose result is used with placement new by the parse_* functions - confirm
10955 void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
// allocation failure is reported through throw_error_oom
10957 if (!result) throw_error_oom();
// Copies the lexer string [value.begin, value.end) into memory obtained from the
// parser's allocator; allocation failure is reported through throw_error_oom.
10962 const char_t* alloc_string(const xpath_lexer_string& value)
10966 size_t length = static_cast<size_t>(value.end - value.begin);
// room for `length` characters plus one extra (presumably the terminator)
10968 char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t)));
10969 if (!c) throw_error_oom();
10970 assert(c); // workaround for clang static analysis
10972 memcpy(c, value.begin, length * sizeof(char_t));
// Builds the node for a string-returning function that takes an optional argument
// (used by parse_function for local-name, name and namespace-uri).
// type0 is used when argc == 0, type1 otherwise; a supplied argument must have
// node-set return type, otherwise a parse error is raised.
10980 xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
10984 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
10986 return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
// Maps a function name plus argument count to a newly allocated AST node.
// Dispatch is on the first character of the name, followed by a full name comparison
// and an argument-count check. `args` holds the first two parsed arguments; any
// further arguments are reachable from args[1] through its _next chain (linked by the
// caller with set_next). An unknown name or a wrong argument count raises a parse error.
10989 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
10991 switch (name.begin[0])
10994 if (name == PUGIXML_TEXT("boolean") && argc == 1)
10995 return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
11000 if (name == PUGIXML_TEXT("count") && argc == 1)
// count() is only defined for node sets
11002 if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
11003 return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
11005 else if (name == PUGIXML_TEXT("contains") && argc == 2)
11006 return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
// concat() accepts two or more arguments; the extras hang off args[1] via _next
11007 else if (name == PUGIXML_TEXT("concat") && argc >= 2)
11008 return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
11009 else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
11010 return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
11015 if (name == PUGIXML_TEXT("false") && argc == 0)
11016 return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
11017 else if (name == PUGIXML_TEXT("floor") && argc == 1)
11018 return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
11023 if (name == PUGIXML_TEXT("id") && argc == 1)
11024 return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
11029 if (name == PUGIXML_TEXT("last") && argc == 0)
11030 return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
11031 else if (name == PUGIXML_TEXT("lang") && argc == 1)
11032 return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
11033 else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
11034 return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
11039 if (name == PUGIXML_TEXT("name") && argc <= 1)
11040 return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
11041 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
11042 return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
// functions with an optional argument select the _0 or _1 node type by argc
11043 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
11044 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
11045 else if (name == PUGIXML_TEXT("not") && argc == 1)
11046 return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
11047 else if (name == PUGIXML_TEXT("number") && argc <= 1)
11048 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
11053 if (name == PUGIXML_TEXT("position") && argc == 0)
11054 return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
11059 if (name == PUGIXML_TEXT("round") && argc == 1)
11060 return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
11065 if (name == PUGIXML_TEXT("string") && argc <= 1)
11066 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
11067 else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
11068 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
11069 else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
11070 return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
11071 else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
11072 return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
11073 else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
11074 return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
// substring() has a two- and a three-argument form; the third argument is reached through args[1]->_next
11075 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
11076 return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
11077 else if (name == PUGIXML_TEXT("sum") && argc == 1)
// sum() is only defined for node sets
11079 if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
11080 return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
// translate() takes three arguments; the third is reached through args[1]->_next
11086 if (name == PUGIXML_TEXT("translate") && argc == 3)
11087 return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
11088 else if (name == PUGIXML_TEXT("true") && argc == 0)
11089 return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
// nothing matched: unknown function name or unsupported argument count
11097 throw_error("Unrecognized function or wrong parameter count");
// Translates an axis name to its axis_t value. Dispatch is on the first character
// of the name, followed by a full name comparison.
// NOTE(review): `specified` is an out-parameter; presumably it reports whether the
// name matched a known axis - confirm against the assignments in the body.
11102 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
11106 switch (name.begin[0])
11109 if (name == PUGIXML_TEXT("ancestor"))
11110 return axis_ancestor;
11111 else if (name == PUGIXML_TEXT("ancestor-or-self"))
11112 return axis_ancestor_or_self;
11113 else if (name == PUGIXML_TEXT("attribute"))
11114 return axis_attribute;
11119 if (name == PUGIXML_TEXT("child"))
11125 if (name == PUGIXML_TEXT("descendant"))
11126 return axis_descendant;
11127 else if (name == PUGIXML_TEXT("descendant-or-self"))
11128 return axis_descendant_or_self;
11133 if (name == PUGIXML_TEXT("following"))
11134 return axis_following;
11135 else if (name == PUGIXML_TEXT("following-sibling"))
11136 return axis_following_sibling;
11141 if (name == PUGIXML_TEXT("namespace"))
11142 return axis_namespace;
11147 if (name == PUGIXML_TEXT("parent"))
11148 return axis_parent;
11149 else if (name == PUGIXML_TEXT("preceding"))
11150 return axis_preceding;
11151 else if (name == PUGIXML_TEXT("preceding-sibling"))
11152 return axis_preceding_sibling;
11157 if (name == PUGIXML_TEXT("self"))
// Translates a node-type test name (comment, node, processing-instruction, text) to
// its nodetest_t value. Dispatch is on the first character, followed by a full name
// comparison; nodetest_none is returned when nothing matches.
11170 nodetest_t parse_node_test_type(const xpath_lexer_string& name)
11172 switch (name.begin[0])
11175 if (name == PUGIXML_TEXT("comment"))
11176 return nodetest_type_comment;
11181 if (name == PUGIXML_TEXT("node"))
11182 return nodetest_type_node;
11187 if (name == PUGIXML_TEXT("processing-instruction"))
11188 return nodetest_type_pi;
11193 if (name == PUGIXML_TEXT("text"))
11194 return nodetest_type_text;
// no known node type test matched
11202 return nodetest_none;
11205 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
11206 xpath_ast_node* parse_primary_expression()
11208 switch (_lexer.current())
11212 xpath_lexer_string name = _lexer.contents();
11215 throw_error("Unknown variable: variable set is not provided");
11217 xpath_variable* var = 0;
11218 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
11222 throw_error("Unknown variable: variable set does not contain the given name");
11226 return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
11229 case lex_open_brace:
11233 xpath_ast_node* n = parse_expression();
11235 if (_lexer.current() != lex_close_brace)
11236 throw_error("Unmatched braces");
11243 case lex_quoted_string:
11245 const char_t* value = alloc_string(_lexer.contents());
11247 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
11257 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
11260 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
11268 xpath_ast_node* args[2] = {0};
11271 xpath_lexer_string function = _lexer.contents();
11274 xpath_ast_node* last_arg = 0;
11276 if (_lexer.current() != lex_open_brace)
11277 throw_error("Unrecognized function call");
11280 if (_lexer.current() != lex_close_brace)
11281 args[argc++] = parse_expression();
11283 while (_lexer.current() != lex_close_brace)
11285 if (_lexer.current() != lex_comma)
11286 throw_error("No comma between function arguments");
11289 xpath_ast_node* n = parse_expression();
11291 if (argc < 2) args[argc] = n;
11292 else last_arg->set_next(n);
11300 return parse_function(function, argc, args);
11304 throw_error("Unrecognizable primary expression");
11310 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
11311 // Predicate ::= '[' PredicateExpr ']'
11312 // PredicateExpr ::= Expr
11313 xpath_ast_node* parse_filter_expression()
11315 xpath_ast_node* n = parse_primary_expression();
11317 while (_lexer.current() == lex_open_square_brace)
11321 xpath_ast_node* expr = parse_expression();
11323 if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
11325 n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default);
11327 if (_lexer.current() != lex_close_square_brace)
11328 throw_error("Unmatched square brace");
11336 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
11337 // AxisSpecifier ::= AxisName '::' | '@'?
11338 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
11339 // NameTest ::= '*' | NCName ':' '*' | QName
11340 // AbbreviatedStep ::= '.' | '..'
11341 xpath_ast_node* parse_step(xpath_ast_node* set)
11343 if (set && set->rettype() != xpath_type_node_set)
11344 throw_error("Step has to be applied to node set");
11346 bool axis_specified = false;
11347 axis_t axis = axis_child; // implied child axis
11349 if (_lexer.current() == lex_axis_attribute)
11351 axis = axis_attribute;
11352 axis_specified = true;
11356 else if (_lexer.current() == lex_dot)
11360 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
11362 else if (_lexer.current() == lex_double_dot)
11366 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
11369 nodetest_t nt_type = nodetest_none;
11370 xpath_lexer_string nt_name;
11372 if (_lexer.current() == lex_string)
11375 nt_name = _lexer.contents();
11378 // was it an axis name?
11379 if (_lexer.current() == lex_double_colon)
11382 if (axis_specified) throw_error("Two axis specifiers in one step");
11384 axis = parse_axis_name(nt_name, axis_specified);
11386 if (!axis_specified) throw_error("Unknown axis");
11388 // read actual node test
11391 if (_lexer.current() == lex_multiply)
11393 nt_type = nodetest_all;
11394 nt_name = xpath_lexer_string();
11397 else if (_lexer.current() == lex_string)
11399 nt_name = _lexer.contents();
11402 else throw_error("Unrecognized node test");
11405 if (nt_type == nodetest_none)
11407 // node type test or processing-instruction
11408 if (_lexer.current() == lex_open_brace)
11412 if (_lexer.current() == lex_close_brace)
11416 nt_type = parse_node_test_type(nt_name);
11418 if (nt_type == nodetest_none) throw_error("Unrecognized node type");
11420 nt_name = xpath_lexer_string();
11422 else if (nt_name == PUGIXML_TEXT("processing-instruction"))
11424 if (_lexer.current() != lex_quoted_string)
11425 throw_error("Only literals are allowed as arguments to processing-instruction()");
11427 nt_type = nodetest_pi;
11428 nt_name = _lexer.contents();
11431 if (_lexer.current() != lex_close_brace)
11432 throw_error("Unmatched brace near processing-instruction()");
11436 throw_error("Unmatched brace near node type test");
11439 // QName or NCName:*
11442 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
11444 nt_name.end--; // erase *
11446 nt_type = nodetest_all_in_namespace;
11448 else nt_type = nodetest_name;
11452 else if (_lexer.current() == lex_multiply)
11454 nt_type = nodetest_all;
11457 else throw_error("Unrecognized node test");
11459 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
11461 xpath_ast_node* last = 0;
11463 while (_lexer.current() == lex_open_square_brace)
11467 xpath_ast_node* expr = parse_expression();
11469 xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default);
11471 if (_lexer.current() != lex_close_square_brace)
11472 throw_error("Unmatched square brace");
11475 if (last) last->set_next(pred);
11476 else n->set_right(pred);
11484 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
11485 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
11487 xpath_ast_node* n = parse_step(set);
11489 while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11491 lexeme_t l = _lexer.current();
11494 if (l == lex_double_slash)
11495 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11503 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
11504 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
11505 xpath_ast_node* parse_location_path()
11507 if (_lexer.current() == lex_slash)
11511 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
11513 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
11514 lexeme_t l = _lexer.current();
11516 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
11517 return parse_relative_location_path(n);
11521 else if (_lexer.current() == lex_double_slash)
11525 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
11526 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11528 return parse_relative_location_path(n);
11531 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
11532 return parse_relative_location_path(0);
11535 // PathExpr ::= LocationPath
11537 // | FilterExpr '/' RelativeLocationPath
11538 // | FilterExpr '//' RelativeLocationPath
11539 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
11540 // UnaryExpr ::= UnionExpr | '-' UnaryExpr
11541 xpath_ast_node* parse_path_or_unary_expression()
11544 // PathExpr begins with either LocationPath or FilterExpr.
11545 // FilterExpr begins with PrimaryExpr
11546 // PrimaryExpr begins with '$' in case of it being a variable reference,
11547 // '(' in case of it being an expression, string literal, number constant or
11550 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
11551 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
11552 _lexer.current() == lex_string)
11554 if (_lexer.current() == lex_string)
11556 // This is either a function call, or not - if not, we shall proceed with location path
11557 const char_t* state = _lexer.state();
11559 while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
11561 if (*state != '(') return parse_location_path();
11563 // This looks like a function call; however this still can be a node-test. Check it.
11564 if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
11567 xpath_ast_node* n = parse_filter_expression();
11569 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11571 lexeme_t l = _lexer.current();
11574 if (l == lex_double_slash)
11576 if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
11578 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11581 // select from location path
11582 return parse_relative_location_path(n);
11587 else if (_lexer.current() == lex_minus)
11591 // precedence 7+ - only parses union expressions
11592 xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7);
11594 return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
11597 return parse_location_path();
11602 ast_type_t asttype;
11603 xpath_value_type rettype;
11606 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
11610 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
11614 static binary_op_t parse(xpath_lexer& lexer)
11616 switch (lexer.current())
11619 if (lexer.contents() == PUGIXML_TEXT("or"))
11620 return binary_op_t(ast_op_or, xpath_type_boolean, 1);
11621 else if (lexer.contents() == PUGIXML_TEXT("and"))
11622 return binary_op_t(ast_op_and, xpath_type_boolean, 2);
11623 else if (lexer.contents() == PUGIXML_TEXT("div"))
11624 return binary_op_t(ast_op_divide, xpath_type_number, 6);
11625 else if (lexer.contents() == PUGIXML_TEXT("mod"))
11626 return binary_op_t(ast_op_mod, xpath_type_number, 6);
11628 return binary_op_t();
11631 return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
11633 case lex_not_equal:
11634 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
11637 return binary_op_t(ast_op_less, xpath_type_boolean, 4);
11640 return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
11642 case lex_less_or_equal:
11643 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
11645 case lex_greater_or_equal:
11646 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
11649 return binary_op_t(ast_op_add, xpath_type_number, 5);
11652 return binary_op_t(ast_op_subtract, xpath_type_number, 5);
11655 return binary_op_t(ast_op_multiply, xpath_type_number, 6);
11658 return binary_op_t(ast_op_union, xpath_type_node_set, 7);
11661 return binary_op_t();
11666 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
11668 binary_op_t op = binary_op_t::parse(_lexer);
11670 while (op.asttype != ast_unknown && op.precedence >= limit)
11674 xpath_ast_node* rhs = parse_path_or_unary_expression();
11676 binary_op_t nextop = binary_op_t::parse(_lexer);
11678 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
11680 rhs = parse_expression_rec(rhs, nextop.precedence);
11682 nextop = binary_op_t::parse(_lexer);
11685 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
11686 throw_error("Union operator has to be applied to node sets");
11688 lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs);
11690 op = binary_op_t::parse(_lexer);
11697 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
11698 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
11699 // EqualityExpr ::= RelationalExpr
11700 // | EqualityExpr '=' RelationalExpr
11701 // | EqualityExpr '!=' RelationalExpr
11702 // RelationalExpr ::= AdditiveExpr
11703 // | RelationalExpr '<' AdditiveExpr
11704 // | RelationalExpr '>' AdditiveExpr
11705 // | RelationalExpr '<=' AdditiveExpr
11706 // | RelationalExpr '>=' AdditiveExpr
11707 // AdditiveExpr ::= MultiplicativeExpr
11708 // | AdditiveExpr '+' MultiplicativeExpr
11709 // | AdditiveExpr '-' MultiplicativeExpr
11710 // MultiplicativeExpr ::= UnaryExpr
11711 // | MultiplicativeExpr '*' UnaryExpr
11712 // | MultiplicativeExpr 'div' UnaryExpr
11713 // | MultiplicativeExpr 'mod' UnaryExpr
11714 xpath_ast_node* parse_expression()
11716 return parse_expression_rec(parse_path_or_unary_expression(), 0);
11719 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
11723 xpath_ast_node* parse()
11725 xpath_ast_node* result = parse_expression();
11727 if (_lexer.current() != lex_eof)
11729 // there are still unparsed tokens left, error
11730 throw_error("Incorrect query");
11736 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
11738 xpath_parser parser(query, variables, alloc, result);
11740 #ifdef PUGIXML_NO_EXCEPTIONS
11741 int error = setjmp(parser._error_handler);
11743 return (error == 0) ? parser.parse() : 0;
11745 return parser.parse();
11750 struct xpath_query_impl
11752 static xpath_query_impl* create()
11754 void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
11755 if (!memory) return 0;
11757 return new (memory) xpath_query_impl();
11760 static void destroy(xpath_query_impl* impl)
11762 // free all allocated pages
11763 impl->alloc.release();
11765 // free allocator memory (with the first page)
11766 xml_memory::deallocate(impl);
11769 xpath_query_impl(): root(0), alloc(&block)
11772 block.capacity = sizeof(block.data);
11775 xpath_ast_node* root;
11776 xpath_allocator alloc;
11777 xpath_memory_block block;
11780 PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
11782 if (!impl) return xpath_string();
11784 #ifdef PUGIXML_NO_EXCEPTIONS
11785 if (setjmp(sd.error_handler)) return xpath_string();
11788 xpath_context c(n, 1, 1);
11790 return impl->root->eval_string(c, sd.stack);
11793 PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
11795 if (!impl) return 0;
11797 if (impl->root->rettype() != xpath_type_node_set)
11799 #ifdef PUGIXML_NO_EXCEPTIONS
11802 xpath_parse_result res;
11803 res.error = "Expression does not evaluate to node set";
11805 throw xpath_exception(res);
11815 #ifndef PUGIXML_NO_EXCEPTIONS
11816 PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
11818 assert(_result.error);
11821 PUGI__FN const char* xpath_exception::what() const throw()
11823 return _result.error;
11826 PUGI__FN const xpath_parse_result& xpath_exception::result() const
11832 PUGI__FN xpath_node::xpath_node()
11836 PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
11840 PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
11844 PUGI__FN xml_node xpath_node::node() const
11846 return _attribute ? xml_node() : _node;
11849 PUGI__FN xml_attribute xpath_node::attribute() const
11854 PUGI__FN xml_node xpath_node::parent() const
11856 return _attribute ? _node : _node.parent();
11859 PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
11863 PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
11865 return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
11868 PUGI__FN bool xpath_node::operator!() const
11870 return !(_node || _attribute);
11873 PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
11875 return _node == n._node && _attribute == n._attribute;
11878 PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
11880 return _node != n._node || _attribute != n._attribute;
11883 #ifdef __BORLANDC__
11884 PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
11886 return (bool)lhs && rhs;
11889 PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
11891 return (bool)lhs || rhs;
11895 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
11897 assert(begin_ <= end_);
11899 size_t size_ = static_cast<size_t>(end_ - begin_);
11903 // deallocate old buffer
11904 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
11906 // use internal buffer
11907 if (begin_ != end_) _storage = *begin_;
11909 _begin = &_storage;
11910 _end = &_storage + size_;
11916 xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
11920 #ifdef PUGIXML_NO_EXCEPTIONS
11923 throw std::bad_alloc();
11927 memcpy(storage, begin_, size_ * sizeof(xpath_node));
11929 // deallocate old buffer
11930 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
11934 _end = storage + size_;
11939 #if __cplusplus >= 201103
11940 PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs)
11943 _storage = rhs._storage;
11944 _begin = (rhs._begin == &rhs._storage) ? &_storage : rhs._begin;
11945 _end = _begin + (rhs._end - rhs._begin);
11947 rhs._type = type_unsorted;
11948 rhs._begin = &rhs._storage;
11949 rhs._end = rhs._begin;
11953 PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
11957 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage)
11959 _assign(begin_, end_, type_);
11962 PUGI__FN xpath_node_set::~xpath_node_set()
11964 if (_begin != &_storage)
11965 impl::xml_memory::deallocate(_begin);
11968 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage)
11970 _assign(ns._begin, ns._end, ns._type);
11973 PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
11975 if (this == &ns) return *this;
11977 _assign(ns._begin, ns._end, ns._type);
11982 #if __cplusplus >= 201103
11983 PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs): _type(type_unsorted), _begin(&_storage), _end(&_storage)
11988 PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs)
11990 if (this == &rhs) return *this;
11992 if (_begin != &_storage)
11993 impl::xml_memory::deallocate(_begin);
12001 PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
12006 PUGI__FN size_t xpath_node_set::size() const
12008 return _end - _begin;
12011 PUGI__FN bool xpath_node_set::empty() const
12013 return _begin == _end;
12016 PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
12018 assert(index < size());
12019 return _begin[index];
12022 PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
12027 PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
12032 PUGI__FN void xpath_node_set::sort(bool reverse)
12034 _type = impl::xpath_sort(_begin, _end, _type, reverse);
12037 PUGI__FN xpath_node xpath_node_set::first() const
12039 return impl::xpath_first(_begin, _end, _type);
12042 PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
12046 PUGI__FN xpath_parse_result::operator bool() const
12051 PUGI__FN const char* xpath_parse_result::description() const
12053 return error ? error : "No error";
12056 PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
12060 PUGI__FN const char_t* xpath_variable::name() const
12064 case xpath_type_node_set:
12065 return static_cast<const impl::xpath_variable_node_set*>(this)->name;
12067 case xpath_type_number:
12068 return static_cast<const impl::xpath_variable_number*>(this)->name;
12070 case xpath_type_string:
12071 return static_cast<const impl::xpath_variable_string*>(this)->name;
12073 case xpath_type_boolean:
12074 return static_cast<const impl::xpath_variable_boolean*>(this)->name;
12077 assert(false && "Invalid variable type");
12082 PUGI__FN xpath_value_type xpath_variable::type() const
12087 PUGI__FN bool xpath_variable::get_boolean() const
12089 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
12092 PUGI__FN double xpath_variable::get_number() const
12094 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
12097 PUGI__FN const char_t* xpath_variable::get_string() const
12099 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
12100 return value ? value : PUGIXML_TEXT("");
12103 PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
12105 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
12108 PUGI__FN bool xpath_variable::set(bool value)
12110 if (_type != xpath_type_boolean) return false;
12112 static_cast<impl::xpath_variable_boolean*>(this)->value = value;
12116 PUGI__FN bool xpath_variable::set(double value)
12118 if (_type != xpath_type_number) return false;
12120 static_cast<impl::xpath_variable_number*>(this)->value = value;
12124 PUGI__FN bool xpath_variable::set(const char_t* value)
12126 if (_type != xpath_type_string) return false;
12128 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
12130 // duplicate string
12131 size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
12133 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
12134 if (!copy) return false;
12136 memcpy(copy, value, size);
12138 // replace old string
12139 if (var->value) impl::xml_memory::deallocate(var->value);
12145 PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
12147 if (_type != xpath_type_node_set) return false;
12149 static_cast<impl::xpath_variable_node_set*>(this)->value = value;
12153 PUGI__FN xpath_variable_set::xpath_variable_set()
12155 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12159 PUGI__FN xpath_variable_set::~xpath_variable_set()
12161 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12162 _destroy(_data[i]);
12165 PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
12167 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12173 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
12175 if (this == &rhs) return *this;
12182 #if __cplusplus >= 201103
12183 PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs)
12185 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12187 _data[i] = rhs._data[i];
12192 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs)
12194 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12196 _destroy(_data[i]);
12198 _data[i] = rhs._data[i];
12206 PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
12208 xpath_variable_set temp;
12210 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12211 if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
12217 PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
12219 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12221 xpath_variable* chain = _data[i];
12223 _data[i] = rhs._data[i];
12224 rhs._data[i] = chain;
12228 PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
12233 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12234 size_t hash = impl::hash_string(name) % hash_size;
12236 // look for existing variable
12237 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12238 if (var->name() && impl::strequal(var->name(), name))
12244 PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
12246 xpath_variable* last = 0;
12250 // allocate storage for new variable
12251 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
12252 if (!nvar) return false;
12254 // link the variable to the result immediately to handle failures gracefully
12256 last->_next = nvar;
12258 *out_result = nvar;
12262 // copy the value; this can fail due to out-of-memory conditions
12263 if (!impl::copy_xpath_variable(nvar, var)) return false;
12271 PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
12275 xpath_variable* next = var->_next;
12277 impl::delete_xpath_variable(var->_type, var);
12283 PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
12288 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12289 size_t hash = impl::hash_string(name) % hash_size;
12291 // look for existing variable
12292 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12293 if (var->name() && impl::strequal(var->name(), name))
12294 return var->type() == type ? var : 0;
12296 // add new variable
12297 xpath_variable* result = impl::new_xpath_variable(type, name);
12301 result->_next = _data[hash];
12303 _data[hash] = result;
12309 PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
12311 xpath_variable* var = add(name, xpath_type_boolean);
12312 return var ? var->set(value) : false;
12315 PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
12317 xpath_variable* var = add(name, xpath_type_number);
12318 return var ? var->set(value) : false;
12321 PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
12323 xpath_variable* var = add(name, xpath_type_string);
12324 return var ? var->set(value) : false;
12327 PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
12329 xpath_variable* var = add(name, xpath_type_node_set);
12330 return var ? var->set(value) : false;
12333 PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
12335 return _find(name);
12338 PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
12340 return _find(name);
12343 PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
12345 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
12349 #ifdef PUGIXML_NO_EXCEPTIONS
12350 _result.error = "Out of memory";
12352 throw std::bad_alloc();
12357 using impl::auto_deleter; // MSVC7 workaround
12358 auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
12360 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
12364 qimpl->root->optimize(&qimpl->alloc);
12366 _impl = impl.release();
12372 PUGI__FN xpath_query::xpath_query(): _impl(0)
12376 PUGI__FN xpath_query::~xpath_query()
12379 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12382 #if __cplusplus >= 201103
12383 PUGI__FN xpath_query::xpath_query(xpath_query&& rhs)
12386 _result = rhs._result;
12388 rhs._result = xpath_parse_result();
12391 PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs)
12393 if (this == &rhs) return *this;
12396 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12399 _result = rhs._result;
12401 rhs._result = xpath_parse_result();
12407 PUGI__FN xpath_value_type xpath_query::return_type() const
12409 if (!_impl) return xpath_type_none;
12411 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
12414 PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
12416 if (!_impl) return false;
12418 impl::xpath_context c(n, 1, 1);
12419 impl::xpath_stack_data sd;
12421 #ifdef PUGIXML_NO_EXCEPTIONS
12422 if (setjmp(sd.error_handler)) return false;
12425 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
12428 PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
12430 if (!_impl) return impl::gen_nan();
12432 impl::xpath_context c(n, 1, 1);
12433 impl::xpath_stack_data sd;
12435 #ifdef PUGIXML_NO_EXCEPTIONS
12436 if (setjmp(sd.error_handler)) return impl::gen_nan();
12439 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
12442 #ifndef PUGIXML_NO_STL
12443 PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
12445 impl::xpath_stack_data sd;
12447 impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
12449 return string_t(r.c_str(), r.length());
12453 PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
12455 impl::xpath_stack_data sd;
12457 impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
12459 size_t full_size = r.length() + 1;
12463 size_t size = (full_size < capacity) ? full_size : capacity;
12466 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
12467 buffer[size - 1] = 0;
12473 PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
12475 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12476 if (!root) return xpath_node_set();
12478 impl::xpath_context c(n, 1, 1);
12479 impl::xpath_stack_data sd;
12481 #ifdef PUGIXML_NO_EXCEPTIONS
12482 if (setjmp(sd.error_handler)) return xpath_node_set();
12485 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
12487 return xpath_node_set(r.begin(), r.end(), r.type());
12490 PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
12492 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12493 if (!root) return xpath_node();
12495 impl::xpath_context c(n, 1, 1);
12496 impl::xpath_stack_data sd;
12498 #ifdef PUGIXML_NO_EXCEPTIONS
12499 if (setjmp(sd.error_handler)) return xpath_node();
12502 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
12507 PUGI__FN const xpath_parse_result& xpath_query::result() const
12512 PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
12516 PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
12518 return _impl ? unspecified_bool_xpath_query : 0;
12521 PUGI__FN bool xpath_query::operator!() const
12526 PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
12528 xpath_query q(query, variables);
12529 return select_node(q);
12532 PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
12534 return query.evaluate_node(*this);
12537 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
12539 xpath_query q(query, variables);
12540 return select_nodes(q);
12543 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
12545 return query.evaluate_node_set(*this);
12548 PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
12550 xpath_query q(query, variables);
12551 return select_single_node(q);
12554 PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
12556 return query.evaluate_node(*this);
// Borland only: pop the compiler option state.
// NOTE(review): the matching push is presumably near the top of the file - confirm.
12562 #ifdef __BORLANDC__
12563 # pragma option pop
12566 // Intel C++ does not properly keep warning state for function templates,
12567 // so popping warning state at the end of translation unit leads to warnings in the middle.
// Hence the pop below is issued for MSVC proper only; it pairs with the
// `# pragma warning(push)` that precedes the warning-disable list at the top of this file.
12568 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
12569 # pragma warning(pop)
12572 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
// Every name below carries the PUGI__ prefix; PUGI__NO_INLINE and PUGI__UNLIKELY, for
// example, are the helper macros defined near the top of this file.
12573 #undef PUGI__NO_INLINE
12574 #undef PUGI__UNLIKELY
12575 #undef PUGI__STATIC_ASSERT
12576 #undef PUGI__DMC_VOLATILE
12577 #undef PUGI__MSVC_CRT_VERSION
12578 #undef PUGI__NS_BEGIN
12579 #undef PUGI__NS_END
12581 #undef PUGI__FN_NO_INLINE
12582 #undef PUGI__GETHEADER_IMPL
12583 #undef PUGI__GETPAGE_IMPL
12584 #undef PUGI__GETPAGE
12585 #undef PUGI__NODETYPE
12586 #undef PUGI__IS_CHARTYPE_IMPL
12587 #undef PUGI__IS_CHARTYPE
12588 #undef PUGI__IS_CHARTYPEX
12589 #undef PUGI__ENDSWITH
12590 #undef PUGI__SKIPWS
12591 #undef PUGI__OPTSET
12592 #undef PUGI__PUSHNODE
12593 #undef PUGI__POPNODE
12594 #undef PUGI__SCANFOR
12595 #undef PUGI__SCANWHILE
12596 #undef PUGI__SCANWHILE_UNROLL
12597 #undef PUGI__ENDSEG
12598 #undef PUGI__THROW_ERROR
12599 #undef PUGI__CHECK_ERROR
12604 * Copyright (c) 2006-2016 Arseny Kapoulkine
12606 * Permission is hereby granted, free of charge, to any person
12607 * obtaining a copy of this software and associated documentation
12608 * files (the "Software"), to deal in the Software without
12609 * restriction, including without limitation the rights to use,
12610 * copy, modify, merge, publish, distribute, sublicense, and/or sell
12611 * copies of the Software, and to permit persons to whom the
12612 * Software is furnished to do so, subject to the following
12615 * The above copyright notice and this permission notice shall be
12616 * included in all copies or substantial portions of the Software.
12618 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
12619 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
12620 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12621 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
12622 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
12623 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
12624 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
12625 * OTHER DEALINGS IN THE SOFTWARE.