Publishing R3
[platform/upstream/dldt.git] / inference-engine / thirdparty / pugixml / src / pugixml.cpp
1 /**
2  * pugixml parser - version 1.7
3  * --------------------------------------------------------
4  * Copyright (C) 2006-2016, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5  * Report bugs and download new versions at http://pugixml.org/
6  *
7  * This library is distributed under the MIT License. See notice at the end
8  * of this file.
9  *
10  * This work is based on the pugxml parser, which is:
11  * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12  */
13
14 #ifndef SOURCE_PUGIXML_CPP
15 #define SOURCE_PUGIXML_CPP
16
17 #include "pugixml.hpp"
18
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <assert.h>
23 #include <limits.h>
24
25 #ifdef PUGIXML_WCHAR_MODE
26 #       include <wchar.h>
27 #endif
28
29 #ifndef PUGIXML_NO_XPATH
30 #       include <math.h>
31 #       include <float.h>
32 #       ifdef PUGIXML_NO_EXCEPTIONS
33 #               include <setjmp.h>
34 #       endif
35 #endif
36
37 #ifndef PUGIXML_NO_STL
38 #       include <istream>
39 #       include <ostream>
40 #       include <string>
41 #endif
42
43 // For placement new
44 #include <new>
45
46 #ifdef _MSC_VER
47 #       pragma warning(push)
48 #       pragma warning(disable: 4127) // conditional expression is constant
49 #       pragma warning(disable: 4324) // structure was padded due to __declspec(align())
50 #       pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
51 #       pragma warning(disable: 4702) // unreachable code
52 #       pragma warning(disable: 4996) // this function or variable may be unsafe
53 #       pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
54 #endif
55
56 #ifdef __INTEL_COMPILER
57 #       pragma warning(disable: 177) // function was declared but never referenced
58 #       pragma warning(disable: 279) // controlling expression is constant
59 #       pragma warning(disable: 1478 1786) // function was declared "deprecated"
60 #       pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
61 #endif
62
63 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
64 #       pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
65 #endif
66
67 #ifdef __BORLANDC__
68 #       pragma option push
69 #       pragma warn -8008 // condition is always false
70 #       pragma warn -8066 // unreachable code
71 #endif
72
73 #ifdef __SNC__
74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
75 #       pragma diag_suppress=178 // function was declared but never referenced
76 #       pragma diag_suppress=237 // controlling expression is constant
77 #endif
78
79 // Inlining controls: PUGI__NO_INLINE expands to the compiler-specific "never inline" annotation
// (__declspec(noinline) when _MSC_VER >= 1300, __attribute__((noinline)) on GCC-compatible compilers)
// and to nothing on every other compiler, where inlining is left to the optimizer.
80 #if defined(_MSC_VER) && _MSC_VER >= 1300
81 #       define PUGI__NO_INLINE __declspec(noinline)
82 #elif defined(__GNUC__)
83 #       define PUGI__NO_INLINE __attribute__((noinline))
84 #else
85 #       define PUGI__NO_INLINE
86 #endif
87
88 // Branch weight controls: PUGI__UNLIKELY(cond) wraps a condition that is expected to be false.
// On GCC-compatible compilers the hint is passed to the optimizer via __builtin_expect;
// elsewhere the macro is just a parenthesized copy of the condition.
89 #if defined(__GNUC__)
90 #       define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
91 #else
92 #       define PUGI__UNLIKELY(cond) (cond)
93 #endif
94
95 // Simple static assertion: declares an array whose size is -1 when cond is false, which fails to compile.
// The expansion is a braced block, so the macro can only be used where a statement is allowed
// (i.e. inside a function body); the (void) expression keeps the array from being reported as unused.
96 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
97
98 // Digital Mars C++ bug workaround for passing char loaded from memory via stack
99 #ifdef __DMC__
100 #       define PUGI__DMC_VOLATILE volatile
101 #else
102 #       define PUGI__DMC_VOLATILE
103 #endif
104
105 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
106 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
107 using std::memcpy;
108 using std::memmove;
109 using std::memset;
110 #endif
111
112 // Some MinGW versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions in strict ANSI mode
113 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__MINGW32__) && defined(__STRICT_ANSI__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
114 #       define LLONG_MAX 9223372036854775807LL
115 #       define LLONG_MIN (-LLONG_MAX-1)
116 #       define ULLONG_MAX (2ULL*LLONG_MAX+1)
117 #endif
118
119 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
120 #if defined(_MSC_VER) && !defined(__S3E__)
121 #       define PUGI__MSVC_CRT_VERSION _MSC_VER
122 #endif
123
// Namespace and linkage helpers used to wrap every implementation section of this file:
//  - PUGI__NS_BEGIN / PUGI__NS_END open and close namespace pugi::impl; in the regular (non header-only)
//    build an extra anonymous namespace is added, except for compilers with _MSC_VER < 1300
//  - PUGI__FN prefixes ordinary implementation functions ('inline' in header-only mode, empty otherwise)
//  - PUGI__FN_NO_INLINE prefixes functions that should stay out of line (PUGI__NO_INLINE in the regular build;
//    plain 'inline' in header-only mode)
124 #ifdef PUGIXML_HEADER_ONLY
125 #       define PUGI__NS_BEGIN namespace pugi { namespace impl {
126 #       define PUGI__NS_END } }
127 #       define PUGI__FN inline
128 #       define PUGI__FN_NO_INLINE inline
129 #else
130 #       if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
131 #               define PUGI__NS_BEGIN namespace pugi { namespace impl {
132 #               define PUGI__NS_END } }
133 #       else
134 #               define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
135 #               define PUGI__NS_END } } }
136 #       endif
137 #       define PUGI__FN
138 #       define PUGI__FN_NO_INLINE PUGI__NO_INLINE
139 #endif
140
141 // uintptr_t and the fixed-width unsigned types: taken from <stdint.h>, except on the old MSVC / Borland
// versions selected by the check below, where they are declared by hand inside namespace pugi
142 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
143 namespace pugi
144 {
            // skip the typedef when _UINTPTR_T_DEFINED says uintptr_t has already been provided
145 #       ifndef _UINTPTR_T_DEFINED
146         typedef size_t uintptr_t;
147 #       endif
148
149         typedef unsigned __int8 uint8_t;
150         typedef unsigned __int16 uint16_t;
151         typedef unsigned __int32 uint32_t;
152 }
153 #else
154 #       include <stdint.h>
155 #endif
156
157 // Memory allocation
158 PUGI__NS_BEGIN
159         PUGI__FN void* default_allocate(size_t size)
160         {
161                 return malloc(size);
162         }
163
164         PUGI__FN void default_deallocate(void* ptr)
165         {
166                 free(ptr);
167         }
168
169         template <typename T>
170         struct xml_memory_management_function_storage
171         {
172                 static allocation_function allocate;
173                 static deallocation_function deallocate;
174         };
175
176         // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
177         // Without a template<> we'll get multiple definitions of the same static
178         template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
179         template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
180
181         typedef xml_memory_management_function_storage<int> xml_memory;
182 PUGI__NS_END
183
184 // String utilities
185 PUGI__NS_BEGIN
186         // Get string length
187         PUGI__FN size_t strlength(const char_t* s)
188         {
189                 assert(s);
190
191         #ifdef PUGIXML_WCHAR_MODE
192                 return wcslen(s);
193         #else
194                 return strlen(s);
195         #endif
196         }
197
198         // Compare two strings
199         PUGI__FN bool strequal(const char_t* src, const char_t* dst)
200         {
201                 assert(src && dst);
202
203         #ifdef PUGIXML_WCHAR_MODE
204                 return wcscmp(src, dst) == 0;
205         #else
206                 return strcmp(src, dst) == 0;
207         #endif
208         }
209
210         // Compare lhs with [rhs_begin, rhs_end)
211         PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
212         {
213                 for (size_t i = 0; i < count; ++i)
214                         if (lhs[i] != rhs[i])
215                                 return false;
216
217                 return lhs[count] == 0;
218         }
219
220         // Get length of wide string, even if CRT lacks wide character support
221         PUGI__FN size_t strlength_wide(const wchar_t* s)
222         {
223                 assert(s);
224
225         #ifdef PUGIXML_WCHAR_MODE
226                 return wcslen(s);
227         #else
228                 const wchar_t* end = s;
229                 while (*end) end++;
230                 return static_cast<size_t>(end - s);
231         #endif
232         }
233 PUGI__NS_END
234
235 // auto_ptr-like object for exception recovery
236 PUGI__NS_BEGIN
// Scope guard around a raw pointer: on destruction the pointer is passed to the supplied
// cleanup function, unless release() has been called (or the pointer was null to begin with).
template <typename T> struct auto_deleter
{
    typedef void (*D)(T*);

    T* data;
    D deleter;

    auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
    {
    }

    ~auto_deleter()
    {
        if (!data) return;

        deleter(data);
    }

    // Gives up ownership: returns the guarded pointer and disarms the destructor
    T* release()
    {
        T* owned = data;

        data = 0;

        return owned;
    }
};
260 PUGI__NS_END
261
262 #ifdef PUGIXML_COMPACT
263 PUGI__NS_BEGIN
264         class compact_hash_table
265         {
266         public:
267                 compact_hash_table(): _items(0), _capacity(0), _count(0)
268                 {
269                 }
270
271                 void clear()
272                 {
273                         if (_items)
274                         {
275                                 xml_memory::deallocate(_items);
276                                 _items = 0;
277                                 _capacity = 0;
278                                 _count = 0;
279                         }
280                 }
281
282                 void** find(const void* key)
283                 {
284                         assert(key);
285
286                         if (_capacity == 0) return 0;
287
288                         size_t hashmod = _capacity - 1;
289                         size_t bucket = hash(key) & hashmod;
290
291                         for (size_t probe = 0; probe <= hashmod; ++probe)
292                         {
293                                 item_t& probe_item = _items[bucket];
294
295                                 if (probe_item.key == key)
296                                         return &probe_item.value;
297
298                                 if (probe_item.key == 0)
299                                         return 0;
300
301                                 // hash collision, quadratic probing
302                                 bucket = (bucket + probe + 1) & hashmod;
303                         }
304
305                         assert(false && "Hash table is full");
306                         return 0;
307                 }
308
309                 void** insert(const void* key)
310                 {
311                         assert(key);
312                         assert(_capacity != 0 && _count < _capacity - _capacity / 4);
313
314                         size_t hashmod = _capacity - 1;
315                         size_t bucket = hash(key) & hashmod;
316
317                         for (size_t probe = 0; probe <= hashmod; ++probe)
318                         {
319                                 item_t& probe_item = _items[bucket];
320
321                                 if (probe_item.key == 0)
322                                 {
323                                         probe_item.key = key;
324                                         _count++;
325                                         return &probe_item.value;
326                                 }
327
328                                 if (probe_item.key == key)
329                                         return &probe_item.value;
330
331                                 // hash collision, quadratic probing
332                                 bucket = (bucket + probe + 1) & hashmod;
333                         }
334
335                         assert(false && "Hash table is full");
336                         return 0;
337                 }
338
339                 bool reserve()
340                 {
341                         if (_count + 16 >= _capacity - _capacity / 4)
342                                 return rehash();
343
344                         return true;
345                 }
346
347         private:
348                 struct item_t
349                 {
350                         const void* key;
351                         void* value;
352                 };
353
354                 item_t* _items;
355                 size_t _capacity;
356
357                 size_t _count;
358
359                 bool rehash();
360
361                 static unsigned int hash(const void* key)
362                 {
363                         unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
364
365                         // MurmurHash3 32-bit finalizer
366                         h ^= h >> 16;
367                         h *= 0x85ebca6bu;
368                         h ^= h >> 13;
369                         h *= 0xc2b2ae35u;
370                         h ^= h >> 16;
371
372                         return h;
373                 }
374         };
375
376         PUGI__FN_NO_INLINE bool compact_hash_table::rehash()
377         {
378                 compact_hash_table rt;
379                 rt._capacity = (_capacity == 0) ? 32 : _capacity * 2;
380                 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * rt._capacity));
381
382                 if (!rt._items)
383                         return false;
384
385                 memset(rt._items, 0, sizeof(item_t) * rt._capacity);
386
387                 for (size_t i = 0; i < _capacity; ++i)
388                         if (_items[i].key)
389                                 *rt.insert(_items[i].key) = _items[i].value;
390
391                 if (_items)
392                         xml_memory::deallocate(_items);
393
394                 _capacity = rt._capacity;
395                 _items = rt._items;
396
397                 assert(_count == rt._count);
398
399                 return true;
400         }
401
402 PUGI__NS_END
403 #endif
404
405 PUGI__NS_BEGIN
// Granularity used by the string allocator: string blocks are rounded up to a multiple of this value
// and string header fields are stored in units of it (see xml_allocator::allocate_string)
406 #ifdef PUGIXML_COMPACT
407         static const uintptr_t xml_memory_block_alignment = 4;
408 #else
409         static const uintptr_t xml_memory_block_alignment = sizeof(void*);
410 #endif
411
412         // extra metadata bits; together with the type mask they fit in the low 8 bits of an object header
            // (PUGI__GETHEADER_IMPL shifts the page offset left by 8 to leave room for them)
413         static const uintptr_t xml_memory_page_contents_shared_mask = 64;
414         static const uintptr_t xml_memory_page_name_allocated_mask = 32;
415         static const uintptr_t xml_memory_page_value_allocated_mask = 16;
            // low four bits: the value extracted by PUGI__NODETYPE
416         static const uintptr_t xml_memory_page_type_mask = 15;
417
418         // combined masks for string uniqueness
419         static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
420         static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
421
            // PUGI__GETHEADER_IMPL builds a header value from an object, its page and flag bits;
            // PUGI__GETPAGE_IMPL recovers the page from a header.
            // Non-compact mode: the header stores the byte distance from the page start to the object in the bits
            // above the low 8, and the page is found by subtracting that distance from the header's own address.
            // Compact mode: the header is an object exposing get_page(), and PUGI__GETHEADER_IMPL expands to nothing.
422 #ifdef PUGIXML_COMPACT
423         #define PUGI__GETHEADER_IMPL(object, page, flags) // unused
424         #define PUGI__GETPAGE_IMPL(header) (header).get_page()
425 #else
426         #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
427         #define PUGI__GETPAGE_IMPL(header) const_cast<impl::xml_memory_page*>(reinterpret_cast<const impl::xml_memory_page*>(reinterpret_cast<const char*>(&header) - (header >> 8)))
428 #endif
429
            // Convenience accessors for any object n that has a 'header' member
430         #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
431         #define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
432
            // forward declaration: xml_memory_page stores a pointer to its allocator
433         struct xml_allocator;
434
435         struct xml_memory_page
436         {
437                 static xml_memory_page* construct(void* memory)
438                 {
439                         xml_memory_page* result = static_cast<xml_memory_page*>(memory);
440
441                         result->allocator = 0;
442                         result->prev = 0;
443                         result->next = 0;
444                         result->busy_size = 0;
445                         result->freed_size = 0;
446
447                 #ifdef PUGIXML_COMPACT
448                         result->compact_string_base = 0;
449                         result->compact_shared_parent = 0;
450                         result->compact_page_marker = 0;
451                 #endif
452
453                         return result;
454                 }
455
456                 xml_allocator* allocator;
457
458                 xml_memory_page* prev;
459                 xml_memory_page* next;
460
461                 size_t busy_size;
462                 size_t freed_size;
463
464         #ifdef PUGIXML_COMPACT
465                 char_t* compact_string_base;
466                 void* compact_shared_parent;
467                 uint32_t* compact_page_marker;
468         #endif
469         };
470
471         static const size_t xml_memory_page_size =
472         #ifdef PUGIXML_MEMORY_PAGE_SIZE
473                 (PUGIXML_MEMORY_PAGE_SIZE)
474         #else
475                 32768
476         #endif
477                 - sizeof(xml_memory_page);
478
// Header stored immediately in front of every string allocated from a page.
// Both fields are kept in units of xml_memory_block_alignment so that they fit in 16 bits.
struct xml_memory_string_header
{
    // distance of this header from the start of the page data
    uint16_t page_offset;

    // size of the whole block (header included); 0 means the string occupies the whole page
    uint16_t full_size;
};
484
485         struct xml_allocator
486         {
                    // Page-based allocator: memory is handed out sequentially from the current page (_root).
                    // _busy_size mirrors the busy_size of the current page and is written back to the page lazily
                    // (see deallocate_memory and allocate_memory_oob). Pages form a doubly-linked list; a page is
                    // released once everything allocated from it has been freed.
487                 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
488                 {
489                 #ifdef PUGIXML_COMPACT
490                         _hash = 0;
491                 #endif
492                 }
493
                    // Allocates a page with room for data_size payload bytes after the header and clears the header.
                    // Returns null if the allocation function fails. The page is not linked into the list here.
494                 xml_memory_page* allocate_page(size_t data_size)
495                 {
496                         size_t size = sizeof(xml_memory_page) + data_size;
497
498                         // allocate a single block that holds the page header followed by the payload
499                         void* memory = xml_memory::allocate(size);
500                         if (!memory) return 0;
501
502                         // prepare page structure
503                         xml_memory_page* page = xml_memory_page::construct(memory);
504                         assert(page);
505
                            // every page carries the same allocator pointer as the current root page
506                         page->allocator = _root->allocator;
507
508                         return page;
509                 }
510
                    // Returns a page's memory to the deallocation function; the caller must have unlinked it already
511                 static void deallocate_page(xml_memory_page* page)
512                 {
513                         xml_memory::deallocate(page);
514                 }
515
                    // Slow path of allocate_memory, used when the request does not fit into the current page
516                 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
517
                    // Hands out size bytes from the current page when they fit, otherwise defers to allocate_memory_oob.
                    // out_page receives the page that contains the returned memory; the result is null on failure.
518                 void* allocate_memory(size_t size, xml_memory_page*& out_page)
519                 {
520                         if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
521                                 return allocate_memory_oob(size, out_page);
522
                            // the payload starts right after the page header
523                         void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
524
525                         _busy_size += size;
526
527                         out_page = _root;
528
529                         return buf;
530                 }
531
532         #ifdef PUGIXML_COMPACT
                    // Compact mode: allocates an object with room for a uint32_t marker in front of it. The marker is
                    // only kept when the object would lie 256 * xml_memory_block_alignment bytes or more past the
                    // page's current marker; otherwise the extra bytes are given back.
533                 void* allocate_object(size_t size, xml_memory_page*& out_page)
534                 {
535                         void* result = allocate_memory(size + sizeof(uint32_t), out_page);
536                         if (!result) return 0;
537
538                         // adjust for marker
539                         ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
540
541                         if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
542                         {
543                                 // insert new marker
544                                 uint32_t* marker = static_cast<uint32_t*>(result);
545
                                    // the marker stores its own byte offset from the start of the page
546                                 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
547                                 out_page->compact_page_marker = marker;
548
549                                 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
550                                 // this will make sure deallocate_memory correctly tracks the size
551                                 out_page->freed_size += sizeof(uint32_t);
552
553                                 return marker + 1;
554                         }
555                         else
556                         {
557                                 // roll back uint32_t part
558                                 _busy_size -= sizeof(uint32_t);
559
560                                 return result;
561                         }
562                 }
563         #else
                    // Non-compact mode: objects need no marker, so this is a plain allocate_memory call
564                 void* allocate_object(size_t size, xml_memory_page*& out_page)
565                 {
566                         return allocate_memory(size, out_page);
567                 }
568         #endif
569
                    // Records that size bytes at ptr, allocated from page, are no longer in use. Once everything
                    // allocated from the page has been freed, the page is reset for reuse if it is the last page in
                    // the list, or unlinked and deallocated otherwise.
570                 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
571                 {
                            // write the cached size back so that the checks below see the current value
572                         if (page == _root) page->busy_size = _busy_size;
573
574                         assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
                            // ptr is otherwise only read by the assert above; presumably this keeps it "used" when asserts are compiled out
575                         (void)!ptr;
576
577                         page->freed_size += size;
578                         assert(page->freed_size <= page->busy_size);
579
580                         if (page->freed_size == page->busy_size)
581                         {
582                                 if (page->next == 0)
583                                 {
584                                         assert(_root == page);
585
586                                         // top page freed, just reset sizes
587                                         page->busy_size = 0;
588                                         page->freed_size = 0;
589
590                                 #ifdef PUGIXML_COMPACT
591                                         // reset compact state to maximize efficiency
592                                         page->compact_string_base = 0;
593                                         page->compact_shared_parent = 0;
594                                         page->compact_page_marker = 0;
595                                 #endif
596
597                                         _busy_size = 0;
598                                 }
599                                 else
600                                 {
601                                         assert(_root != page);
602                                         assert(page->prev);
603
604                                         // remove from the list
605                                         page->prev->next = page->next;
606                                         page->next->prev = page->prev;
607
608                                         // deallocate
609                                         deallocate_page(page);
610                                 }
611                         }
612                 }
613
                    // Allocates storage for length characters, preceded by an xml_memory_string_header that
                    // deallocate_string later uses to find the owning page and the block size.
                    // Returns a pointer to the character storage, or null if the allocation fails.
614                 char_t* allocate_string(size_t length)
615                 {
                            // largest byte offset that a 16-bit header field counted in alignment units can describe
616                         static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
617
618                         PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
619
620                         // allocate memory for string and header block
621                         size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
622
623                         // round size up to block alignment boundary
624                         size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
625
626                         xml_memory_page* page;
627                         xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
628
629                         if (!header) return 0;
630
631                         // setup header
632                         ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
633
634                         assert(page_offset % xml_memory_block_alignment == 0);
635                         assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
636                         header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
637
638                         // full_size == 0 for large strings that occupy the whole page
639                         assert(full_size % xml_memory_block_alignment == 0);
640                         assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
641                         header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
642
643                         // round-trip through void* to avoid 'cast increases required alignment of target type' warning
644                         // header is guaranteed a pointer-sized alignment, which should be enough for char_t
645                         return static_cast<char_t*>(static_cast<void*>(header + 1));
646                 }
647
                    // Releases a string previously returned by allocate_string
648                 void deallocate_string(char_t* string)
649                 {
650                         // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
651                         // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
652
653                         // get header
654                         xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
655                         assert(header);
656
657                         // deallocate
                            // the header records how far it lies from the page data; add the page header size to reach the page itself
658                         size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
659                         xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
660
661                         // if full_size == 0 then this string occupies the whole page
662                         size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
663
664                         deallocate_memory(header, full_size, page);
665                 }
666
                    // Compact mode: asks the hash table to make room for further insertions and reports whether that
                    // succeeded (_hash must have been set by the owner, since the constructor leaves it null).
                    // Non-compact mode: there is nothing to reserve, so this always succeeds.
667                 bool reserve()
668                 {
669                 #ifdef PUGIXML_COMPACT
670                         return _hash->reserve();
671                 #else
672                         return true;
673                 #endif
674                 }
675
                    // current page (new allocations are served from it) and the number of its payload bytes already handed out
676                 xml_memory_page* _root;
677                 size_t _busy_size;
678
679         #ifdef PUGIXML_COMPACT
                    // hash table used by compact-mode pointers whose targets cannot be encoded inline
680                 compact_hash_table* _hash;
681         #endif
682         };
683
684         PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
685         {
686                 const size_t large_allocation_threshold = xml_memory_page_size / 4;
687
688                 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
689                 out_page = page;
690
691                 if (!page) return 0;
692
693                 if (size <= large_allocation_threshold)
694                 {
695                         _root->busy_size = _busy_size;
696
697                         // insert page at the end of linked list
698                         page->prev = _root;
699                         _root->next = page;
700                         _root = page;
701
702                         _busy_size = size;
703                 }
704                 else
705                 {
706                         // insert page before the end of linked list, so that it is deleted as soon as possible
707                         // the last page is not deleted even if it's empty (see deallocate_memory)
708                         assert(_root->prev);
709
710                         page->prev = _root->prev;
711                         page->next = _root;
712
713                         _root->prev->next = page;
714                         _root->prev = page;
715
716                         page->busy_size = size;
717                 }
718
719                 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
720         }
721 PUGI__NS_END
722
723 #ifdef PUGIXML_COMPACT
724 PUGI__NS_BEGIN
// Alignment unit of the compact encodings, given as a shift count and as the matching byte count
static const uintptr_t compact_alignment_log2 = 2;
static const uintptr_t compact_alignment = uintptr_t(1) << compact_alignment_log2;
727
728         class compact_header
729         {
730         public:
731                 compact_header(xml_memory_page* page, unsigned int flags)
732                 {
733                         PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
734
735                         ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
736                         assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
737
738                         _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
739                         _flags = static_cast<unsigned char>(flags);
740                 }
741
742                 void operator&=(uintptr_t mod)
743                 {
744                         _flags &= static_cast<unsigned char>(mod);
745                 }
746
747                 void operator|=(uintptr_t mod)
748                 {
749                         _flags |= static_cast<unsigned char>(mod);
750                 }
751
752                 uintptr_t operator&(uintptr_t mod) const
753                 {
754                         return _flags & mod;
755                 }
756
757                 xml_memory_page* get_page() const
758                 {
759                         // round-trip through void* to silence 'cast increases required alignment of target type' warnings
760                         const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
761                         const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
762
763                         return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
764                 }
765
766         private:
767                 unsigned char _page;
768                 unsigned char _flags;
769         };
770
771         PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
772         {
773                 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
774
775                 return header->get_page();
776         }
777
778         template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
779         {
780                 return static_cast<T*>(*compact_get_page(object, header_offset)->allocator->_hash->find(object));
781         }
782
783         template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
784         {
785                 *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value;
786         }
787
788         template <typename T, int header_offset, int start = -126> class compact_pointer
789         {
790         public:
791                 compact_pointer(): _data(0)
792                 {
793                 }
794
795                 void operator=(const compact_pointer& rhs)
796                 {
797                         *this = rhs + 0;
798                 }
799
800                 void operator=(T* value)
801                 {
802                         if (value)
803                         {
804                                 // value is guaranteed to be compact-aligned; 'this' is not
805                                 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
806                                 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
807                                 // compensate for arithmetic shift rounding for negative values
808                                 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
809                                 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
810
811                                 if (static_cast<uintptr_t>(offset) <= 253)
812                                         _data = static_cast<unsigned char>(offset + 1);
813                                 else
814                                 {
815                                         compact_set_value<header_offset>(this, value);
816
817                                         _data = 255;
818                                 }
819                         }
820                         else
821                                 _data = 0;
822                 }
823
824                 operator T*() const
825                 {
826                         if (_data)
827                         {
828                                 if (_data < 255)
829                                 {
830                                         uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
831
832                                         return reinterpret_cast<T*>(base + ((_data - 1 + start) << compact_alignment_log2));
833                                 }
834                                 else
835                                         return compact_get_value<header_offset, T>(this);
836                         }
837                         else
838                                 return 0;
839                 }
840
841                 T* operator->() const
842                 {
843                         return *this;
844                 }
845
846         private:
847                 unsigned char _data;
848         };
849
850         template <typename T, int header_offset> class compact_pointer_parent
851         {
852         public:
853                 compact_pointer_parent(): _data(0)
854                 {
855                 }
856
857                 void operator=(const compact_pointer_parent& rhs)
858                 {
859                         *this = rhs + 0;
860                 }
861
862                 void operator=(T* value)
863                 {
864                         if (value)
865                         {
866                                 // value is guaranteed to be compact-aligned; 'this' is not
867                                 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
868                                 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
869                                 // compensate for arithmetic shift behavior for negative values
870                                 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
871                                 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
872
873                                 if (static_cast<uintptr_t>(offset) <= 65533)
874                                 {
875                                         _data = static_cast<unsigned short>(offset + 1);
876                                 }
877                                 else
878                                 {
879                                         xml_memory_page* page = compact_get_page(this, header_offset);
880
881                                         if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
882                                                 page->compact_shared_parent = value;
883
884                                         if (page->compact_shared_parent == value)
885                                         {
886                                                 _data = 65534;
887                                         }
888                                         else
889                                         {
890                                                 compact_set_value<header_offset>(this, value);
891
892                                                 _data = 65535;
893                                         }
894                                 }
895                         }
896                         else
897                         {
898                                 _data = 0;
899                         }
900                 }
901
902                 operator T*() const
903                 {
904                         if (_data)
905                         {
906                                 if (_data < 65534)
907                                 {
908                                         uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
909
910                                         return reinterpret_cast<T*>(base + ((_data - 1 - 65533) << compact_alignment_log2));
911                                 }
912                                 else if (_data == 65534)
913                                         return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
914                                 else
915                                         return compact_get_value<header_offset, T>(this);
916                         }
917                         else
918                                 return 0;
919                 }
920
921                 T* operator->() const
922                 {
923                         return *this;
924                 }
925
926         private:
927                 uint16_t _data;
928         };
929
930         template <int header_offset, int base_offset> class compact_string
931         {
932         public:
933                 compact_string(): _data(0)
934                 {
935                 }
936
937                 void operator=(const compact_string& rhs)
938                 {
939                         *this = rhs + 0;
940                 }
941
942                 void operator=(char_t* value)
943                 {
944                         if (value)
945                         {
946                                 xml_memory_page* page = compact_get_page(this, header_offset);
947
948                                 if (PUGI__UNLIKELY(page->compact_string_base == 0))
949                                         page->compact_string_base = value;
950
951                                 ptrdiff_t offset = value - page->compact_string_base;
952
953                                 if (static_cast<uintptr_t>(offset) < (65535 << 7))
954                                 {
955                                         // round-trip through void* to silence 'cast increases required alignment of target type' warnings
956                                         uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
957
958                                         if (*base == 0)
959                                         {
960                                                 *base = static_cast<uint16_t>((offset >> 7) + 1);
961                                                 _data = static_cast<unsigned char>((offset & 127) + 1);
962                                         }
963                                         else
964                                         {
965                                                 ptrdiff_t remainder = offset - ((*base - 1) << 7);
966
967                                                 if (static_cast<uintptr_t>(remainder) <= 253)
968                                                 {
969                                                         _data = static_cast<unsigned char>(remainder + 1);
970                                                 }
971                                                 else
972                                                 {
973                                                         compact_set_value<header_offset>(this, value);
974
975                                                         _data = 255;
976                                                 }
977                                         }
978                                 }
979                                 else
980                                 {
981                                         compact_set_value<header_offset>(this, value);
982
983                                         _data = 255;
984                                 }
985                         }
986                         else
987                         {
988                                 _data = 0;
989                         }
990                 }
991
992                 operator char_t*() const
993                 {
994                         if (_data)
995                         {
996                                 if (_data < 255)
997                                 {
998                                         xml_memory_page* page = compact_get_page(this, header_offset);
999
1000                                         // round-trip through void* to silence 'cast increases required alignment of target type' warnings
1001                                         const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
1002                                         assert(*base);
1003
1004                                         ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
1005
1006                                         return page->compact_string_base + offset;
1007                                 }
1008                                 else
1009                                 {
1010                                         return compact_get_value<header_offset, char_t>(this);
1011                                 }
1012                         }
1013                         else
1014                                 return 0;
1015                 }
1016
1017         private:
1018                 unsigned char _data;
1019         };
1020 PUGI__NS_END
1021 #endif
1022
1023 #ifdef PUGIXML_COMPACT
1024 namespace pugi
1025 {
1026         struct xml_attribute_struct
1027         {
1028                 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
1029                 {
1030                         PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
1031                 }
1032
1033                 impl::compact_header header;
1034
1035                 uint16_t namevalue_base;
1036
1037                 impl::compact_string<4, 2> name;
1038                 impl::compact_string<5, 3> value;
1039
1040                 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
1041                 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
1042         };
1043
1044         struct xml_node_struct
1045         {
1046                 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
1047                 {
1048                         PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
1049                 }
1050
1051                 impl::compact_header header;
1052
1053                 uint16_t namevalue_base;
1054
1055                 impl::compact_string<4, 2> name;
1056                 impl::compact_string<5, 3> value;
1057
1058                 impl::compact_pointer_parent<xml_node_struct, 6> parent;
1059
1060                 impl::compact_pointer<xml_node_struct, 8, 0> first_child;
1061
1062                 impl::compact_pointer<xml_node_struct,  9>    prev_sibling_c;
1063                 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
1064
1065                 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
1066         };
1067 }
1068 #else
1069 namespace pugi
1070 {
1071         struct xml_attribute_struct
1072         {
1073                 xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
1074                 {
1075                         header = PUGI__GETHEADER_IMPL(this, page, 0);
1076                 }
1077
1078                 uintptr_t header;
1079
1080                 char_t* name;
1081                 char_t* value;
1082
1083                 xml_attribute_struct* prev_attribute_c;
1084                 xml_attribute_struct* next_attribute;
1085         };
1086
1087         struct xml_node_struct
1088         {
1089                 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
1090                 {
1091                         header = PUGI__GETHEADER_IMPL(this, page, type);
1092                 }
1093
1094                 uintptr_t header;
1095
1096                 char_t* name;
1097                 char_t* value;
1098
1099                 xml_node_struct* parent;
1100
1101                 xml_node_struct* first_child;
1102
1103                 xml_node_struct* prev_sibling_c;
1104                 xml_node_struct* next_sibling;
1105
1106                 xml_attribute_struct* first_attribute;
1107         };
1108 }
1109 #endif
1110
1111 PUGI__NS_BEGIN
1112         struct xml_extra_buffer
1113         {
1114                 char_t* buffer;
1115                 xml_extra_buffer* next;
1116         };
1117
1118         struct xml_document_struct: public xml_node_struct, public xml_allocator
1119         {
1120                 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
1121                 {
1122                 #ifdef PUGIXML_COMPACT
1123                         _hash = &hash;
1124                 #endif
1125                 }
1126
1127                 const char_t* buffer;
1128
1129                 xml_extra_buffer* extra_buffers;
1130
1131         #ifdef PUGIXML_COMPACT
1132                 compact_hash_table hash;
1133         #endif
1134         };
1135
1136         template <typename Object> inline xml_allocator& get_allocator(const Object* object)
1137         {
1138                 assert(object);
1139
1140                 return *PUGI__GETPAGE(object)->allocator;
1141         }
1142
1143         template <typename Object> inline xml_document_struct& get_document(const Object* object)
1144         {
1145                 assert(object);
1146
1147                 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
1148         }
1149 PUGI__NS_END
1150
1151 // Low-level DOM operations
1152 PUGI__NS_BEGIN
1153         inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
1154         {
1155                 xml_memory_page* page;
1156                 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1157                 if (!memory) return 0;
1158
1159                 return new (memory) xml_attribute_struct(page);
1160         }
1161
1162         inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
1163         {
1164                 xml_memory_page* page;
1165                 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1166                 if (!memory) return 0;
1167
1168                 return new (memory) xml_node_struct(page, type);
1169         }
1170
1171         inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
1172         {
1173                 if (a->header & impl::xml_memory_page_name_allocated_mask)
1174                         alloc.deallocate_string(a->name);
1175
1176                 if (a->header & impl::xml_memory_page_value_allocated_mask)
1177                         alloc.deallocate_string(a->value);
1178
1179                 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
1180         }
1181
1182         inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
1183         {
1184                 if (n->header & impl::xml_memory_page_name_allocated_mask)
1185                         alloc.deallocate_string(n->name);
1186
1187                 if (n->header & impl::xml_memory_page_value_allocated_mask)
1188                         alloc.deallocate_string(n->value);
1189
1190                 for (xml_attribute_struct* attr = n->first_attribute; attr; )
1191                 {
1192                         xml_attribute_struct* next = attr->next_attribute;
1193
1194                         destroy_attribute(attr, alloc);
1195
1196                         attr = next;
1197                 }
1198
1199                 for (xml_node_struct* child = n->first_child; child; )
1200                 {
1201                         xml_node_struct* next = child->next_sibling;
1202
1203                         destroy_node(child, alloc);
1204
1205                         child = next;
1206                 }
1207
1208                 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
1209         }
1210
1211         inline void append_node(xml_node_struct* child, xml_node_struct* node)
1212         {
1213                 child->parent = node;
1214
1215                 xml_node_struct* head = node->first_child;
1216
1217                 if (head)
1218                 {
1219                         xml_node_struct* tail = head->prev_sibling_c;
1220
1221                         tail->next_sibling = child;
1222                         child->prev_sibling_c = tail;
1223                         head->prev_sibling_c = child;
1224                 }
1225                 else
1226                 {
1227                         node->first_child = child;
1228                         child->prev_sibling_c = child;
1229                 }
1230         }
1231
1232         inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
1233         {
1234                 child->parent = node;
1235
1236                 xml_node_struct* head = node->first_child;
1237
1238                 if (head)
1239                 {
1240                         child->prev_sibling_c = head->prev_sibling_c;
1241                         head->prev_sibling_c = child;
1242                 }
1243                 else
1244                         child->prev_sibling_c = child;
1245
1246                 child->next_sibling = head;
1247                 node->first_child = child;
1248         }
1249
1250         inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
1251         {
1252                 xml_node_struct* parent = node->parent;
1253
1254                 child->parent = parent;
1255
1256                 if (node->next_sibling)
1257                         node->next_sibling->prev_sibling_c = child;
1258                 else
1259                         parent->first_child->prev_sibling_c = child;
1260
1261                 child->next_sibling = node->next_sibling;
1262                 child->prev_sibling_c = node;
1263
1264                 node->next_sibling = child;
1265         }
1266
1267         inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
1268         {
1269                 xml_node_struct* parent = node->parent;
1270
1271                 child->parent = parent;
1272
1273                 if (node->prev_sibling_c->next_sibling)
1274                         node->prev_sibling_c->next_sibling = child;
1275                 else
1276                         parent->first_child = child;
1277
1278                 child->prev_sibling_c = node->prev_sibling_c;
1279                 child->next_sibling = node;
1280
1281                 node->prev_sibling_c = child;
1282         }
1283
1284         inline void remove_node(xml_node_struct* node)
1285         {
1286                 xml_node_struct* parent = node->parent;
1287
1288                 if (node->next_sibling)
1289                         node->next_sibling->prev_sibling_c = node->prev_sibling_c;
1290                 else
1291                         parent->first_child->prev_sibling_c = node->prev_sibling_c;
1292
1293                 if (node->prev_sibling_c->next_sibling)
1294                         node->prev_sibling_c->next_sibling = node->next_sibling;
1295                 else
1296                         parent->first_child = node->next_sibling;
1297
1298                 node->parent = 0;
1299                 node->prev_sibling_c = 0;
1300                 node->next_sibling = 0;
1301         }
1302
1303         inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1304         {
1305                 xml_attribute_struct* head = node->first_attribute;
1306
1307                 if (head)
1308                 {
1309                         xml_attribute_struct* tail = head->prev_attribute_c;
1310
1311                         tail->next_attribute = attr;
1312                         attr->prev_attribute_c = tail;
1313                         head->prev_attribute_c = attr;
1314                 }
1315                 else
1316                 {
1317                         node->first_attribute = attr;
1318                         attr->prev_attribute_c = attr;
1319                 }
1320         }
1321
1322         inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1323         {
1324                 xml_attribute_struct* head = node->first_attribute;
1325
1326                 if (head)
1327                 {
1328                         attr->prev_attribute_c = head->prev_attribute_c;
1329                         head->prev_attribute_c = attr;
1330                 }
1331                 else
1332                         attr->prev_attribute_c = attr;
1333
1334                 attr->next_attribute = head;
1335                 node->first_attribute = attr;
1336         }
1337
1338         inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1339         {
1340                 if (place->next_attribute)
1341                         place->next_attribute->prev_attribute_c = attr;
1342                 else
1343                         node->first_attribute->prev_attribute_c = attr;
1344
1345                 attr->next_attribute = place->next_attribute;
1346                 attr->prev_attribute_c = place;
1347                 place->next_attribute = attr;
1348         }
1349
1350         inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1351         {
1352                 if (place->prev_attribute_c->next_attribute)
1353                         place->prev_attribute_c->next_attribute = attr;
1354                 else
1355                         node->first_attribute = attr;
1356
1357                 attr->prev_attribute_c = place->prev_attribute_c;
1358                 attr->next_attribute = place;
1359                 place->prev_attribute_c = attr;
1360         }
1361
1362         inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1363         {
1364                 if (attr->next_attribute)
1365                         attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
1366                 else
1367                         node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
1368
1369                 if (attr->prev_attribute_c->next_attribute)
1370                         attr->prev_attribute_c->next_attribute = attr->next_attribute;
1371                 else
1372                         node->first_attribute = attr->next_attribute;
1373
1374                 attr->prev_attribute_c = 0;
1375                 attr->next_attribute = 0;
1376         }
1377
1378         PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
1379         {
1380                 if (!alloc.reserve()) return 0;
1381
1382                 xml_node_struct* child = allocate_node(alloc, type);
1383                 if (!child) return 0;
1384
1385                 append_node(child, node);
1386
1387                 return child;
1388         }
1389
1390         PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
1391         {
1392                 if (!alloc.reserve()) return 0;
1393
1394                 xml_attribute_struct* attr = allocate_attribute(alloc);
1395                 if (!attr) return 0;
1396
1397                 append_attribute(attr, node);
1398
1399                 return attr;
1400         }
1401 PUGI__NS_END
1402
1403 // Helper classes for code generation
1404 PUGI__NS_BEGIN
1405         struct opt_false
1406         {
1407                 enum { value = 0 };
1408         };
1409
1410         struct opt_true
1411         {
1412                 enum { value = 1 };
1413         };
1414 PUGI__NS_END
1415
1416 // Unicode utilities
1417 PUGI__NS_BEGIN
1418         inline uint16_t endian_swap(uint16_t value)
1419         {
1420                 return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
1421         }
1422
1423         inline uint32_t endian_swap(uint32_t value)
1424         {
1425                 return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
1426         }
1427
1428         struct utf8_counter
1429         {
1430                 typedef size_t value_type;
1431
1432                 static value_type low(value_type result, uint32_t ch)
1433                 {
1434                         // U+0000..U+007F
1435                         if (ch < 0x80) return result + 1;
1436                         // U+0080..U+07FF
1437                         else if (ch < 0x800) return result + 2;
1438                         // U+0800..U+FFFF
1439                         else return result + 3;
1440                 }
1441
1442                 static value_type high(value_type result, uint32_t)
1443                 {
1444                         // U+10000..U+10FFFF
1445                         return result + 4;
1446                 }
1447         };
1448
1449         struct utf8_writer
1450         {
1451                 typedef uint8_t* value_type;
1452
1453                 static value_type low(value_type result, uint32_t ch)
1454                 {
1455                         // U+0000..U+007F
1456                         if (ch < 0x80)
1457                         {
1458                                 *result = static_cast<uint8_t>(ch);
1459                                 return result + 1;
1460                         }
1461                         // U+0080..U+07FF
1462                         else if (ch < 0x800)
1463                         {
1464                                 result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
1465                                 result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1466                                 return result + 2;
1467                         }
1468                         // U+0800..U+FFFF
1469                         else
1470                         {
1471                                 result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
1472                                 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1473                                 result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1474                                 return result + 3;
1475                         }
1476                 }
1477
1478                 static value_type high(value_type result, uint32_t ch)
1479                 {
1480                         // U+10000..U+10FFFF
1481                         result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
1482                         result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
1483                         result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1484                         result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1485                         return result + 4;
1486                 }
1487
1488                 static value_type any(value_type result, uint32_t ch)
1489                 {
1490                         return (ch < 0x10000) ? low(result, ch) : high(result, ch);
1491                 }
1492         };
1493
1494         struct utf16_counter
1495         {
1496                 typedef size_t value_type;
1497
1498                 static value_type low(value_type result, uint32_t)
1499                 {
1500                         return result + 1;
1501                 }
1502
1503                 static value_type high(value_type result, uint32_t)
1504                 {
1505                         return result + 2;
1506                 }
1507         };
1508
1509         struct utf16_writer
1510         {
1511                 typedef uint16_t* value_type;
1512
1513                 static value_type low(value_type result, uint32_t ch)
1514                 {
1515                         *result = static_cast<uint16_t>(ch);
1516
1517                         return result + 1;
1518                 }
1519
1520                 static value_type high(value_type result, uint32_t ch)
1521                 {
1522                         uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
1523                         uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
1524
1525                         result[0] = static_cast<uint16_t>(0xD800 + msh);
1526                         result[1] = static_cast<uint16_t>(0xDC00 + lsh);
1527
1528                         return result + 2;
1529                 }
1530
1531                 static value_type any(value_type result, uint32_t ch)
1532                 {
1533                         return (ch < 0x10000) ? low(result, ch) : high(result, ch);
1534                 }
1535         };
1536
1537         struct utf32_counter
1538         {
1539                 typedef size_t value_type;
1540
1541                 static value_type low(value_type result, uint32_t)
1542                 {
1543                         return result + 1;
1544                 }
1545
1546                 static value_type high(value_type result, uint32_t)
1547                 {
1548                         return result + 1;
1549                 }
1550         };
1551
// Output traits that store code points into a UTF-32 buffer; each code point occupies one unit.
struct utf32_writer
{
	typedef uint32_t* value_type;

	// writes ch at the current position and returns the next position
	static value_type low(value_type result, uint32_t ch)
	{
		result[0] = ch;

		return result + 1;
	}

	// UTF-32 needs no special handling above U+FFFF, so this stores exactly like low()
	static value_type high(value_type result, uint32_t ch)
	{
		return low(result, ch);
	}

	// same store as low(), for callers that do not distinguish the two ranges
	static value_type any(value_type result, uint32_t ch)
	{
		return low(result, ch);
	}
};
1577
// Output traits that store code points as Latin-1 bytes; anything above 255 is written as '?'.
struct latin1_writer
{
	typedef uint8_t* value_type;

	// writes ch when it fits into one byte, '?' otherwise
	static value_type low(value_type result, uint32_t ch)
	{
		const uint32_t narrowed = (ch <= 255) ? ch : static_cast<uint32_t>('?');

		result[0] = static_cast<uint8_t>(narrowed);

		return result + 1;
	}

	// code points routed here never fit into one byte, so the value itself is ignored
	static value_type high(value_type result, uint32_t ch)
	{
		(void)ch;

		result[0] = '?';

		return result + 1;
	}
};
1598
// Decodes UTF-8 input and hands every complete sequence to an output Traits type.
// Bytes that do not start a complete, correctly continued sequence are dropped one at a time.
struct utf8_decoder
{
	typedef uint8_t type;

	// data/size: input bytes; result: running Traits state (a count or a write position).
	// Sequences of up to three bytes go to Traits::low, four-byte sequences to Traits::high.
	// Returns the Traits state after the whole input has been consumed.
	template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
	{
		const uint8_t utf8_byte_mask = 0x3f;

		while (size != 0)
		{
			const uint8_t lead = data[0];

			// 0xxxxxxx -> U+0000..U+007F
			if (lead < 0x80)
			{
				result = Traits::low(result, lead);
				++data;
				--size;

				// the bulk ascii path below is only taken when the cursor sits on a 4-byte boundary
				if ((reinterpret_cast<uintptr_t>(data) & 3) != 0)
					continue;

				// test four bytes at once for a set high bit
				// the cast goes through void* to silence 'cast increases required alignment of target type' warnings
				while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
				{
					for (size_t k = 0; k < 4; ++k)
						result = Traits::low(result, data[k]);

					data += 4;
					size -= 4;
				}

				continue;
			}

			// 10xxxxxx, 11111xxx, or a truncated/badly continued sequence -> skip a single byte
			size_t consumed = 1;

			// 110xxxxx -> U+0080..U+07FF
			if ((lead & 0xE0) == 0xC0 && size >= 2 && (data[1] & 0xc0) == 0x80)
			{
				result = Traits::low(result, ((lead & 0x1F) << 6) | (data[1] & utf8_byte_mask));
				consumed = 2;
			}
			// 1110xxxx -> U+0800-U+FFFF
			else if ((lead & 0xF0) == 0xE0 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
			{
				result = Traits::low(result, ((lead & 0x0F) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
				consumed = 3;
			}
			// 11110xxx -> U+10000..U+10FFFF
			else if ((lead & 0xF8) == 0xF0 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
			{
				result = Traits::high(result, ((lead & 0x07) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
				consumed = 4;
			}

			data += consumed;
			size -= consumed;
		}

		return result;
	}
};
1665
1666         template <typename opt_swap> struct utf16_decoder
1667         {
1668                 typedef uint16_t type;
1669
1670                 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
1671                 {
1672                         while (size)
1673                         {
1674                                 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1675
1676                                 // U+0000..U+D7FF
1677                                 if (lead < 0xD800)
1678                                 {
1679                                         result = Traits::low(result, lead);
1680                                         data += 1;
1681                                         size -= 1;
1682                                 }
1683                                 // U+E000..U+FFFF
1684                                 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1685                                 {
1686                                         result = Traits::low(result, lead);
1687                                         data += 1;
1688                                         size -= 1;
1689                                 }
1690                                 // surrogate pair lead
1691                                 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
1692                                 {
1693                                         uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1694
1695                                         if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1696                                         {
1697                                                 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1698                                                 data += 2;
1699                                                 size -= 2;
1700                                         }
1701                                         else
1702                                         {
1703                                                 data += 1;
1704                                                 size -= 1;
1705                                         }
1706                                 }
1707                                 else
1708                                 {
1709                                         data += 1;
1710                                         size -= 1;
1711                                 }
1712                         }
1713
1714                         return result;
1715                 }
1716         };
1717
1718         template <typename opt_swap> struct utf32_decoder
1719         {
1720                 typedef uint32_t type;
1721
1722                 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
1723                 {
1724                         while (size)
1725                         {
1726                                 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1727
1728                                 // U+0000..U+FFFF
1729                                 if (lead < 0x10000)
1730                                 {
1731                                         result = Traits::low(result, lead);
1732                                         data += 1;
1733                                         size -= 1;
1734                                 }
1735                                 // U+10000..U+10FFFF
1736                                 else
1737                                 {
1738                                         result = Traits::high(result, lead);
1739                                         data += 1;
1740                                         size -= 1;
1741                                 }
1742                         }
1743
1744                         return result;
1745                 }
1746         };
1747
// Decodes Latin-1 input: every byte is its own code point and is passed to Traits::low unchanged.
struct latin1_decoder
{
	typedef uint8_t type;

	// data/size: input bytes; result: running Traits state; returns the state after all input is consumed
	template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
	{
		for (size_t i = 0; i < size; ++i)
			result = Traits::low(result, data[i]);

		return result;
	}
};
1764
1765         template <size_t size> struct wchar_selector;
1766
1767         template <> struct wchar_selector<2>
1768         {
1769                 typedef uint16_t type;
1770                 typedef utf16_counter counter;
1771                 typedef utf16_writer writer;
1772                 typedef utf16_decoder<opt_false> decoder;
1773         };
1774
1775         template <> struct wchar_selector<4>
1776         {
1777                 typedef uint32_t type;
1778                 typedef utf32_counter counter;
1779                 typedef utf32_writer writer;
1780                 typedef utf32_decoder<opt_false> decoder;
1781         };
1782
1783         typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
1784         typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
1785
1786         struct wchar_decoder
1787         {
1788                 typedef wchar_t type;
1789
1790                 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
1791                 {
1792                         typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
1793
1794                         return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
1795                 }
1796         };
1797
#ifdef PUGIXML_WCHAR_MODE
// Writes the endian-swapped form of data[0..length) to result[0..length).
// Each element is read before the same index is written.
PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
{
	// fixed-width integer type with the same size as wchar_t
	typedef wchar_selector<sizeof(wchar_t)>::type unit_type;

	for (size_t i = 0; i < length; ++i)
	{
		const unit_type unit = static_cast<unit_type>(data[i]);

		result[i] = static_cast<wchar_t>(endian_swap(unit));
	}
}
#endif
1805 PUGI__NS_END
1806
1807 PUGI__NS_BEGIN
// Bit flags stored in chartype_table; one bit per character class.
enum chartype_t
{
	ct_parse_pcdata = 1 << 0,	// \0, &, \r, <
	ct_parse_attr = 1 << 1,		// \0, &, \r, ', "
	ct_parse_attr_ws = 1 << 2,	// \0, &, \r, ', ", \n, tab
	ct_space = 1 << 3,		// \r, \n, space, tab
	ct_parse_cdata = 1 << 4,	// \0, ], >, \r
	ct_parse_comment = 1 << 5,	// \0, -, >, \r
	ct_symbol = 1 << 6,		// Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
	ct_start_symbol = 1 << 7	// Any symbol > 127, a-z, A-Z, _, :
};
1819
// Per-byte combination of chartype_t flags, indexed by character code.
static const unsigned char chartype_table[256] =
{
	// 0x00-0x0f: only NUL, tab, LF and CR carry flags
	55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0,
	// 0x10-0x1f
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	// 0x20-0x2f: space, double quote, ampersand, apostrophe, minus and dot carry flags
	8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0,
	// 0x30-0x3f: digits, then colon, semicolon, <, =, >, ?
	64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0,
	// 0x40-0x4f: @, A-O
	0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
	// 0x50-0x5f: P-Z, then five punctuation characters ending with underscore
	192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192,
	// 0x60-0x6f: backtick, a-o
	0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
	// 0x70-0x7f: p-z, then five characters without flags
	192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0,

	// 0x80-0xff: every value above 127 is both a symbol and a start symbol
	192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
1840
// Bit flags stored in chartypex_table; one bit per character class.
enum chartypex_t
{
	ctx_special_pcdata = 1 << 0,	// Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
	ctx_special_attr = 1 << 1,	// Any symbol >= 0 and < 32 (except \t), &, <, >, "
	ctx_start_symbol = 1 << 2,	// Any symbol > 127, a-z, A-Z, _
	ctx_digit = 1 << 3,		// 0-9
	ctx_symbol = 1 << 4		// Any symbol > 127, a-z, A-Z, 0-9, _, -, .
};
1849
// Per-byte combination of chartypex_t flags, indexed by character code.
static const unsigned char chartypex_table[256] =
{
	// 0x00-0x0f: control characters; tab, LF and CR differ from the rest
	3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3,
	// 0x10-0x1f
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	// 0x20-0x2f: double quote, ampersand, minus and dot carry flags
	0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0,
	// 0x30-0x3f: digits, then colon, semicolon, <, =, >, ?
	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0,

	// 0x40-0x4f: @, A-O
	0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	// 0x50-0x5f: P-Z, then five punctuation characters ending with underscore
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20,
	// 0x60-0x6f: backtick, a-o
	0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	// 0x70-0x7f: p-z, then five characters without flags
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0,

	// 0x80-0xff: every value above 127 is both a symbol and a start symbol
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
};
1871
// PUGI__IS_CHARTYPE_IMPL(c, ct, table) is non-zero when the table entry for character c has any of the ct bits set
#ifndef PUGIXML_WCHAR_MODE
	// narrow build: the character is reduced to an unsigned byte and used as the index
	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
#else
	// wide build: values below 128 use their own slot, every other value shares slot 128
	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
#endif

	// lookups bound to the two classification tables
	#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
	#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1880
1881         PUGI__FN bool is_little_endian()
1882         {
1883                 unsigned int ui = 1;
1884
1885                 return *reinterpret_cast<unsigned char*>(&ui) == 1;
1886         }
1887
1888         PUGI__FN xml_encoding get_wchar_encoding()
1889         {
1890                 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1891
1892                 if (sizeof(wchar_t) == 2)
1893                         return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1894                 else
1895                         return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1896         }
1897
1898         PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
1899         {
1900         #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
1901         #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }
1902
1903                 // check if we have a non-empty XML declaration
1904                 if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
1905                         return false;
1906
1907                 // scan XML declaration until the encoding field
1908                 for (size_t i = 6; i + 1 < size; ++i)
1909                 {
1910                         // declaration can not contain ? in quoted values
1911                         if (data[i] == '?')
1912                                 return false;
1913
1914                         if (data[i] == 'e' && data[i + 1] == 'n')
1915                         {
1916                                 size_t offset = i;
1917
1918                                 // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
1919                                 PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
1920                                 PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');
1921
1922                                 // S? = S?
1923                                 PUGI__SCANCHARTYPE(ct_space);
1924                                 PUGI__SCANCHAR('=');
1925                                 PUGI__SCANCHARTYPE(ct_space);
1926
1927                                 // the only two valid delimiters are ' and "
1928                                 uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
1929
1930                                 PUGI__SCANCHAR(delimiter);
1931
1932                                 size_t start = offset;
1933
1934                                 out_encoding = data + offset;
1935
1936                                 PUGI__SCANCHARTYPE(ct_symbol);
1937
1938                                 out_length = offset - start;
1939
1940                                 PUGI__SCANCHAR(delimiter);
1941
1942                                 return true;
1943                         }
1944                 }
1945
1946                 return false;
1947
1948         #undef PUGI__SCANCHAR
1949         #undef PUGI__SCANCHARTYPE
1950         }
1951
1952         PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
1953         {
1954                 // skip encoding autodetection if input buffer is too small
1955                 if (size < 4) return encoding_utf8;
1956
1957                 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1958
1959                 // look for BOM in first few bytes
1960                 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1961                 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1962                 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1963                 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1964                 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1965
1966                 // look for <, <? or <?xm in various encodings
1967                 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1968                 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1969                 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1970                 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1971
1972                 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1973                 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1974                 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1975
1976                 // no known BOM detected; parse declaration
1977                 const uint8_t* enc = 0;
1978                 size_t enc_length = 0;
1979
1980                 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
1981                 {
1982                         // iso-8859-1 (case-insensitive)
1983                         if (enc_length == 10
1984                                 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
1985                                 && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
1986                                 && enc[8] == '-' && enc[9] == '1')
1987                                 return encoding_latin1;
1988
1989                         // latin1 (case-insensitive)
1990                         if (enc_length == 6
1991                                 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
1992                                 && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
1993                                 && enc[5] == '1')
1994                                 return encoding_latin1;
1995                 }
1996
1997                 return encoding_utf8;
1998         }
1999
2000         PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
2001         {
2002                 // replace wchar encoding with utf implementation
2003                 if (encoding == encoding_wchar) return get_wchar_encoding();
2004
2005                 // replace utf16 encoding with utf16 with specific endianness
2006                 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2007
2008                 // replace utf32 encoding with utf32 with specific endianness
2009                 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2010
2011                 // only do autodetection if no explicit encoding is requested
2012                 if (encoding != encoding_auto) return encoding;
2013
2014                 // try to guess encoding (based on XML specification, Appendix F.1)
2015                 const uint8_t* data = static_cast<const uint8_t*>(contents);
2016
2017                 return guess_buffer_encoding(data, size);
2018         }
2019
2020         PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2021         {
2022                 size_t length = size / sizeof(char_t);
2023
2024                 if (is_mutable)
2025                 {
2026                         out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
2027                         out_length = length;
2028                 }
2029                 else
2030                 {
2031                         char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2032                         if (!buffer) return false;
2033
2034                         if (contents)
2035                                 memcpy(buffer, contents, length * sizeof(char_t));
2036                         else
2037                                 assert(length == 0);
2038
2039                         buffer[length] = 0;
2040
2041                         out_buffer = buffer;
2042                         out_length = length + 1;
2043                 }
2044
2045                 return true;
2046         }
2047
2048 #ifdef PUGIXML_WCHAR_MODE
2049         PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
2050         {
2051                 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
2052                            (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
2053         }
2054
2055         PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2056         {
2057                 const char_t* data = static_cast<const char_t*>(contents);
2058                 size_t length = size / sizeof(char_t);
2059
2060                 if (is_mutable)
2061                 {
2062                         char_t* buffer = const_cast<char_t*>(data);
2063
2064                         convert_wchar_endian_swap(buffer, data, length);
2065
2066                         out_buffer = buffer;
2067                         out_length = length;
2068                 }
2069                 else
2070                 {
2071                         char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2072                         if (!buffer) return false;
2073
2074                         convert_wchar_endian_swap(buffer, data, length);
2075                         buffer[length] = 0;
2076
2077                         out_buffer = buffer;
2078                         out_length = length + 1;
2079                 }
2080
2081                 return true;
2082         }
2083
2084         template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2085         {
2086                 const typename D::type* data = static_cast<const typename D::type*>(contents);
2087                 size_t data_length = size / sizeof(typename D::type);
2088
2089                 // first pass: get length in wchar_t units
2090                 size_t length = D::process(data, data_length, 0, wchar_counter());
2091
2092                 // allocate buffer of suitable length
2093                 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2094                 if (!buffer) return false;
2095
2096                 // second pass: convert utf16 input to wchar_t
2097                 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
2098                 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
2099
2100                 assert(oend == obegin + length);
2101                 *oend = 0;
2102
2103                 out_buffer = buffer;
2104                 out_length = length + 1;
2105
2106                 return true;
2107         }
2108
2109         PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2110         {
2111                 // get native encoding
2112                 xml_encoding wchar_encoding = get_wchar_encoding();
2113
2114                 // fast path: no conversion required
2115                 if (encoding == wchar_encoding)
2116                         return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2117
2118                 // only endian-swapping is required
2119                 if (need_endian_swap_utf(encoding, wchar_encoding))
2120                         return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
2121
2122                 // source encoding is utf8
2123                 if (encoding == encoding_utf8)
2124                         return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
2125
2126                 // source encoding is utf16
2127                 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2128                 {
2129                         xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2130
2131                         return (native_encoding == encoding) ?
2132                                 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2133                                 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2134                 }
2135
2136                 // source encoding is utf32
2137                 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2138                 {
2139                         xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2140
2141                         return (native_encoding == encoding) ?
2142                                 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2143                                 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2144                 }
2145
2146                 // source encoding is latin1
2147                 if (encoding == encoding_latin1)
2148                         return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
2149
2150                 assert(false && "Invalid encoding");
2151                 return false;
2152         }
2153 #else
2154         template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2155         {
2156                 const typename D::type* data = static_cast<const typename D::type*>(contents);
2157                 size_t data_length = size / sizeof(typename D::type);
2158
2159                 // first pass: get length in utf8 units
2160                 size_t length = D::process(data, data_length, 0, utf8_counter());
2161
2162                 // allocate buffer of suitable length
2163                 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2164                 if (!buffer) return false;
2165
2166                 // second pass: convert utf16 input to utf8
2167                 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2168                 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
2169
2170                 assert(oend == obegin + length);
2171                 *oend = 0;
2172
2173                 out_buffer = buffer;
2174                 out_length = length + 1;
2175
2176                 return true;
2177         }
2178
2179         PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
2180         {
2181                 for (size_t i = 0; i < size; ++i)
2182                         if (data[i] > 127)
2183                                 return i;
2184
2185                 return size;
2186         }
2187
2188         PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2189         {
2190                 const uint8_t* data = static_cast<const uint8_t*>(contents);
2191                 size_t data_length = size;
2192
2193                 // get size of prefix that does not need utf8 conversion
2194                 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2195                 assert(prefix_length <= data_length);
2196
2197                 const uint8_t* postfix = data + prefix_length;
2198                 size_t postfix_length = data_length - prefix_length;
2199
2200                 // if no conversion is needed, just return the original buffer
2201                 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2202
2203                 // first pass: get length in utf8 units
2204                 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
2205
2206                 // allocate buffer of suitable length
2207                 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2208                 if (!buffer) return false;
2209
2210                 // second pass: convert latin1 input to utf8
2211                 memcpy(buffer, data, prefix_length);
2212
2213                 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2214                 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
2215
2216                 assert(oend == obegin + length);
2217                 *oend = 0;
2218
2219                 out_buffer = buffer;
2220                 out_length = length + 1;
2221
2222                 return true;
2223         }
2224
2225         PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2226         {
2227                 // fast path: no conversion required
2228                 if (encoding == encoding_utf8)
2229                         return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2230
2231                 // source encoding is utf16
2232                 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2233                 {
2234                         xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2235
2236                         return (native_encoding == encoding) ?
2237                                 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2238                                 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2239                 }
2240
2241                 // source encoding is utf32
2242                 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2243                 {
2244                         xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2245
2246                         return (native_encoding == encoding) ?
2247                                 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2248                                 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2249                 }
2250
2251                 // source encoding is latin1
2252                 if (encoding == encoding_latin1)
2253                         return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
2254
2255                 assert(false && "Invalid encoding");
2256                 return false;
2257         }
2258 #endif
2259
2260         PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
2261         {
2262                 // get length in utf8 characters
2263                 return wchar_decoder::process(str, length, 0, utf8_counter());
2264         }
2265
2266         PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
2267         {
2268                 // convert to utf8
2269                 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2270                 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2271
2272                 assert(begin + size == end);
2273                 (void)!end;
2274                 (void)!size;
2275         }
2276
2277 #ifndef PUGIXML_NO_STL
2278         PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
2279         {
2280                 // first pass: get length in utf8 characters
2281                 size_t size = as_utf8_begin(str, length);
2282
2283                 // allocate resulting string
2284                 std::string result;
2285                 result.resize(size);
2286
2287                 // second pass: convert to utf8
2288                 if (size > 0) as_utf8_end(&result[0], size, str, length);
2289
2290                 return result;
2291         }
2292
2293         PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
2294         {
2295                 const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2296
2297                 // first pass: get length in wchar_t units
2298                 size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2299
2300                 // allocate resulting string
2301                 std::basic_string<wchar_t> result;
2302                 result.resize(length);
2303
2304                 // second pass: convert to wchar_t
2305                 if (length > 0)
2306                 {
2307                         wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
2308                         wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
2309
2310                         assert(begin + length == end);
2311                         (void)!end;
2312                 }
2313
2314                 return result;
2315         }
2316 #endif
2317
2318         template <typename Header>
2319         inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
2320         {
2321                 // never reuse shared memory
2322                 if (header & xml_memory_page_contents_shared_mask) return false;
2323
2324                 size_t target_length = strlength(target);
2325
2326                 // always reuse document buffer memory if possible
2327                 if ((header & header_mask) == 0) return target_length >= length;
2328
2329                 // reuse heap memory if waste is not too great
2330                 const size_t reuse_threshold = 32;
2331
2332                 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
2333         }
2334
2335         template <typename String, typename Header>
2336         PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
2337         {
2338                 if (source_length == 0)
2339                 {
2340                         // empty string and null pointer are equivalent, so just deallocate old memory
2341                         xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2342
2343                         if (header & header_mask) alloc->deallocate_string(dest);
2344
2345                         // mark the string as not allocated
2346                         dest = 0;
2347                         header &= ~header_mask;
2348
2349                         return true;
2350                 }
2351                 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
2352                 {
2353                         // we can reuse old buffer, so just copy the new data (including zero terminator)
2354                         memcpy(dest, source, source_length * sizeof(char_t));
2355                         dest[source_length] = 0;
2356
2357                         return true;
2358                 }
2359                 else
2360                 {
2361                         xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2362
2363                         if (!alloc->reserve()) return false;
2364
2365                         // allocate new buffer
2366                         char_t* buf = alloc->allocate_string(source_length + 1);
2367                         if (!buf) return false;
2368
2369                         // copy the string (including zero terminator)
2370                         memcpy(buf, source, source_length * sizeof(char_t));
2371                         buf[source_length] = 0;
2372
2373                         // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
2374                         if (header & header_mask && dest) alloc->deallocate_string(dest);
2375
2376                         // the string is now allocated, so set the flag
2377                         dest = buf;
2378                         header |= header_mask;
2379
2380                         return true;
2381                 }
2382         }
2383
2384         struct gap
2385         {
2386                 char_t* end;
2387                 size_t size;
2388
2389                 gap(): end(0), size(0)
2390                 {
2391                 }
2392
2393                 // Push new gap, move s count bytes further (skipping the gap).
2394                 // Collapse previous gap.
2395                 void push(char_t*& s, size_t count)
2396                 {
2397                         if (end) // there was a gap already; collapse it
2398                         {
2399                                 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
2400                                 assert(s >= end);
2401                                 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2402                         }
2403
2404                         s += count; // end of current gap
2405
2406                         // "merge" two gaps
2407                         end = s;
2408                         size += count;
2409                 }
2410
2411                 // Collapse all gaps, return past-the-end pointer
2412                 char_t* flush(char_t* s)
2413                 {
2414                         if (end)
2415                         {
2416                                 // Move [old_gap_end, current_pos) to [old_gap_start, ...)
2417                                 assert(s >= end);
2418                                 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2419
2420                                 return s - size;
2421                         }
2422                         else return s;
2423                 }
2424         };
2425
2426         PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
2427         {
2428                 char_t* stre = s + 1;
2429
2430                 switch (*stre)
2431                 {
2432                         case '#':       // &#...
2433                         {
2434                                 unsigned int ucsc = 0;
2435
2436                                 if (stre[1] == 'x') // &#x... (hex code)
2437                                 {
2438                                         stre += 2;
2439
2440                                         char_t ch = *stre;
2441
2442                                         if (ch == ';') return stre;
2443
2444                                         for (;;)
2445                                         {
2446                                                 if (static_cast<unsigned int>(ch - '0') <= 9)
2447                                                         ucsc = 16 * ucsc + (ch - '0');
2448                                                 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
2449                                                         ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
2450                                                 else if (ch == ';')
2451                                                         break;
2452                                                 else // cancel
2453                                                         return stre;
2454
2455                                                 ch = *++stre;
2456                                         }
2457
2458                                         ++stre;
2459                                 }
2460                                 else    // &#... (dec code)
2461                                 {
2462                                         char_t ch = *++stre;
2463
2464                                         if (ch == ';') return stre;
2465
2466                                         for (;;)
2467                                         {
2468                                                 if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9)
2469                                                         ucsc = 10 * ucsc + (ch - '0');
2470                                                 else if (ch == ';')
2471                                                         break;
2472                                                 else // cancel
2473                                                         return stre;
2474
2475                                                 ch = *++stre;
2476                                         }
2477
2478                                         ++stre;
2479                                 }
2480
2481                         #ifdef PUGIXML_WCHAR_MODE
2482                                 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
2483                         #else
2484                                 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
2485                         #endif
2486
2487                                 g.push(s, stre - s);
2488                                 return stre;
2489                         }
2490
2491                         case 'a':       // &a
2492                         {
2493                                 ++stre;
2494
2495                                 if (*stre == 'm') // &am
2496                                 {
2497                                         if (*++stre == 'p' && *++stre == ';') // &amp;
2498                                         {
2499                                                 *s++ = '&';
2500                                                 ++stre;
2501
2502                                                 g.push(s, stre - s);
2503                                                 return stre;
2504                                         }
2505                                 }
2506                                 else if (*stre == 'p') // &ap
2507                                 {
2508                                         if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
2509                                         {
2510                                                 *s++ = '\'';
2511                                                 ++stre;
2512
2513                                                 g.push(s, stre - s);
2514                                                 return stre;
2515                                         }
2516                                 }
2517                                 break;
2518                         }
2519
2520                         case 'g': // &g
2521                         {
2522                                 if (*++stre == 't' && *++stre == ';') // &gt;
2523                                 {
2524                                         *s++ = '>';
2525                                         ++stre;
2526
2527                                         g.push(s, stre - s);
2528                                         return stre;
2529                                 }
2530                                 break;
2531                         }
2532
2533                         case 'l': // &l
2534                         {
2535                                 if (*++stre == 't' && *++stre == ';') // &lt;
2536                                 {
2537                                         *s++ = '<';
2538                                         ++stre;
2539
2540                                         g.push(s, stre - s);
2541                                         return stre;
2542                                 }
2543                                 break;
2544                         }
2545
2546                         case 'q': // &q
2547                         {
2548                                 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
2549                                 {
2550                                         *s++ = '"';
2551                                         ++stre;
2552
2553                                         g.push(s, stre - s);
2554                                         return stre;
2555                                 }
2556                                 break;
2557                         }
2558
2559                         default:
2560                                 break;
2561                 }
2562
2563                 return stre;
2564         }
2565
// Parser utilities
// These macros expand to code that uses names from the function they appear in
// (s, endch, optmsk, cursor, alloc, ch, error_offset, error_status as applicable).

// True when c equals e, or when c is the zero character and endch equals e
#define PUGI__ENDSWITH(c, e) \
    ((c) == (e) || ((c) == 0 && endch == (e)))

// Advances s over characters classified as ct_space
#define PUGI__SKIPWS() \
    { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }

// Non-zero when any bit of OPT is set in optmsk
#define PUGI__OPTSET(OPT) \
    ( optmsk & (OPT) )

// Appends a node of TYPE under cursor and makes it current; out-of-memory error on failure
#define PUGI__PUSHNODE(TYPE) \
    { \
        cursor = append_new_node(cursor, alloc, TYPE); \
        if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); \
    }

// Makes the parent of the current node current
#define PUGI__POPNODE() \
    { cursor = cursor->parent; }

// Advances s until X holds or the zero character is reached
#define PUGI__SCANFOR(X) \
    { while (*s != 0 && !(X)) ++s; }

// Advances s while X holds
#define PUGI__SCANWHILE(X) \
    { while (X) ++s; }

// Same as PUGI__SCANWHILE but examines four characters per iteration;
// X must test the local ss (which takes the values s[0] .. s[3]) rather than *s
#define PUGI__SCANWHILE_UNROLL(X) \
    { \
        for (;;) \
        { \
            char_t ss = s[0]; \
            if (PUGI__UNLIKELY(!(X))) { break; } \
            ss = s[1]; \
            if (PUGI__UNLIKELY(!(X))) { s += 1; break; } \
            ss = s[2]; \
            if (PUGI__UNLIKELY(!(X))) { s += 2; break; } \
            ss = s[3]; \
            if (PUGI__UNLIKELY(!(X))) { s += 3; break; } \
            s += 4; \
        } \
    }

// Saves the character at s into ch, overwrites it with a zero and moves past it
#define PUGI__ENDSEG() \
    { ch = *s; *s = 0; ++s; }

// Records the error position and status and returns a null char_t* from the enclosing function
#define PUGI__THROW_ERROR(err, m) \
    return error_offset = m, error_status = err, static_cast<char_t*>(0)

// Raises err at position m when s has reached the zero character
#define PUGI__CHECK_ERROR(err, m) \
    { if (*s == 0) PUGI__THROW_ERROR(err, m); }
2578
2579         PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
2580         {
2581                 gap g;
2582
2583                 while (true)
2584                 {
2585                         PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
2586
2587                         if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2588                         {
2589                                 *s++ = '\n'; // replace first one with 0x0a
2590
2591                                 if (*s == '\n') g.push(s, 1);
2592                         }
2593                         else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
2594                         {
2595                                 *g.flush(s) = 0;
2596
2597                                 return s + (s[2] == '>' ? 3 : 2);
2598                         }
2599                         else if (*s == 0)
2600                         {
2601                                 return 0;
2602                         }
2603                         else ++s;
2604                 }
2605         }
2606
2607         PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
2608         {
2609                 gap g;
2610
2611                 while (true)
2612                 {
2613                         PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
2614
2615                         if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2616                         {
2617                                 *s++ = '\n'; // replace first one with 0x0a
2618
2619                                 if (*s == '\n') g.push(s, 1);
2620                         }
2621                         else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
2622                         {
2623                                 *g.flush(s) = 0;
2624
2625                                 return s + 1;
2626                         }
2627                         else if (*s == 0)
2628                         {
2629                                 return 0;
2630                         }
2631                         else ++s;
2632                 }
2633         }
2634
2635         typedef char_t* (*strconv_pcdata_t)(char_t*);
2636
2637         template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
2638         {
2639                 static char_t* parse(char_t* s)
2640                 {
2641                         gap g;
2642
2643                         char_t* begin = s;
2644
2645                         while (true)
2646                         {
2647                                 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
2648
2649                                 if (*s == '<') // PCDATA ends here
2650                                 {
2651                                         char_t* end = g.flush(s);
2652
2653                                         if (opt_trim::value)
2654                                                 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2655                                                         --end;
2656
2657                                         *end = 0;
2658
2659                                         return s + 1;
2660                                 }
2661                                 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2662                                 {
2663                                         *s++ = '\n'; // replace first one with 0x0a
2664
2665                                         if (*s == '\n') g.push(s, 1);
2666                                 }
2667                                 else if (opt_escape::value && *s == '&')
2668                                 {
2669                                         s = strconv_escape(s, g);
2670                                 }
2671                                 else if (*s == 0)
2672                                 {
2673                                         char_t* end = g.flush(s);
2674
2675                                         if (opt_trim::value)
2676                                                 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2677                                                         --end;
2678
2679                                         *end = 0;
2680
2681                                         return s;
2682                                 }
2683                                 else ++s;
2684                         }
2685                 }
2686         };
2687
2688         PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
2689         {
2690                 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
2691
2692                 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim)
2693                 {
2694                 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
2695                 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
2696                 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
2697                 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
2698                 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
2699                 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
2700                 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
2701                 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
2702                 default: assert(false); return 0; // should not get here
2703                 }
2704         }
2705
2706         typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
2707
2708         template <typename opt_escape> struct strconv_attribute_impl
2709         {
2710                 static char_t* parse_wnorm(char_t* s, char_t end_quote)
2711                 {
2712                         gap g;
2713
2714                         // trim leading whitespaces
2715                         if (PUGI__IS_CHARTYPE(*s, ct_space))
2716                         {
2717                                 char_t* str = s;
2718
2719                                 do ++str;
2720                                 while (PUGI__IS_CHARTYPE(*str, ct_space));
2721
2722                                 g.push(s, str - s);
2723                         }
2724
2725                         while (true)
2726                         {
2727                                 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
2728
2729                                 if (*s == end_quote)
2730                                 {
2731                                         char_t* str = g.flush(s);
2732
2733                                         do *str-- = 0;
2734                                         while (PUGI__IS_CHARTYPE(*str, ct_space));
2735
2736                                         return s + 1;
2737                                 }
2738                                 else if (PUGI__IS_CHARTYPE(*s, ct_space))
2739                                 {
2740                                         *s++ = ' ';
2741
2742                                         if (PUGI__IS_CHARTYPE(*s, ct_space))
2743                                         {
2744                                                 char_t* str = s + 1;
2745                                                 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
2746
2747                                                 g.push(s, str - s);
2748                                         }
2749                                 }
2750                                 else if (opt_escape::value && *s == '&')
2751                                 {
2752                                         s = strconv_escape(s, g);
2753                                 }
2754                                 else if (!*s)
2755                                 {
2756                                         return 0;
2757                                 }
2758                                 else ++s;
2759                         }
2760                 }
2761
2762                 static char_t* parse_wconv(char_t* s, char_t end_quote)
2763                 {
2764                         gap g;
2765
2766                         while (true)
2767                         {
2768                                 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
2769
2770                                 if (*s == end_quote)
2771                                 {
2772                                         *g.flush(s) = 0;
2773
2774                                         return s + 1;
2775                                 }
2776                                 else if (PUGI__IS_CHARTYPE(*s, ct_space))
2777                                 {
2778                                         if (*s == '\r')
2779                                         {
2780                                                 *s++ = ' ';
2781
2782                                                 if (*s == '\n') g.push(s, 1);
2783                                         }
2784                                         else *s++ = ' ';
2785                                 }
2786                                 else if (opt_escape::value && *s == '&')
2787                                 {
2788                                         s = strconv_escape(s, g);
2789                                 }
2790                                 else if (!*s)
2791                                 {
2792                                         return 0;
2793                                 }
2794                                 else ++s;
2795                         }
2796                 }
2797
2798                 static char_t* parse_eol(char_t* s, char_t end_quote)
2799                 {
2800                         gap g;
2801
2802                         while (true)
2803                         {
2804                                 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2805
2806                                 if (*s == end_quote)
2807                                 {
2808                                         *g.flush(s) = 0;
2809
2810                                         return s + 1;
2811                                 }
2812                                 else if (*s == '\r')
2813                                 {
2814                                         *s++ = '\n';
2815
2816                                         if (*s == '\n') g.push(s, 1);
2817                                 }
2818                                 else if (opt_escape::value && *s == '&')
2819                                 {
2820                                         s = strconv_escape(s, g);
2821                                 }
2822                                 else if (!*s)
2823                                 {
2824                                         return 0;
2825                                 }
2826                                 else ++s;
2827                         }
2828                 }
2829
2830                 static char_t* parse_simple(char_t* s, char_t end_quote)
2831                 {
2832                         gap g;
2833
2834                         while (true)
2835                         {
2836                                 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2837
2838                                 if (*s == end_quote)
2839                                 {
2840                                         *g.flush(s) = 0;
2841
2842                                         return s + 1;
2843                                 }
2844                                 else if (opt_escape::value && *s == '&')
2845                                 {
2846                                         s = strconv_escape(s, g);
2847                                 }
2848                                 else if (!*s)
2849                                 {
2850                                         return 0;
2851                                 }
2852                                 else ++s;
2853                         }
2854                 }
2855         };
2856
2857         PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
2858         {
2859                 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
2860
2861                 switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
2862                 {
2863                 case 0:  return strconv_attribute_impl<opt_false>::parse_simple;
2864                 case 1:  return strconv_attribute_impl<opt_true>::parse_simple;
2865                 case 2:  return strconv_attribute_impl<opt_false>::parse_eol;
2866                 case 3:  return strconv_attribute_impl<opt_true>::parse_eol;
2867                 case 4:  return strconv_attribute_impl<opt_false>::parse_wconv;
2868                 case 5:  return strconv_attribute_impl<opt_true>::parse_wconv;
2869                 case 6:  return strconv_attribute_impl<opt_false>::parse_wconv;
2870                 case 7:  return strconv_attribute_impl<opt_true>::parse_wconv;
2871                 case 8:  return strconv_attribute_impl<opt_false>::parse_wnorm;
2872                 case 9:  return strconv_attribute_impl<opt_true>::parse_wnorm;
2873                 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
2874                 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
2875                 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
2876                 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
2877                 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
2878                 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
2879                 default: assert(false); return 0; // should not get here
2880                 }
2881         }
2882
2883         inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2884         {
2885                 xml_parse_result result;
2886                 result.status = status;
2887                 result.offset = offset;
2888
2889                 return result;
2890         }
2891
2892         struct xml_parser
2893         {
2894                 xml_allocator alloc;
2895                 xml_allocator* alloc_state;
2896                 char_t* error_offset;
2897                 xml_parse_status error_status;
2898
2899                 xml_parser(xml_allocator* alloc_): alloc(*alloc_), alloc_state(alloc_), error_offset(0), error_status(status_ok)
2900                 {
2901                 }
2902
2903                 ~xml_parser()
2904                 {
2905                         *alloc_state = alloc;
2906                 }
2907
2908                 // DOCTYPE consists of nested sections of the following possible types:
2909                 // <!-- ... -->, <? ... ?>, "...", '...'
2910                 // <![...]]>
2911                 // <!...>
2912                 // First group can not contain nested groups
2913                 // Second group can contain nested groups of the same type
2914                 // Third group can contain all other groups
2915                 char_t* parse_doctype_primitive(char_t* s)
2916                 {
2917                         if (*s == '"' || *s == '\'')
2918                         {
2919                                 // quoted string
2920                                 char_t ch = *s++;
2921                                 PUGI__SCANFOR(*s == ch);
2922                                 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2923
2924                                 s++;
2925                         }
2926                         else if (s[0] == '<' && s[1] == '?')
2927                         {
2928                                 // <? ... ?>
2929                                 s += 2;
2930                                 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2931                                 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2932
2933                                 s += 2;
2934                         }
2935                         else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2936                         {
2937                                 s += 4;
2938                                 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2939                                 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2940
2941                                 s += 3;
2942                         }
2943                         else PUGI__THROW_ERROR(status_bad_doctype, s);
2944
2945                         return s;
2946                 }
2947
2948                 char_t* parse_doctype_ignore(char_t* s)
2949                 {
2950                         size_t depth = 0;
2951
2952                         assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2953                         s += 3;
2954
2955                         while (*s)
2956                         {
2957                                 if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2958                                 {
2959                                         // nested ignore section
2960                                         s += 3;
2961                                         depth++;
2962                                 }
2963                                 else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2964                                 {
2965                                         // ignore section end
2966                                         s += 3;
2967
2968                                         if (depth == 0)
2969                                                 return s;
2970
2971                                         depth--;
2972                                 }
2973                                 else s++;
2974                         }
2975
2976                         PUGI__THROW_ERROR(status_bad_doctype, s);
2977                 }
2978
2979                 char_t* parse_doctype_group(char_t* s, char_t endch)
2980                 {
2981                         size_t depth = 0;
2982
2983                         assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
2984                         s += 2;
2985
2986                         while (*s)
2987                         {
2988                                 if (s[0] == '<' && s[1] == '!' && s[2] != '-')
2989                                 {
2990                                         if (s[2] == '[')
2991                                         {
2992                                                 // ignore
2993                                                 s = parse_doctype_ignore(s);
2994                                                 if (!s) return s;
2995                                         }
2996                                         else
2997                                         {
2998                                                 // some control group
2999                                                 s += 2;
3000                                                 depth++;
3001                                         }
3002                                 }
3003                                 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
3004                                 {
3005                                         // unknown tag (forbidden), or some primitive group
3006                                         s = parse_doctype_primitive(s);
3007                                         if (!s) return s;
3008                                 }
3009                                 else if (*s == '>')
3010                                 {
3011                                         if (depth == 0)
3012                                                 return s;
3013
3014                                         depth--;
3015                                         s++;
3016                                 }
3017                                 else s++;
3018                         }
3019
3020                         if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
3021
3022                         return s;
3023                 }
3024
3025                 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
3026                 {
3027                         // parse node contents, starting with exclamation mark
                             // On entry s points at the '!' of "<!" (parse_tree calls this when *s == '!').
                             // Three constructs are recognized: comments ("<!--"), CDATA sections ("<![CDATA[")
                             // and DOCTYPE; anything else is reported as status_unrecognized_tag.
                             // The result is the position after the construct that was consumed.
                             // cursor is received by value, so anything PUGI__PUSHNODE does to the local
                             // 'cursor' variable is not seen by the caller.
                             // NOTE(review): endch looks like the character that the buffer's terminating zero
                             // replaced (see the "*s == 0 && endch == ..." checks below) - confirm against the caller.
                             // NOTE(review): PUGI__THROW_ERROR / PUGI__CHECK_ERROR are defined elsewhere; they
                             // presumably record the error and return null, which parse_tree checks for.
3028                         ++s;
3029
3030                         if (*s == '-') // '<!-...'
3031                         {
3032                                 ++s;
3033
3034                                 if (*s == '-') // '<!--...'
3035                                 {
3036                                         ++s;
3037
                                             // a comment node is only created when parse_comments is set;
                                             // otherwise the comment text is just skipped
3038                                         if (PUGI__OPTSET(parse_comments))
3039                                         {
3040                                                 PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
3041                                                 cursor->value = s; // Save the offset.
3042                                         }
3043
                                             // the converting scanner is only needed when the text is kept AND parse_eol is set
3044                                         if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
3045                                         {
3046                                                 s = strconv_comment(s, endch);
3047
3048                                                 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
3049                                         }
3050                                         else
3051                                         {
3052                                                 // Scan for terminating '-->'.
3053                                                 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
3054                                                 PUGI__CHECK_ERROR(status_bad_comment, s);
3055
3056                                                 if (PUGI__OPTSET(parse_comments))
3057                                                         *s = 0; // Zero-terminate this segment at the first terminating '-'.
3058
                                                     // only two characters are skipped when s[2] is not '>'
                                                     // NOTE(review): presumably the case where PUGI__ENDSWITH matched the end of the buffer - confirm
3059                                                 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
3060                                         }
3061                                 }
3062                                 else PUGI__THROW_ERROR(status_bad_comment, s);
3063                         }
3064                         else if (*s == '[')
3065                         {
3066                                 // '<![CDATA[...'
                                     // the chained pre-increments consume "CDATA[" one character at a time and stop at the first mismatch
3067                                 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
3068                                 {
3069                                         ++s;
3070
3071                                         if (PUGI__OPTSET(parse_cdata))
3072                                         {
3073                                                 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
3074                                                 cursor->value = s; // Save the offset.
3075
3076                                                 if (PUGI__OPTSET(parse_eol))
3077                                                 {
3078                                                         s = strconv_cdata(s, endch);
3079
3080                                                         if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
3081                                                 }
3082                                                 else
3083                                                 {
3084                                                         // Scan for terminating ']]>'.
3085                                                         PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3086                                                         PUGI__CHECK_ERROR(status_bad_cdata, s);
3087
3088                                                         *s++ = 0; // Zero-terminate this segment.
3089                                                 }
3090                                         }
3091                                         else // Flagged for discard, but we still have to scan for the terminator.
3092                                         {
3093                                                 // Scan for terminating ']]>'.
3094                                                 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3095                                                 PUGI__CHECK_ERROR(status_bad_cdata, s);
3096
3097                                                 ++s;
3098                                         }
3099
                                             // in the two scanning branches above the first ']' has already been stepped over,
                                             // so s is on the second ']' here
                                             // NOTE(review): this assumes strconv_cdata leaves s in the same position - confirm
3100                                         s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
3101                                 }
3102                                 else PUGI__THROW_ERROR(status_bad_cdata, s);
3103                         }
3104                         else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
3105                         {
                                     // s is on the 'D'; step back over "<!" so that s points at the '<' again
3106                                 s -= 2;
3107
                                     // a DOCTYPE is rejected when the current node has a parent
3108                                 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
3109
                                     // "<!DOCTYPE" is 9 characters long, so mark is the first character after the keyword
3110                                 char_t* mark = s + 9;
3111
3112                                 s = parse_doctype_group(s, endch);
3113                                 if (!s) return s;
3114
                                     // s is now on the closing '>' (or on the terminating zero); cut the text there
3115                                 assert((*s == 0 && endch == '>') || *s == '>');
3116                                 if (*s) *s++ = 0;
3117
3118                                 if (PUGI__OPTSET(parse_doctype))
3119                                 {
                                             // the stored value starts at the first non-space character after the keyword
3120                                         while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
3121
3122                                         PUGI__PUSHNODE(node_doctype);
3123
3124                                         cursor->value = mark;
3125                                 }
3126                         }
                             // the buffer ended right after "<!": pick the error status from endch
3127                         else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
3128                         else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
3129                         else PUGI__THROW_ERROR(status_unrecognized_tag, s);
3130
3131                         return s;
3132                 }
3133
3134                 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
3135                 {
                             // Parses "<?target ...?>": either the XML declaration (target "xml" in any letter case)
                             // or an ordinary processing instruction.
                             // On entry s points at the '?' (parse_tree calls this when *s == '?').
                             // ref_cursor is in/out: the local copy is written back just before the final return.
                             // For a declaration with content, the function returns with the cursor on the new
                             // node_declaration node and s at the start of that content; otherwise s is past the "?>".
                             // NOTE(review): endch is never named in this body; presumably the PUGI__ENDSWITH macro
                             // reads it - confirm against the macro definition.
3136                         // load into registers
3137                         xml_node_struct* cursor = ref_cursor;
3138                         char_t ch = 0;
3139
3140                         // parse node contents, starting with question mark
3141                         ++s;
3142
3143                         // read PI target
3144                         char_t* target = s;
3145
3146                         if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
3147
3148                         PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
3149                         PUGI__CHECK_ERROR(status_bad_pi, s);
3150
3151                         // determine node type; stricmp / strcasecmp is not portable
                             // OR-ing with ' ' (0x20) folds 'X'/'M'/'L' onto 'x'/'m'/'l';
                             // target + 3 == s requires the target to be exactly three characters long
3152                         bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
3153
                             // a node is only created when the flag matching the node kind is set
3154                         if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
3155                         {
3156                                 if (declaration)
3157                                 {
3158                                         // disallow non top-level declarations
3159                                         if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
3160
3161                                         PUGI__PUSHNODE(node_declaration);
3162                                 }
3163                                 else
3164                                 {
3165                                         PUGI__PUSHNODE(node_pi);
3166                                 }
3167
3168                                 cursor->name = target;
3169
                                     // per its use in parse_tree: saves the character at s in 'ch', zero-terminates the name and steps over it
3170                                 PUGI__ENDSEG();
3171
3172                                 // parse value/attributes
3173                                 if (ch == '?')
3174                                 {
3175                                         // empty node
3176                                         if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
                                             // advance by one only if the '>' is really there
3177                                         s += (*s == '>');
3178
3179                                         PUGI__POPNODE();
3180                                 }
3181                                 else if (PUGI__IS_CHARTYPE(ch, ct_space))
3182                                 {
3183                                         PUGI__SKIPWS();
3184
3185                                         // scan for tag end
3186                                         char_t* value = s;
3187
3188                                         PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3189                                         PUGI__CHECK_ERROR(status_bad_pi, s);
3190
3191                                         if (declaration)
3192                                         {
3193                                                 // replace ending ? with / so that 'element' terminates properly
3194                                                 *s = '/';
3195
3196                                                 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
3197                                                 s = value;
3198                                         }
3199                                         else
3200                                         {
3201                                                 // store value and step over >
3202                                                 cursor->value = value;
3203
3204                                                 PUGI__POPNODE();
3205
                                                     // here the segment ends at the '?', so the stored value stops before "?>"
3206                                                 PUGI__ENDSEG();
3207
3208                                                 s += (*s == '>');
3209                                         }
3210                                 }
3211                                 else PUGI__THROW_ERROR(status_bad_pi, s);
3212                         }
3213                         else
3214                         {
                                     // the node kind is disabled in optmsk: nothing is stored, only the terminator is located
3215                                 // scan for tag end
3216                                 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3217                                 PUGI__CHECK_ERROR(status_bad_pi, s);
3218
                                     // skip "?>", or just the '?' when s[1] is not '>'
3219                                 s += (s[1] == '>' ? 2 : 1);
3220                         }
3221
3222                         // store from registers
3223                         ref_cursor = cursor;
3224
3225                         return s;
3226                 }
3227
3228                 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
3229                 {
3230                         strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
3231                         strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
3232                         if (!strconv_pcdata) {
3233                                 return nullptr;
3234                         }
3235                         char_t ch = 0;
3236                         xml_node_struct* cursor = root;
3237                         char_t* mark = s;
3238
3239                         while (*s != 0)
3240                         {
3241                                 if (*s == '<')
3242                                 {
3243                                         ++s;
3244
3245                                 LOC_TAG:
3246                                         if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
3247                                         {
3248                                                 PUGI__PUSHNODE(node_element); // Append a new node to the tree.
3249
3250                                                 cursor->name = s;
3251
3252                                                 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3253                                                 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3254
3255                                                 if (ch == '>')
3256                                                 {
3257                                                         // end of tag
3258                                                 }
3259                                                 else if (PUGI__IS_CHARTYPE(ch, ct_space))
3260                                                 {
3261                                                 LOC_ATTRIBUTES:
3262                                                         while (true)
3263                                                         {
3264                                                                 PUGI__SKIPWS(); // Eat any whitespace.
3265
3266                                                                 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
3267                                                                 {
3268                                                                         xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute.
3269                                                                         if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
3270
3271                                                                         a->name = s; // Save the offset.
3272
3273                                                                         PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3274                                                                         PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3275
3276                                                                         if (PUGI__IS_CHARTYPE(ch, ct_space))
3277                                                                         {
3278                                                                                 PUGI__SKIPWS(); // Eat any whitespace.
3279
3280                                                                                 ch = *s;
3281                                                                                 ++s;
3282                                                                         }
3283
3284                                                                         if (ch == '=') // '<... #=...'
3285                                                                         {
3286                                                                                 PUGI__SKIPWS(); // Eat any whitespace.
3287
3288                                                                                 if (*s == '"' || *s == '\'') // '<... #="...'
3289                                                                                 {
3290                                                                                         ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
3291                                                                                         ++s; // Step over the quote.
3292                                                                                         a->value = s; // Save the offset.
3293
3294                                                                                         s = strconv_attribute(s, ch);
3295
3296                                                                                         if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
3297
3298                                                                                         // After this line the loop continues from the start;
3299                                                                                         // Whitespaces, / and > are ok, symbols and EOF are wrong,
3300                                                                                         // everything else will be detected
3301                                                                                         if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
3302                                                                                 }
3303                                                                                 else PUGI__THROW_ERROR(status_bad_attribute, s);
3304                                                                         }
3305                                                                         else PUGI__THROW_ERROR(status_bad_attribute, s);
3306                                                                 }
3307                                                                 else if (*s == '/')
3308                                                                 {
3309                                                                         ++s;
3310
3311                                                                         if (*s == '>')
3312                                                                         {
3313                                                                                 PUGI__POPNODE();
3314                                                                                 s++;
3315                                                                                 break;
3316                                                                         }
3317                                                                         else if (*s == 0 && endch == '>')
3318                                                                         {
3319                                                                                 PUGI__POPNODE();
3320                                                                                 break;
3321                                                                         }
3322                                                                         else PUGI__THROW_ERROR(status_bad_start_element, s);
3323                                                                 }
3324                                                                 else if (*s == '>')
3325                                                                 {
3326                                                                         ++s;
3327
3328                                                                         break;
3329                                                                 }
3330                                                                 else if (*s == 0 && endch == '>')
3331                                                                 {
3332                                                                         break;
3333                                                                 }
3334                                                                 else PUGI__THROW_ERROR(status_bad_start_element, s);
3335                                                         }
3336
3337                                                         // !!!
3338                                                 }
3339                                                 else if (ch == '/') // '<#.../'
3340                                                 {
3341                                                         if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
3342
3343                                                         PUGI__POPNODE(); // Pop.
3344
3345                                                         s += (*s == '>');
3346                                                 }
3347                                                 else if (ch == 0)
3348                                                 {
3349                                                         // we stepped over null terminator, backtrack & handle closing tag
3350                                                         --s;
3351
3352                                                         if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
3353                                                 }
3354                                                 else PUGI__THROW_ERROR(status_bad_start_element, s);
3355                                         }
3356                                         else if (*s == '/')
3357                                         {
3358                                                 ++s;
3359
3360                                                 char_t* name = cursor->name;
3361                                                 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
3362
3363                                                 while (PUGI__IS_CHARTYPE(*s, ct_symbol))
3364                                                 {
3365                                                         if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
3366                                                 }
3367
3368                                                 if (*name)
3369                                                 {
3370                                                         if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
3371                                                         else PUGI__THROW_ERROR(status_end_element_mismatch, s);
3372                                                 }
3373
3374                                                 PUGI__POPNODE(); // Pop.
3375
3376                                                 PUGI__SKIPWS();
3377
3378                                                 if (*s == 0)
3379                                                 {
3380                                                         if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3381                                                 }
3382                                                 else
3383                                                 {
3384                                                         if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3385                                                         ++s;
3386                                                 }
3387                                         }
3388                                         else if (*s == '?') // '<?...'
3389                                         {
3390                                                 s = parse_question(s, cursor, optmsk, endch);
3391                                                 if (!s) return s;
3392
3393                                                 assert(cursor);
3394                                                 if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
3395                                         }
3396                                         else if (*s == '!') // '<!...'
3397                                         {
3398                                                 s = parse_exclamation(s, cursor, optmsk, endch);
3399                                                 if (!s) return s;
3400                                         }
3401                                         else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
3402                                         else PUGI__THROW_ERROR(status_unrecognized_tag, s);
3403                                 }
3404                                 else
3405                                 {
3406                                         mark = s; // Save this offset while searching for a terminator.
3407
3408                                         PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
3409
3410                                         if (*s == '<' || !*s)
3411                                         {
3412                                                 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
3413                                                 assert(mark != s);
3414
3415                                                 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
3416                                                 {
3417                                                         continue;
3418                                                 }
3419                                                 else if (PUGI__OPTSET(parse_ws_pcdata_single))
3420                                                 {
3421                                                         if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
3422                                                 }
3423                                         }
3424
3425                                         if (!PUGI__OPTSET(parse_trim_pcdata))
3426                                                 s = mark;
3427
3428                                         if (cursor->parent || PUGI__OPTSET(parse_fragment))
3429                                         {
3430                                                 if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
3431                                                 {
3432                                                         cursor->value = s; // Save the offset.
3433                                                 }
3434                                                 else
3435                                                 {
3436                                                         PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
3437
3438                                                         cursor->value = s; // Save the offset.
3439
3440                                                         PUGI__POPNODE(); // Pop since this is a standalone.
3441                                                 }
3442
3443                                                 s = strconv_pcdata(s);
3444
3445                                                 if (!*s) break;
3446                                         }
3447                                         else
3448                                         {
3449                                                 PUGI__SCANFOR(*s == '<'); // '...<'
3450                                                 if (!*s) break;
3451
3452                                                 ++s;
3453                                         }
3454
3455                                         // We're after '<'
3456                                         goto LOC_TAG;
3457                                 }
3458                         }
3459
3460                         // check that last tag is closed
3461                         if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
3462
3463                         return s;
3464                 }
3465
3466         #ifdef PUGIXML_WCHAR_MODE
3467                 static char_t* parse_skip_bom(char_t* s)
3468                 {
3469                         unsigned int bom = 0xfeff;
3470                         return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
3471                 }
3472         #else
3473                 static char_t* parse_skip_bom(char_t* s)
3474                 {
3475                         return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
3476                 }
3477         #endif
3478
3479                 static bool has_element_node_siblings(xml_node_struct* node)
3480                 {
3481                         while (node)
3482                         {
3483                                 if (PUGI__NODETYPE(node) == node_element) return true;
3484
3485                                 node = node->next_sibling;
3486                         }
3487
3488                         return false;
3489                 }
3490
3491                 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
3492                 {
3493                         // early-out for empty documents
3494                         if (length == 0)
3495                                 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
3496
3497                         // get last child of the root before parsing
3498                         xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
3499
3500                         // create parser on stack
3501                         xml_parser parser(static_cast<xml_allocator*>(xmldoc));
3502
3503                         // save last character and make buffer zero-terminated (speeds up parsing)
3504                         char_t endch = buffer[length - 1];
3505                         buffer[length - 1] = 0;
3506
3507                         // skip BOM to make sure it does not end up as part of parse output
3508                         char_t* buffer_data = parse_skip_bom(buffer);
3509
3510                         // perform actual parsing
3511                         parser.parse_tree(buffer_data, root, optmsk, endch);
3512
3513                         xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
3514                         assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
3515
3516                         if (result)
3517                         {
3518                                 // since we removed last character, we have to handle the only possible false positive (stray <)
3519                                 if (endch == '<')
3520                                         return make_parse_result(status_unrecognized_tag, length - 1);
3521
3522                                 // check if there are any element nodes parsed
3523                                 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
3524
3525                                 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
3526                                         return make_parse_result(status_no_document_element, length - 1);
3527                         }
3528                         else
3529                         {
3530                                 // roll back offset if it occurs on a null terminator in the source buffer
3531                                 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
3532                                         result.offset--;
3533                         }
3534
3535                         return result;
3536                 }
3537         };
3538
3539         // Output facilities
3540         PUGI__FN xml_encoding get_write_native_encoding()
3541         {
3542         #ifdef PUGIXML_WCHAR_MODE
3543                 return get_wchar_encoding();
3544         #else
3545                 return encoding_utf8;
3546         #endif
3547         }
3548
3549         PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
3550         {
3551                 // replace wchar encoding with utf implementation
3552                 if (encoding == encoding_wchar) return get_wchar_encoding();
3553
3554                 // replace utf16 encoding with utf16 with specific endianness
3555                 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3556
3557                 // replace utf32 encoding with utf32 with specific endianness
3558                 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3559
3560                 // only do autodetection if no explicit encoding is requested
3561                 if (encoding != encoding_auto) return encoding;
3562
3563                 // assume utf8 encoding
3564                 return encoding_utf8;
3565         }
3566
3567         template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
3568         {
3569                 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3570
3571                 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3572
3573                 return static_cast<size_t>(end - dest) * sizeof(*dest);
3574         }
3575
3576         template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
3577         {
3578                 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3579
3580                 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3581
3582                 if (opt_swap)
3583                 {
3584                         for (typename T::value_type i = dest; i != end; ++i)
3585                                 *i = endian_swap(*i);
3586                 }
3587
3588                 return static_cast<size_t>(end - dest) * sizeof(*dest);
3589         }
3590
3591 #ifdef PUGIXML_WCHAR_MODE
3592         PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3593         {
3594                 if (length < 1) return 0;
3595
3596                 // discard last character if it's the lead of a surrogate pair
3597                 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
3598         }
3599
3600         PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3601         {
3602                 // only endian-swapping is required
3603                 if (need_endian_swap_utf(encoding, get_wchar_encoding()))
3604                 {
3605                         convert_wchar_endian_swap(r_char, data, length);
3606
3607                         return length * sizeof(char_t);
3608                 }
3609
3610                 // convert to utf8
3611                 if (encoding == encoding_utf8)
3612                         return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
3613
3614                 // convert to utf16
3615                 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3616                 {
3617                         xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3618
3619                         return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
3620                 }
3621
3622                 // convert to utf32
3623                 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3624                 {
3625                         xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3626
3627                         return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
3628                 }
3629
3630                 // convert to latin1
3631                 if (encoding == encoding_latin1)
3632                         return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
3633
3634                 assert(false && "Invalid encoding");
3635                 return 0;
3636         }
3637 #else
3638         PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3639         {
3640                 if (length < 5) return 0;
3641
3642                 for (size_t i = 1; i <= 4; ++i)
3643                 {
3644                         uint8_t ch = static_cast<uint8_t>(data[length - i]);
3645
3646                         // either a standalone character or a leading one
3647                         if ((ch & 0xc0) != 0x80) return length - i;
3648                 }
3649
3650                 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
3651                 return length;
3652         }
3653
3654         PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3655         {
3656                 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3657                 {
3658                         xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3659
3660                         return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
3661                 }
3662
3663                 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3664                 {
3665                         xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3666
3667                         return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
3668                 }
3669
3670                 if (encoding == encoding_latin1)
3671                         return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
3672
3673                 assert(false && "Invalid encoding");
3674                 return 0;
3675         }
3676 #endif
3677
3678         class xml_buffered_writer
3679         {
3680                 xml_buffered_writer(const xml_buffered_writer&);
3681                 xml_buffered_writer& operator=(const xml_buffered_writer&);
3682
3683         public:
3684                 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
3685                 {
3686                         PUGI__STATIC_ASSERT(bufcapacity >= 8);
3687                 }
3688
3689                 size_t flush()
3690                 {
3691                         flush(buffer, bufsize);
3692                         bufsize = 0;
3693                         return 0;
3694                 }
3695
3696                 void flush(const char_t* data, size_t size)
3697                 {
3698                         if (size == 0) return;
3699
3700                         // fast path, just write data
3701                         if (encoding == get_write_native_encoding())
3702                                 writer.write(data, size * sizeof(char_t));
3703                         else
3704                         {
3705                                 // convert chunk
3706                                 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3707                                 assert(result <= sizeof(scratch));
3708
3709                                 // write data
3710                                 writer.write(scratch.data_u8, result);
3711                         }
3712                 }
3713
3714                 void write_direct(const char_t* data, size_t length)
3715                 {
3716                         // flush the remaining buffer contents
3717                         flush();
3718
3719                         // handle large chunks
3720                         if (length > bufcapacity)
3721                         {
3722                                 if (encoding == get_write_native_encoding())
3723                                 {
3724                                         // fast path, can just write data chunk
3725                                         writer.write(data, length * sizeof(char_t));
3726                                         return;
3727                                 }
3728
3729                                 // need to convert in suitable chunks
3730                                 while (length > bufcapacity)
3731                                 {
3732                                         // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3733                                         // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3734                                         size_t chunk_size = get_valid_length(data, bufcapacity);
3735                                         assert(chunk_size);
3736
3737                                         // convert chunk and write
3738                                         flush(data, chunk_size);
3739
3740                                         // iterate
3741                                         data += chunk_size;
3742                                         length -= chunk_size;
3743                                 }
3744
3745                                 // small tail is copied below
3746                                 bufsize = 0;
3747                         }
3748
3749                         memcpy(buffer + bufsize, data, length * sizeof(char_t));
3750                         bufsize += length;
3751                 }
3752
3753                 void write_buffer(const char_t* data, size_t length)
3754                 {
3755                         size_t offset = bufsize;
3756
3757                         if (offset < bufcapacity && offset + length <= bufcapacity)
3758                         {
3759                                 memcpy(buffer + offset, data, length * sizeof(char_t));
3760                                 bufsize = offset + length;
3761                         }
3762                         else
3763                         {
3764                                 write_direct(data, length);
3765                         }
3766                 }
3767
3768                 void write_string(const char_t* data)
3769                 {
3770                         // write the part of the string that fits in the buffer
3771                         size_t offset = bufsize;
3772
3773                         while (*data && offset < bufcapacity)
3774                                 buffer[offset++] = *data++;
3775
3776                         // write the rest
3777                         if (offset < bufcapacity)
3778                         {
3779                                 bufsize = offset;
3780                         }
3781                         else
3782                         {
3783                                 // backtrack a bit if we have split the codepoint
3784                                 size_t length = offset - bufsize;
3785                                 size_t extra = length - get_valid_length(data - length, length);
3786
3787                                 bufsize = offset - extra;
3788
3789                                 write_direct(data - extra, strlength(data) + extra);
3790                         }
3791                 }
3792
3793                 void write(char_t d0)
3794                 {
3795                         size_t offset = bufsize;
3796                         if (offset > bufcapacity - 1) offset = flush();
3797
3798                         buffer[offset + 0] = d0;
3799                         bufsize = offset + 1;
3800                 }
3801
3802                 void write(char_t d0, char_t d1)
3803                 {
3804                         size_t offset = bufsize;
3805                         if (offset > bufcapacity - 2) offset = flush();
3806
3807                         buffer[offset + 0] = d0;
3808                         buffer[offset + 1] = d1;
3809                         bufsize = offset + 2;
3810                 }
3811
3812                 void write(char_t d0, char_t d1, char_t d2)
3813                 {
3814                         size_t offset = bufsize;
3815                         if (offset > bufcapacity - 3) offset = flush();
3816
3817                         buffer[offset + 0] = d0;
3818                         buffer[offset + 1] = d1;
3819                         buffer[offset + 2] = d2;
3820                         bufsize = offset + 3;
3821                 }
3822
3823                 void write(char_t d0, char_t d1, char_t d2, char_t d3)
3824                 {
3825                         size_t offset = bufsize;
3826                         if (offset > bufcapacity - 4) offset = flush();
3827
3828                         buffer[offset + 0] = d0;
3829                         buffer[offset + 1] = d1;
3830                         buffer[offset + 2] = d2;
3831                         buffer[offset + 3] = d3;
3832                         bufsize = offset + 4;
3833                 }
3834
3835                 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3836                 {
3837                         size_t offset = bufsize;
3838                         if (offset > bufcapacity - 5) offset = flush();
3839
3840                         buffer[offset + 0] = d0;
3841                         buffer[offset + 1] = d1;
3842                         buffer[offset + 2] = d2;
3843                         buffer[offset + 3] = d3;
3844                         buffer[offset + 4] = d4;
3845                         bufsize = offset + 5;
3846                 }
3847
3848                 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3849                 {
3850                         size_t offset = bufsize;
3851                         if (offset > bufcapacity - 6) offset = flush();
3852
3853                         buffer[offset + 0] = d0;
3854                         buffer[offset + 1] = d1;
3855                         buffer[offset + 2] = d2;
3856                         buffer[offset + 3] = d3;
3857                         buffer[offset + 4] = d4;
3858                         buffer[offset + 5] = d5;
3859                         bufsize = offset + 6;
3860                 }
3861
3862                 // utf8 maximum expansion: x4 (-> utf32)
3863                 // utf16 maximum expansion: x2 (-> utf32)
3864                 // utf32 maximum expansion: x1
3865                 enum
3866                 {
3867                         bufcapacitybytes =
3868                         #ifdef PUGIXML_MEMORY_OUTPUT_STACK
3869                                 PUGIXML_MEMORY_OUTPUT_STACK
3870                         #else
3871                                 10240
3872                         #endif
3873                         ,
3874                         bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
3875                 };
3876
3877                 char_t buffer[bufcapacity];
3878
3879                 union
3880                 {
3881                         uint8_t data_u8[4 * bufcapacity];
3882                         uint16_t data_u16[2 * bufcapacity];
3883                         uint32_t data_u32[bufcapacity];
3884                         char_t data_char[bufcapacity];
3885                 } scratch;
3886
3887                 xml_writer& writer;
3888                 size_t bufsize;
3889                 xml_encoding encoding;
3890         };
3891
3892         PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
3893         {
3894                 while (*s)
3895                 {
3896                         const char_t* prev = s;
3897
3898                         // While *s is a usual symbol
3899                         PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
3900
3901                         writer.write_buffer(prev, static_cast<size_t>(s - prev));
3902
3903                         switch (*s)
3904                         {
3905                                 case 0: break;
3906                                 case '&':
3907                                         writer.write('&', 'a', 'm', 'p', ';');
3908                                         ++s;
3909                                         break;
3910                                 case '<':
3911                                         writer.write('&', 'l', 't', ';');
3912                                         ++s;
3913                                         break;
3914                                 case '>':
3915                                         writer.write('&', 'g', 't', ';');
3916                                         ++s;
3917                                         break;
3918                                 case '"':
3919                                         writer.write('&', 'q', 'u', 'o', 't', ';');
3920                                         ++s;
3921                                         break;
3922                                 default: // s is not a usual symbol
3923                                 {
3924                                         unsigned int ch = static_cast<unsigned int>(*s++);
3925                                         assert(ch < 32);
3926
3927                                         writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
3928                                 }
3929                         }
3930                 }
3931         }
3932
3933         PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3934         {
3935                 if (flags & format_no_escapes)
3936                         writer.write_string(s);
3937                 else
3938                         text_output_escaped(writer, s, type);
3939         }
3940
3941         PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
3942         {
3943                 do
3944                 {
3945                         writer.write('<', '!', '[', 'C', 'D');
3946                         writer.write('A', 'T', 'A', '[');
3947
3948                         const char_t* prev = s;
3949
3950                         // look for ]]> sequence - we can't output it as is since it terminates CDATA
3951                         while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3952
3953                         // skip ]] if we stopped at ]]>, > will go to the next CDATA section
3954                         if (*s) s += 2;
3955
3956                         size_t bufLenght = static_cast<size_t>(s - prev);
3957
3958                         writer.write_buffer(prev, bufLenght);
3959
3960                         writer.write(']', ']', '>');
3961                 }
3962                 while (*s);
3963         }
3964
3965         PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
3966         {
3967                 switch (indent_length)
3968                 {
3969                 case 1:
3970                 {
3971                         for (unsigned int i = 0; i < depth; ++i)
3972                                 writer.write(indent[0]);
3973                         break;
3974                 }
3975
3976                 case 2:
3977                 {
3978                         for (unsigned int i = 0; i < depth; ++i)
3979                                 writer.write(indent[0], indent[1]);
3980                         break;
3981                 }
3982
3983                 case 3:
3984                 {
3985                         for (unsigned int i = 0; i < depth; ++i)
3986                                 writer.write(indent[0], indent[1], indent[2]);
3987                         break;
3988                 }
3989
3990                 case 4:
3991                 {
3992                         for (unsigned int i = 0; i < depth; ++i)
3993                                 writer.write(indent[0], indent[1], indent[2], indent[3]);
3994                         break;
3995                 }
3996
3997                 default:
3998                 {
3999                         for (unsigned int i = 0; i < depth; ++i)
4000                                 writer.write_buffer(indent, indent_length);
4001                 }
4002                 }
4003         }
4004
4005         PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
4006         {
4007                 writer.write('<', '!', '-', '-');
4008
4009                 while (*s)
4010                 {
4011                         const char_t* prev = s;
4012
4013                         // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
4014                         while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
4015
4016                         size_t bufLenght = static_cast<size_t>(s - prev);
4017
4018                         writer.write_buffer(prev, bufLenght);
4019
4020                         if (*s)
4021                         {
4022                                 assert(*s == '-');
4023
4024                                 writer.write('-', ' ');
4025                                 ++s;
4026                         }
4027                 }
4028
4029                 writer.write('-', '-', '>');
4030         }
4031
4032         PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
4033         {
4034                 while (*s)
4035                 {
4036                         const char_t* prev = s;
4037
4038                         // look for ?> sequence - we can't output it since ?> terminates PI
4039                         while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
4040
4041                         writer.write_buffer(prev, static_cast<size_t>(s - prev));
4042
4043                         if (*s)
4044                         {
4045                                 assert(s[0] == '?' && s[1] == '>');
4046
4047                                 writer.write('?', ' ', '>');
4048                                 s += 2;
4049                         }
4050                 }
4051         }
4052
4053         PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4054         {
4055                 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4056
4057                 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4058                 {
4059                         if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
4060                         {
4061                                 writer.write('\n');
4062
4063                                 text_output_indent(writer, indent, indent_length, depth + 1);
4064                         }
4065                         else
4066                         {
4067                                 writer.write(' ');
4068                         }
4069
4070                         writer.write_string(a->name ? a->name + 0 : default_name);
4071                         writer.write('=', '"');
4072
4073                         if (a->value)
4074                                 text_output(writer, a->value, ctx_special_attr, flags);
4075
4076                         writer.write('"');
4077                 }
4078         }
4079
4080         PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4081         {
4082                 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4083                 const char_t* name = node->name ? node->name + 0 : default_name;
4084
4085                 writer.write('<');
4086                 writer.write_string(name);
4087
4088                 if (node->first_attribute)
4089                         node_output_attributes(writer, node, indent, indent_length, flags, depth);
4090
4091                 // element nodes can have value if parse_embed_pcdata was used
4092                 if (!node->value)
4093                 {
4094                         if (!node->first_child)
4095                         {
4096                                 if ((flags & format_raw) == 0)
4097                                         writer.write(' ');
4098
4099                                 writer.write('/', '>');
4100
4101                                 return false;
4102                         }
4103                         else
4104                         {
4105                                 writer.write('>');
4106
4107                                 return true;
4108                         }
4109                 }
4110                 else
4111                 {
4112                         writer.write('>');
4113
4114                         text_output(writer, node->value, ctx_special_pcdata, flags);
4115
4116                         if (!node->first_child)
4117                         {
4118                                 writer.write('<', '/');
4119                                 writer.write_string(name);
4120                                 writer.write('>');
4121
4122                                 return false;
4123                         }
4124                         else
4125                         {
4126                                 return true;
4127                         }
4128                 }
4129         }
4130
4131         PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
4132         {
4133                 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4134                 const char_t* name = node->name ? node->name + 0 : default_name;
4135
4136                 writer.write('<', '/');
4137                 writer.write_string(name);
4138                 writer.write('>');
4139         }
4140
4141         PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
4142         {
4143                 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4144
4145                 switch (PUGI__NODETYPE(node))
4146                 {
4147                         case node_pcdata:
4148                                 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
4149                                 break;
4150
4151                         case node_cdata:
4152                                 text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4153                                 break;
4154
4155                         case node_comment:
4156                                 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4157                                 break;
4158
4159                         case node_pi:
4160                                 writer.write('<', '?');
4161                                 writer.write_string(node->name ? node->name + 0 : default_name);
4162
4163                                 if (node->value)
4164                                 {
4165                                         writer.write(' ');
4166                                         node_output_pi_value(writer, node->value);
4167                                 }
4168
4169                                 writer.write('?', '>');
4170                                 break;
4171
4172                         case node_declaration:
4173                                 writer.write('<', '?');
4174                                 writer.write_string(node->name ? node->name + 0 : default_name);
4175                                 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
4176                                 writer.write('?', '>');
4177                                 break;
4178
4179                         case node_doctype:
4180                                 writer.write('<', '!', 'D', 'O', 'C');
4181                                 writer.write('T', 'Y', 'P', 'E');
4182
4183                                 if (node->value)
4184                                 {
4185                                         writer.write(' ');
4186                                         writer.write_string(node->value);
4187                                 }
4188
4189                                 writer.write('>');
4190                                 break;
4191
4192                         default:
4193                                 assert(false && "Invalid node type");
4194                 }
4195         }
4196
// Bit flags describing the whitespace that still has to be written before the next tag:
// a line break (indent_newline) and/or the indentation itself (indent_indent).
enum indent_flags_t
{
    indent_newline = 1 << 0,
    indent_indent = 1 << 1
};
4202
4203         PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
4204         {
4205                 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
4206                 unsigned int indent_flags = indent_indent;
4207
4208                 xml_node_struct* node = root;
4209
4210                 do
4211                 {
4212                         assert(node);
4213
4214                         // begin writing current node
4215                         if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
4216                         {
4217                                 node_output_simple(writer, node, flags);
4218
4219                                 indent_flags = 0;
4220                         }
4221                         else
4222                         {
4223                                 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4224                                         writer.write('\n');
4225
4226                                 if ((indent_flags & indent_indent) && indent_length)
4227                                         text_output_indent(writer, indent, indent_length, depth);
4228
4229                                 if (PUGI__NODETYPE(node) == node_element)
4230                                 {
4231                                         indent_flags = indent_newline | indent_indent;
4232
4233                                         if (node_output_start(writer, node, indent, indent_length, flags, depth))
4234                                         {
4235                                                 // element nodes can have value if parse_embed_pcdata was used
4236                                                 if (node->value)
4237                                                         indent_flags = 0;
4238
4239                                                 node = node->first_child;
4240                                                 depth++;
4241                                                 continue;
4242                                         }
4243                                 }
4244                                 else if (PUGI__NODETYPE(node) == node_document)
4245                                 {
4246                                         indent_flags = indent_indent;
4247
4248                                         if (node->first_child)
4249                                         {
4250                                                 node = node->first_child;
4251                                                 continue;
4252                                         }
4253                                 }
4254                                 else
4255                                 {
4256                                         node_output_simple(writer, node, flags);
4257
4258                                         indent_flags = indent_newline | indent_indent;
4259                                 }
4260                         }
4261
4262                         // continue to the next node
4263                         while (node != root)
4264                         {
4265                                 if (node->next_sibling)
4266                                 {
4267                                         node = node->next_sibling;
4268                                         break;
4269                                 }
4270
4271                                 node = node->parent;
4272
4273                                 // write closing node
4274                                 if (PUGI__NODETYPE(node) == node_element)
4275                                 {
4276                                         depth--;
4277
4278                                         if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4279                                                 writer.write('\n');
4280
4281                                         if ((indent_flags & indent_indent) && indent_length)
4282                                                 text_output_indent(writer, indent, indent_length, depth);
4283
4284                                         node_output_end(writer, node);
4285
4286                                         indent_flags = indent_newline | indent_indent;
4287                                 }
4288                         }
4289                 }
4290                 while (node != root);
4291
4292                 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4293                         writer.write('\n');
4294         }
4295
4296         PUGI__FN bool has_declaration(xml_node_struct* node)
4297         {
4298                 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
4299                 {
4300                         xml_node_type type = PUGI__NODETYPE(child);
4301
4302                         if (type == node_declaration) return true;
4303                         if (type == node_element) return false;
4304                 }
4305
4306                 return false;
4307         }
4308
4309         PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
4310         {
4311                 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4312                         if (a == attr)
4313                                 return true;
4314
4315                 return false;
4316         }
4317
4318         PUGI__FN bool allow_insert_attribute(xml_node_type parent)
4319         {
4320                 return parent == node_element || parent == node_declaration;
4321         }
4322
4323         PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
4324         {
4325                 if (parent != node_document && parent != node_element) return false;
4326                 if (child == node_document || child == node_null) return false;
4327                 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
4328
4329                 return true;
4330         }
4331
4332         PUGI__FN bool allow_move(xml_node parent, xml_node child)
4333         {
4334                 // check that child can be a child of parent
4335                 if (!allow_insert_child(parent.type(), child.type()))
4336                         return false;
4337
4338                 // check that node is not moved between documents
4339                 if (parent.root() != child.root())
4340                         return false;
4341
4342                 // check that new parent is not in the child subtree
4343                 xml_node cur = parent;
4344
4345                 while (cur)
4346                 {
4347                         if (cur == child)
4348                                 return false;
4349
4350                         cur = cur.parent();
4351                 }
4352
4353                 return true;
4354         }
4355
4356         template <typename String, typename Header>
4357         PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
4358         {
4359                 assert(!dest && (header & header_mask) == 0);
4360
4361                 if (source)
4362                 {
4363                         if (alloc && (source_header & header_mask) == 0)
4364                         {
4365                                 dest = source;
4366
4367                                 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
4368                                 header |= xml_memory_page_contents_shared_mask;
4369                                 source_header |= xml_memory_page_contents_shared_mask;
4370                         }
4371                         else
4372                                 strcpy_insitu(dest, header, header_mask, source, strlength(source));
4373                 }
4374         }
4375
4376         PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
4377         {
4378                 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
4379                 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
4380
4381                 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
4382                 {
4383                         xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4384
4385                         if (da)
4386                         {
4387                                 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4388                                 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4389                         }
4390                 }
4391         }
4392
4393         PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
4394         {
4395                 xml_allocator& alloc = get_allocator(dn);
4396                 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
4397
4398                 node_copy_contents(dn, sn, shared_alloc);
4399
4400                 xml_node_struct* dit = dn;
4401                 xml_node_struct* sit = sn->first_child;
4402
4403                 while (sit && sit != sn)
4404                 {
4405                         if (sit != dn)
4406                         {
4407                                 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
4408
4409                                 if (copy)
4410                                 {
4411                                         node_copy_contents(copy, sit, shared_alloc);
4412
4413                                         if (sit->first_child)
4414                                         {
4415                                                 dit = copy;
4416                                                 sit = sit->first_child;
4417                                                 continue;
4418                                         }
4419                                 }
4420                         }
4421
4422                         // continue to the next node
4423                         do
4424                         {
4425                                 if (sit->next_sibling)
4426                                 {
4427                                         sit = sit->next_sibling;
4428                                         break;
4429                                 }
4430
4431                                 sit = sit->parent;
4432                                 dit = dit->parent;
4433                         }
4434                         while (sit != sn);
4435                 }
4436         }
4437
4438         PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
4439         {
4440                 xml_allocator& alloc = get_allocator(da);
4441                 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4442
4443                 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4444                 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4445         }
4446
4447         inline bool is_text_node(xml_node_struct* node)
4448         {
4449                 xml_node_type type = PUGI__NODETYPE(node);
4450
4451                 return type == node_pcdata || type == node_cdata;
4452         }
4453
4454         // get value with conversion functions
4455         template <typename U> U string_to_integer(const char_t* value, U minneg, U maxpos)
4456         {
4457                 U result = 0;
4458                 const char_t* s = value;
4459
4460                 while (PUGI__IS_CHARTYPE(*s, ct_space))
4461                         s++;
4462
4463                 bool negative = (*s == '-');
4464
4465                 s += (*s == '+' || *s == '-');
4466
4467                 bool overflow = false;
4468
4469                 if (s[0] == '0' && (s[1] | ' ') == 'x')
4470                 {
4471                         s += 2;
4472
4473                         // since overflow detection relies on length of the sequence skip leading zeros
4474                         while (*s == '0')
4475                                 s++;
4476
4477                         const char_t* start = s;
4478
4479                         for (;;)
4480                         {
4481                                 if (static_cast<unsigned>(*s - '0') < 10)
4482                                         result = result * 16 + (*s - '0');
4483                                 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
4484                                         result = result * 16 + ((*s | ' ') - 'a' + 10);
4485                                 else
4486                                         break;
4487
4488                                 s++;
4489                         }
4490
4491                         size_t digits = static_cast<size_t>(s - start);
4492
4493                         overflow = digits > sizeof(U) * 2;
4494                 }
4495                 else
4496                 {
4497                         // since overflow detection relies on length of the sequence skip leading zeros
4498                         while (*s == '0')
4499                                 s++;
4500
4501                         const char_t* start = s;
4502
4503                         for (;;)
4504                         {
4505                                 if (static_cast<unsigned>(*s - '0') < 10)
4506                                         result = result * 10 + (*s - '0');
4507                                 else
4508                                         break;
4509
4510                                 s++;
4511                         }
4512
4513                         size_t digits = static_cast<size_t>(s - start);
4514
4515                         PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
4516
4517                         const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
4518                         const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
4519                         const size_t high_bit = sizeof(U) * 8 - 1;
4520
4521                         overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
4522                 }
4523
4524                 if (negative)
4525                         return (overflow || result > minneg) ? 0 - minneg : 0 - result;
4526                 else
4527                         return (overflow || result > maxpos) ? maxpos : result;
4528         }
4529
4530         PUGI__FN int get_value_int(const char_t* value)
4531         {
4532                 return string_to_integer<unsigned int>(value, 0 - static_cast<unsigned int>(INT_MIN), INT_MAX);
4533         }
4534
4535         PUGI__FN unsigned int get_value_uint(const char_t* value)
4536         {
4537                 return string_to_integer<unsigned int>(value, 0, UINT_MAX);
4538         }
4539
4540         PUGI__FN double get_value_double(const char_t* value)
4541         {
4542         #ifdef PUGIXML_WCHAR_MODE
4543                 return wcstod(value, 0);
4544         #else
4545                 return strtod(value, 0);
4546         #endif
4547         }
4548
4549         PUGI__FN float get_value_float(const char_t* value)
4550         {
4551         #ifdef PUGIXML_WCHAR_MODE
4552                 return static_cast<float>(wcstod(value, 0));
4553         #else
4554                 return static_cast<float>(strtod(value, 0));
4555         #endif
4556         }
4557
4558         PUGI__FN bool get_value_bool(const char_t* value)
4559         {
4560                 // only look at first char
4561                 char_t first = *value;
4562
4563                 // 1*, t* (true), T* (True), y* (yes), Y* (YES)
4564                 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
4565         }
4566
4567 #ifdef PUGIXML_HAS_LONG_LONG
4568         PUGI__FN long long get_value_llong(const char_t* value)
4569         {
4570                 return string_to_integer<unsigned long long>(value, 0 - static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
4571         }
4572
4573         PUGI__FN unsigned long long get_value_ullong(const char_t* value)
4574         {
4575                 return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
4576         }
4577 #endif
4578
// Maps an integer type T to the unsigned integer type of the same width via the nested
// typedef "type". Only the specializations listed here exist; the primary template is
// intentionally left undefined.
template <typename T> struct make_unsigned;

template <> struct make_unsigned<int>
{
    typedef unsigned int type;
};

template <> struct make_unsigned<unsigned int>
{
    typedef unsigned int type;
};

template <> struct make_unsigned<long>
{
    typedef unsigned long type;
};

template <> struct make_unsigned<unsigned long>
{
    typedef unsigned long type;
};

#ifdef PUGIXML_HAS_LONG_LONG
template <> struct make_unsigned<long long>
{
    typedef unsigned long long type;
};

template <> struct make_unsigned<unsigned long long>
{
    typedef unsigned long long type;
};
#endif
4590
4591         template <typename T>
4592         PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, T value)
4593         {
4594                 typedef typename make_unsigned<T>::type U;
4595
4596                 bool negative = value < 0;
4597
4598                 char_t* result = end - 1;
4599                 U rest = negative ? 0 - U(value) : U(value);
4600
4601                 do
4602                 {
4603                         *result-- = static_cast<char_t>('0' + (rest % 10));
4604                         rest /= 10;
4605                 }
4606                 while (rest);
4607
4608                 assert(result >= begin);
4609                 (void)begin;
4610
4611                 *result = '-';
4612
4613                 return result + !negative;
4614         }
4615
4616         // set value with conversion functions
4617         template <typename String, typename Header>
4618         PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
4619         {
4620         #ifdef PUGIXML_WCHAR_MODE
4621                 char_t wbuf[128];
4622                 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4623
4624                 size_t offset = 0;
4625                 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4626
4627                 return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4628         #else
4629                 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
4630         #endif
4631         }
4632
4633         template <typename String, typename Header, typename Integer>
4634         PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, Integer value)
4635         {
4636                 char_t buf[64];
4637                 char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4638                 char_t* begin = integer_to_string(buf, end, value);
4639
4640                 return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4641         }
4642
4643         template <typename String, typename Header>
4644         PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value)
4645         {
4646                 char buf[128];
4647                 sprintf(buf, "%.9g", value);
4648
4649                 return set_value_ascii(dest, header, header_mask, buf);
4650         }
4651
4652         template <typename String, typename Header>
4653         PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value)
4654         {
4655                 char buf[128];
4656                 sprintf(buf, "%.17g", value);
4657
4658                 return set_value_ascii(dest, header, header_mask, buf);
4659         }
4660
4661         template <typename String, typename Header>
4662         PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value)
4663         {
4664                 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
4665         }
4666
4667         PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4668         {
4669                 // check input buffer
4670                 if (!contents && size) return make_parse_result(status_io_error);
4671
4672                 // get actual encoding
4673                 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4674
4675                 // get private buffer
4676                 char_t* buffer = 0;
4677                 size_t length = 0;
4678
4679                 if (!contents) {
4680                         xml_parse_result failed;
4681                         failed.status = xml_parse_status::status_internal_error;
4682                         failed.offset = (ptrdiff_t)0;
4683                         return failed;
4684                 }
4685
4686                 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4687
4688                 // delete original buffer if we performed a conversion
4689                 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4690
4691                 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4692                 if (own || buffer != contents) *out_buffer = buffer;
4693
4694                 // store buffer for offset_debug
4695                 doc->buffer = buffer;
4696
4697                 // parse
4698                 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4699
4700                 // remember encoding
4701                 res.encoding = buffer_encoding;
4702
4703                 return res;
4704         }
4705
4706         // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
4707         PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
4708         {
4709         #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
4710                 // there are 64-bit versions of fseek/ftell, let's use them
4711                 typedef __int64 length_type;
4712
4713                 _fseeki64(file, 0, SEEK_END);
4714                 length_type length = _ftelli64(file);
4715                 _fseeki64(file, 0, SEEK_SET);
4716         #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
4717                 // there are 64-bit versions of fseek/ftell, let's use them
4718                 typedef off64_t length_type;
4719
4720                 fseeko64(file, 0, SEEK_END);
4721                 length_type length = ftello64(file);
4722                 fseeko64(file, 0, SEEK_SET);
4723         #else
4724                 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
4725                 typedef long length_type;
4726
4727                 fseek(file, 0, SEEK_END);
4728                 length_type length = ftell(file);
4729                 fseek(file, 0, SEEK_SET);
4730         #endif
4731
4732                 // check for I/O errors
4733                 if (length < 0) return status_io_error;
4734
4735                 // check for overflow
4736                 size_t result = static_cast<size_t>(length);
4737
4738                 if (static_cast<length_type>(result) != length) return status_out_of_memory;
4739
4740                 // finalize
4741                 out_result = result;
4742
4743                 return status_ok;
4744         }
4745
4746         // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
4747         PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
4748         {
4749                 // We only need to zero-terminate if encoding conversion does not do it for us
4750         #ifdef PUGIXML_WCHAR_MODE
4751                 xml_encoding wchar_encoding = get_wchar_encoding();
4752
4753                 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
4754                 {
4755                         size_t length = size / sizeof(char_t);
4756
4757                         static_cast<char_t*>(buffer)[length] = 0;
4758                         return (length + 1) * sizeof(char_t);
4759                 }
4760         #else
4761                 if (encoding == encoding_utf8)
4762                 {
4763                         static_cast<char*>(buffer)[size] = 0;
4764                         return size + 1;
4765                 }
4766         #endif
4767
4768                 return size;
4769         }
4770
4771         PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4772         {
4773                 if (!file) return make_parse_result(status_file_not_found);
4774
4775                 // get file size (can result in I/O errors)
4776                 size_t size = 0;
4777                 xml_parse_status size_status = get_file_size(file, size);
4778                 if (size_status != status_ok) return make_parse_result(size_status);
4779
4780                 size_t max_suffix_size = sizeof(char_t);
4781
4782                 // allocate buffer for the whole file
4783                 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4784                 if (!contents) return make_parse_result(status_out_of_memory);
4785
4786                 // read file in memory
4787                 size_t read_size = fread(contents, 1, size, file);
4788
4789                 if (read_size != size)
4790                 {
4791                         xml_memory::deallocate(contents);
4792                         return make_parse_result(status_io_error);
4793                 }
4794
4795                 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4796
4797                 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
4798         }
4799
4800         PUGI__FN void close_file(FILE* file)
4801         {
4802                 fclose(file);
4803         }
4804
4805 #ifndef PUGIXML_NO_STL
4806         template <typename T> struct xml_stream_chunk
4807         {
4808                 static xml_stream_chunk* create()
4809                 {
4810                         void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4811                         if (!memory) return 0;
4812
4813                         return new (memory) xml_stream_chunk();
4814                 }
4815
4816                 static void destroy(xml_stream_chunk* chunk)
4817                 {
4818                         // free chunk chain
4819                         while (chunk)
4820                         {
4821                                 xml_stream_chunk* next_ = chunk->next;
4822
4823                                 xml_memory::deallocate(chunk);
4824
4825                                 chunk = next_;
4826                         }
4827                 }
4828
4829                 xml_stream_chunk(): next(0), size(0)
4830                 {
4831                 }
4832
4833                 xml_stream_chunk* next;
4834                 size_t size;
4835
4836                 T data[xml_memory_page_size / sizeof(T)];
4837         };
4838
        // Reads everything that is left in a stream without relying on seeking.
        // Data is first collected into a linked list of fixed-size chunks and then copied into one contiguous
        // buffer of total + sizeof(char_t) bytes obtained from xml_memory::allocate.
        // On success returns status_ok, stores the buffer in *out_buffer and the number of data bytes (the extra
        // suffix space is not counted) in *out_size; the buffer is not released by this function.
        // Returns status_out_of_memory if an allocation fails or the byte count overflows, status_io_error on stream errors.
        template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
        {
                // holder for the chunk list head, paired with xml_stream_chunk<T>::destroy
                // NOTE(review): presumably the list is destroyed when the holder goes out of scope - auto_deleter is defined elsewhere
                auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);

                // read file to a chunk list
                size_t total = 0;
                xml_stream_chunk<T>* last = 0;

                while (!stream.eof())
                {
                        // allocate new chunk
                        xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
                        if (!chunk) return status_out_of_memory;

                        // append chunk to list
                        if (last) last = last->next = chunk;
                        else chunks.data = last = chunk;

                        // read data to chunk
                        stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
                        // gcount() is in elements of T; chunk sizes are tracked in bytes
                        chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);

                        // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
                        if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;

                        // guard against huge files (chunk size is small enough to make this overflow check work)
                        if (total + chunk->size < total) return status_out_of_memory;
                        total += chunk->size;
                }

                // extra space reserved after the data for a terminator of one char_t
                size_t max_suffix_size = sizeof(char_t);

                // copy chunk list to a contiguous buffer
                char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
                if (!buffer) return status_out_of_memory;

                char* write = buffer;

                for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
                {
                        assert(write + chunk->size <= buffer + total);
                        memcpy(write, chunk->data, chunk->size);
                        write += chunk->size;
                }

                // every collected byte must have been copied
                assert(write == buffer + total);

                // return buffer
                *out_buffer = buffer;
                *out_size = total;

                return status_ok;
        }
4892
4893         template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4894         {
4895                 // get length of remaining data in stream
4896                 typename std::basic_istream<T>::pos_type pos = stream.tellg();
4897                 stream.seekg(0, std::ios::end);
4898                 std::streamoff length = stream.tellg() - pos;
4899                 stream.seekg(pos);
4900
4901                 if (stream.fail() || pos < 0) return status_io_error;
4902
4903                 // guard against huge files
4904                 size_t read_length = static_cast<size_t>(length);
4905
4906                 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
4907
4908                 size_t max_suffix_size = sizeof(char_t);
4909
4910                 // read stream data into memory (guard against stream exceptions with buffer holder)
4911                 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
4912                 if (!buffer.data) return status_out_of_memory;
4913
4914                 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
4915
4916                 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
4917                 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4918
4919                 // return buffer
4920                 size_t actual_length = static_cast<size_t>(stream.gcount());
4921                 assert(actual_length <= read_length);
4922
4923                 *out_buffer = buffer.release();
4924                 *out_size = actual_length * sizeof(T);
4925
4926                 return status_ok;
4927         }
4928
4929         template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4930         {
4931                 void* buffer = 0;
4932                 size_t size = 0;
4933                 xml_parse_status status = status_ok;
4934
4935                 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
4936                 if (stream.fail()) return make_parse_result(status_io_error);
4937
4938                 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
4939                 if (stream.tellg() < 0)
4940                 {
4941                         stream.clear(); // clear error flags that could be set by a failing tellg
4942                         status = load_stream_data_noseek(stream, &buffer, &size);
4943                 }
4944                 else
4945                         status = load_stream_data_seek(stream, &buffer, &size);
4946
4947                 if (status != status_ok) return make_parse_result(status);
4948
4949                 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
4950
4951                 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
4952         }
4953 #endif
4954
4955 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
4956         PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4957         {
4958                 return _wfopen(path, mode);
4959         }
4960 #else
4961         PUGI__FN char* convert_path_heap(const wchar_t* str)
4962         {
4963                 assert(str);
4964
4965                 // first pass: get length in utf8 characters
4966                 size_t length = strlength_wide(str);
4967                 size_t size = as_utf8_begin(str, length);
4968
4969                 // allocate resulting string
4970                 char* result = static_cast<char*>(xml_memory::allocate(size + 1));
4971                 if (!result) return 0;
4972
4973                 // second pass: convert to utf8
4974                 as_utf8_end(result, size, str, length);
4975
4976                 // zero-terminate
4977                 result[size] = 0;
4978
4979                 return result;
4980         }
4981
4982         PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4983         {
4984                 // there is no standard function to open wide paths, so our best bet is to try utf8 path
4985                 char* path_utf8 = convert_path_heap(path);
4986                 if (!path_utf8) return 0;
4987
4988                 // convert mode to ASCII (we mirror _wfopen interface)
4989                 char mode_ascii[4] = {0};
4990                 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
4991
4992                 // try to open the utf8 path
4993                 FILE* result = fopen(path_utf8, mode_ascii);
4994
4995                 // free dummy buffer
4996                 xml_memory::deallocate(path_utf8);
4997
4998                 return result;
4999         }
5000 #endif
5001
5002         PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
5003         {
5004                 if (!file) return false;
5005
5006                 xml_writer_file writer(file);
5007                 doc.save(writer, indent, flags, encoding);
5008
5009                 return ferror(file) == 0;
5010         }
5011
5012         struct name_null_sentry
5013         {
5014                 xml_node_struct* node;
5015                 char_t* name;
5016
5017                 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
5018                 {
5019                         node->name = 0;
5020                 }
5021
5022                 ~name_null_sentry()
5023                 {
5024                         node->name = name;
5025                 }
5026         };
5027 PUGI__NS_END
5028
5029 namespace pugi
5030 {
5031         PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
5032         {
5033         }
5034
5035         PUGI__FN void xml_writer_file::write(const void* data, size_t size)
5036         {
5037                 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
5038                 (void)!result; // unfortunately we can't do proper error handling here
5039         }
5040
5041 #ifndef PUGIXML_NO_STL
5042         PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
5043         {
5044         }
5045
5046         PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
5047         {
5048         }
5049
5050         PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
5051         {
5052                 if (narrow_stream)
5053                 {
5054                         assert(!wide_stream);
5055                         narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
5056                 }
5057                 else
5058                 {
5059                         assert(wide_stream);
5060                         assert(size % sizeof(wchar_t) == 0);
5061
5062                         wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
5063                 }
5064         }
5065 #endif
5066
5067         PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
5068         {
5069         }
5070
5071         PUGI__FN xml_tree_walker::~xml_tree_walker()
5072         {
5073         }
5074
5075         PUGI__FN int xml_tree_walker::depth() const
5076         {
5077                 return _depth;
5078         }
5079
5080         PUGI__FN bool xml_tree_walker::begin(xml_node&)
5081         {
5082                 return true;
5083         }
5084
5085         PUGI__FN bool xml_tree_walker::end(xml_node&)
5086         {
5087                 return true;
5088         }
5089
5090         PUGI__FN xml_attribute::xml_attribute(): _attr(0)
5091         {
5092         }
5093
5094         PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
5095         {
5096         }
5097
5098         PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
5099         {
5100         }
5101
5102         PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
5103         {
5104                 return _attr ? unspecified_bool_xml_attribute : 0;
5105         }
5106
5107         PUGI__FN bool xml_attribute::operator!() const
5108         {
5109                 return !_attr;
5110         }
5111
5112         PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
5113         {
5114                 return (_attr == r._attr);
5115         }
5116
5117         PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
5118         {
5119                 return (_attr != r._attr);
5120         }
5121
5122         PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
5123         {
5124                 return (_attr < r._attr);
5125         }
5126
5127         PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
5128         {
5129                 return (_attr > r._attr);
5130         }
5131
5132         PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
5133         {
5134                 return (_attr <= r._attr);
5135         }
5136
5137         PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
5138         {
5139                 return (_attr >= r._attr);
5140         }
5141
5142         PUGI__FN xml_attribute xml_attribute::next_attribute() const
5143         {
5144                 return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
5145         }
5146
5147         PUGI__FN xml_attribute xml_attribute::previous_attribute() const
5148         {
5149                 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
5150         }
5151
5152         PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
5153         {
5154                 return (_attr && _attr->value) ? _attr->value + 0 : def;
5155         }
5156
5157         PUGI__FN int xml_attribute::as_int(int def) const
5158         {
5159                 return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
5160         }
5161
5162         PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
5163         {
5164                 return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
5165         }
5166
5167         PUGI__FN double xml_attribute::as_double(double def) const
5168         {
5169                 return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
5170         }
5171
5172         PUGI__FN float xml_attribute::as_float(float def) const
5173         {
5174                 return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
5175         }
5176
5177         PUGI__FN bool xml_attribute::as_bool(bool def) const
5178         {
5179                 return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
5180         }
5181
5182 #ifdef PUGIXML_HAS_LONG_LONG
5183         PUGI__FN long long xml_attribute::as_llong(long long def) const
5184         {
5185                 return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def;
5186         }
5187
5188         PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
5189         {
5190                 return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def;
5191         }
5192 #endif
5193
5194         PUGI__FN bool xml_attribute::empty() const
5195         {
5196                 return !_attr;
5197         }
5198
5199         PUGI__FN const char_t* xml_attribute::name() const
5200         {
5201                 return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
5202         }
5203
5204         PUGI__FN const char_t* xml_attribute::value() const
5205         {
5206                 return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
5207         }
5208
5209         PUGI__FN size_t xml_attribute::hash_value() const
5210         {
5211                 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
5212         }
5213
5214         PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
5215         {
5216                 return _attr;
5217         }
5218
5219         PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
5220         {
5221                 set_value(rhs);
5222                 return *this;
5223         }
5224
5225         PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
5226         {
5227                 set_value(rhs);
5228                 return *this;
5229         }
5230
5231         PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
5232         {
5233                 set_value(rhs);
5234                 return *this;
5235         }
5236
5237         PUGI__FN xml_attribute& xml_attribute::operator=(long rhs)
5238         {
5239                 set_value(rhs);
5240                 return *this;
5241         }
5242
5243         PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
5244         {
5245                 set_value(rhs);
5246                 return *this;
5247         }
5248
5249         PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
5250         {
5251                 set_value(rhs);
5252                 return *this;
5253         }
5254
5255         PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
5256         {
5257                 set_value(rhs);
5258                 return *this;
5259         }
5260
5261         PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
5262         {
5263                 set_value(rhs);
5264                 return *this;
5265         }
5266
5267 #ifdef PUGIXML_HAS_LONG_LONG
5268         PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
5269         {
5270                 set_value(rhs);
5271                 return *this;
5272         }
5273
5274         PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
5275         {
5276                 set_value(rhs);
5277                 return *this;
5278         }
5279 #endif
5280
5281         PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
5282         {
5283                 if (!_attr) return false;
5284
5285                 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5286         }
5287
5288         PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
5289         {
5290                 if (!_attr) return false;
5291
5292                 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5293         }
5294
5295         PUGI__FN bool xml_attribute::set_value(int rhs)
5296         {
5297                 if (!_attr) return false;
5298
5299                 return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5300         }
5301
5302         PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
5303         {
5304                 if (!_attr) return false;
5305
5306                 return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5307         }
5308
5309         PUGI__FN bool xml_attribute::set_value(long rhs)
5310         {
5311                 if (!_attr) return false;
5312
5313                 return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5314         }
5315
5316         PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
5317         {
5318                 if (!_attr) return false;
5319
5320                 return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5321         }
5322
5323         PUGI__FN bool xml_attribute::set_value(double rhs)
5324         {
5325                 if (!_attr) return false;
5326
5327                 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5328         }
5329
5330         PUGI__FN bool xml_attribute::set_value(float rhs)
5331         {
5332                 if (!_attr) return false;
5333
5334                 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5335         }
5336
5337         PUGI__FN bool xml_attribute::set_value(bool rhs)
5338         {
5339                 if (!_attr) return false;
5340
5341                 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5342         }
5343
5344 #ifdef PUGIXML_HAS_LONG_LONG
5345         PUGI__FN bool xml_attribute::set_value(long long rhs)
5346         {
5347                 if (!_attr) return false;
5348
5349                 return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5350         }
5351
5352         PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
5353         {
5354                 if (!_attr) return false;
5355
5356                 return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5357         }
5358 #endif
5359
5360 #ifdef __BORLANDC__
        // Only compiled for Borland C++ (see the enclosing #ifdef).
        // Combines the truth value of an attribute handle with a bool using logical AND.
        // Note that as an overloaded operator both operands are evaluated before the call.
        PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
        {
                return (bool)lhs && rhs;
        }

        // Only compiled for Borland C++ (see the enclosing #ifdef).
        // Combines the truth value of an attribute handle with a bool using logical OR.
        PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
        {
                return (bool)lhs || rhs;
        }
5370 #endif
5371
5372         PUGI__FN xml_node::xml_node(): _root(0)
5373         {
5374         }
5375
5376         PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
5377         {
5378         }
5379
5380         PUGI__FN static void unspecified_bool_xml_node(xml_node***)
5381         {
5382         }
5383
5384         PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
5385         {
5386                 return _root ? unspecified_bool_xml_node : 0;
5387         }
5388
5389         PUGI__FN bool xml_node::operator!() const
5390         {
5391                 return !_root;
5392         }
5393
5394         PUGI__FN xml_node::iterator xml_node::begin() const
5395         {
5396                 return iterator(_root ? _root->first_child + 0 : 0, _root);
5397         }
5398
5399         PUGI__FN xml_node::iterator xml_node::end() const
5400         {
5401                 return iterator(0, _root);
5402         }
5403
5404         PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
5405         {
5406                 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
5407         }
5408
5409         PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
5410         {
5411                 return attribute_iterator(0, _root);
5412         }
5413
5414         PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
5415         {
5416                 return xml_object_range<xml_node_iterator>(begin(), end());
5417         }
5418
5419         PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
5420         {
5421                 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
5422         }
5423
5424         PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
5425         {
5426                 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
5427         }
5428
5429         PUGI__FN bool xml_node::operator==(const xml_node& r) const
5430         {
5431                 return (_root == r._root);
5432         }
5433
5434         PUGI__FN bool xml_node::operator!=(const xml_node& r) const
5435         {
5436                 return (_root != r._root);
5437         }
5438
5439         PUGI__FN bool xml_node::operator<(const xml_node& r) const
5440         {
5441                 return (_root < r._root);
5442         }
5443
5444         PUGI__FN bool xml_node::operator>(const xml_node& r) const
5445         {
5446                 return (_root > r._root);
5447         }
5448
5449         PUGI__FN bool xml_node::operator<=(const xml_node& r) const
5450         {
5451                 return (_root <= r._root);
5452         }
5453
5454         PUGI__FN bool xml_node::operator>=(const xml_node& r) const
5455         {
5456                 return (_root >= r._root);
5457         }
5458
5459         PUGI__FN bool xml_node::empty() const
5460         {
5461                 return !_root;
5462         }
5463
5464         PUGI__FN const char_t* xml_node::name() const
5465         {
5466                 return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
5467         }
5468
5469         PUGI__FN xml_node_type xml_node::type() const
5470         {
5471                 return _root ? PUGI__NODETYPE(_root) : node_null;
5472         }
5473
5474         PUGI__FN const char_t* xml_node::value() const
5475         {
5476                 return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
5477         }
5478
5479         PUGI__FN xml_node xml_node::child(const char_t* name_) const
5480         {
5481                 if (!_root) return xml_node();
5482
5483                 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5484                         if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5485
5486                 return xml_node();
5487         }
5488
5489         PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
5490         {
5491                 if (!_root) return xml_attribute();
5492
5493                 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
5494                         if (i->name && impl::strequal(name_, i->name))
5495                                 return xml_attribute(i);
5496
5497                 return xml_attribute();
5498         }
5499
5500         PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
5501         {
5502                 if (!_root) return xml_node();
5503
5504                 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
5505                         if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5506
5507                 return xml_node();
5508         }
5509
5510         PUGI__FN xml_node xml_node::next_sibling() const
5511         {
5512                 return _root ? xml_node(_root->next_sibling) : xml_node();
5513         }
5514
5515         PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
5516         {
5517                 if (!_root) return xml_node();
5518
5519                 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
5520                         if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5521
5522                 return xml_node();
5523         }
5524
5525         PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
5526         {
5527                 xml_attribute_struct* hint = hint_._attr;
5528
5529                 // if hint is not an attribute of node, behavior is not defined
5530                 assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
5531
5532                 if (!_root) return xml_attribute();
5533
5534                 // optimistically search from hint up until the end
5535                 for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
5536                         if (i->name && impl::strequal(name_, i->name))
5537                         {
5538                                 // update hint to maximize efficiency of searching for consecutive attributes
5539                                 hint_._attr = i->next_attribute;
5540
5541                                 return xml_attribute(i);
5542                         }
5543
5544                 // wrap around and search from the first attribute until the hint
5545                 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
5546                 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
5547                         if (j->name && impl::strequal(name_, j->name))
5548                         {
5549                                 // update hint to maximize efficiency of searching for consecutive attributes
5550                                 hint_._attr = j->next_attribute;
5551
5552                                 return xml_attribute(j);
5553                         }
5554
5555                 return xml_attribute();
5556         }
5557
5558         PUGI__FN xml_node xml_node::previous_sibling() const
5559         {
5560                 if (!_root) return xml_node();
5561
5562                 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
5563                 else return xml_node();
5564         }
5565
5566         PUGI__FN xml_node xml_node::parent() const
5567         {
5568                 return _root ? xml_node(_root->parent) : xml_node();
5569         }
5570
5571         PUGI__FN xml_node xml_node::root() const
5572         {
5573                 return _root ? xml_node(&impl::get_document(_root)) : xml_node();
5574         }
5575
5576         PUGI__FN xml_text xml_node::text() const
5577         {
5578                 return xml_text(_root);
5579         }
5580
5581         PUGI__FN const char_t* xml_node::child_value() const
5582         {
5583                 if (!_root) return PUGIXML_TEXT("");
5584
5585                 // element nodes can have value if parse_embed_pcdata was used
5586                 if (PUGI__NODETYPE(_root) == node_element && _root->value)
5587                         return _root->value;
5588
5589                 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5590                         if (impl::is_text_node(i) && i->value)
5591                                 return i->value;
5592
5593                 return PUGIXML_TEXT("");
5594         }
5595
5596         PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
5597         {
5598                 return child(name_).child_value();
5599         }
5600
5601         PUGI__FN xml_attribute xml_node::first_attribute() const
5602         {
5603                 return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
5604         }
5605
5606         PUGI__FN xml_attribute xml_node::last_attribute() const
5607         {
5608                 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
5609         }
5610
5611         PUGI__FN xml_node xml_node::first_child() const
5612         {
5613                 return _root ? xml_node(_root->first_child) : xml_node();
5614         }
5615
5616         PUGI__FN xml_node xml_node::last_child() const
5617         {
5618                 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
5619         }
5620
5621         PUGI__FN bool xml_node::set_name(const char_t* rhs)
5622         {
5623                 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5624
5625                 if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
5626                         return false;
5627
5628                 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5629         }
5630
5631         PUGI__FN bool xml_node::set_value(const char_t* rhs)
5632         {
5633                 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5634
5635                 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
5636                         return false;
5637
5638                 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5639         }
5640
5641         PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
5642         {
5643                 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5644
5645                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5646                 if (!alloc.reserve()) return xml_attribute();
5647
5648                 xml_attribute a(impl::allocate_attribute(alloc));
5649                 if (!a) return xml_attribute();
5650
5651                 impl::append_attribute(a._attr, _root);
5652
5653                 a.set_name(name_);
5654
5655                 return a;
5656         }
5657
5658         PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
5659         {
5660                 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5661
5662                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5663                 if (!alloc.reserve()) return xml_attribute();
5664
5665                 xml_attribute a(impl::allocate_attribute(alloc));
5666                 if (!a) return xml_attribute();
5667
5668                 impl::prepend_attribute(a._attr, _root);
5669
5670                 a.set_name(name_);
5671
5672                 return a;
5673         }
5674
5675         PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
5676         {
5677                 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5678                 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5679
5680                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5681                 if (!alloc.reserve()) return xml_attribute();
5682
5683                 xml_attribute a(impl::allocate_attribute(alloc));
5684                 if (!a) return xml_attribute();
5685
5686                 impl::insert_attribute_after(a._attr, attr._attr, _root);
5687
5688                 a.set_name(name_);
5689
5690                 return a;
5691         }
5692
5693         PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
5694         {
5695                 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5696                 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5697
5698                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5699                 if (!alloc.reserve()) return xml_attribute();
5700
5701                 xml_attribute a(impl::allocate_attribute(alloc));
5702                 if (!a) return xml_attribute();
5703
5704                 impl::insert_attribute_before(a._attr, attr._attr, _root);
5705
5706                 a.set_name(name_);
5707
5708                 return a;
5709         }
5710
5711         PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
5712         {
5713                 if (!proto) return xml_attribute();
5714                 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5715
5716                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5717                 if (!alloc.reserve()) return xml_attribute();
5718
5719                 xml_attribute a(impl::allocate_attribute(alloc));
5720                 if (!a) return xml_attribute();
5721
5722                 impl::append_attribute(a._attr, _root);
5723                 impl::node_copy_attribute(a._attr, proto._attr);
5724
5725                 return a;
5726         }
5727
5728         PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
5729         {
5730                 if (!proto) return xml_attribute();
5731                 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5732
5733                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5734                 if (!alloc.reserve()) return xml_attribute();
5735
5736                 xml_attribute a(impl::allocate_attribute(alloc));
5737                 if (!a) return xml_attribute();
5738
5739                 impl::prepend_attribute(a._attr, _root);
5740                 impl::node_copy_attribute(a._attr, proto._attr);
5741
5742                 return a;
5743         }
5744
5745         PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
5746         {
5747                 if (!proto) return xml_attribute();
5748                 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5749                 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5750
5751                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5752                 if (!alloc.reserve()) return xml_attribute();
5753
5754                 xml_attribute a(impl::allocate_attribute(alloc));
5755                 if (!a) return xml_attribute();
5756
5757                 impl::insert_attribute_after(a._attr, attr._attr, _root);
5758                 impl::node_copy_attribute(a._attr, proto._attr);
5759
5760                 return a;
5761         }
5762
5763         PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
5764         {
5765                 if (!proto) return xml_attribute();
5766                 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5767                 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5768
5769                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5770                 if (!alloc.reserve()) return xml_attribute();
5771
5772                 xml_attribute a(impl::allocate_attribute(alloc));
5773                 if (!a) return xml_attribute();
5774
5775                 impl::insert_attribute_before(a._attr, attr._attr, _root);
5776                 impl::node_copy_attribute(a._attr, proto._attr);
5777
5778                 return a;
5779         }
5780
5781         PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
5782         {
5783                 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5784
5785                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5786                 if (!alloc.reserve()) return xml_node();
5787
5788                 xml_node n(impl::allocate_node(alloc, type_));
5789                 if (!n) return xml_node();
5790
5791                 impl::append_node(n._root, _root);
5792
5793                 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5794
5795                 return n;
5796         }
5797
5798         PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
5799         {
5800                 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5801
5802                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5803                 if (!alloc.reserve()) return xml_node();
5804
5805                 xml_node n(impl::allocate_node(alloc, type_));
5806                 if (!n) return xml_node();
5807
5808                 impl::prepend_node(n._root, _root);
5809
5810                 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5811
5812                 return n;
5813         }
5814
5815         PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
5816         {
5817                 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5818                 if (!node._root || node._root->parent != _root) return xml_node();
5819
5820                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5821                 if (!alloc.reserve()) return xml_node();
5822
5823                 xml_node n(impl::allocate_node(alloc, type_));
5824                 if (!n) return xml_node();
5825
5826                 impl::insert_node_before(n._root, node._root);
5827
5828                 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5829
5830                 return n;
5831         }
5832
5833         PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
5834         {
5835                 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5836                 if (!node._root || node._root->parent != _root) return xml_node();
5837
5838                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5839                 if (!alloc.reserve()) return xml_node();
5840
5841                 xml_node n(impl::allocate_node(alloc, type_));
5842                 if (!n) return xml_node();
5843
5844                 impl::insert_node_after(n._root, node._root);
5845
5846                 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5847
5848                 return n;
5849         }
5850
5851         PUGI__FN xml_node xml_node::append_child(const char_t* name_)
5852         {
5853                 xml_node result = append_child(node_element);
5854
5855                 result.set_name(name_);
5856
5857                 return result;
5858         }
5859
5860         PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
5861         {
5862                 xml_node result = prepend_child(node_element);
5863
5864                 result.set_name(name_);
5865
5866                 return result;
5867         }
5868
5869         PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
5870         {
5871                 xml_node result = insert_child_after(node_element, node);
5872
5873                 result.set_name(name_);
5874
5875                 return result;
5876         }
5877
5878         PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
5879         {
5880                 xml_node result = insert_child_before(node_element, node);
5881
5882                 result.set_name(name_);
5883
5884                 return result;
5885         }
5886
5887         PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
5888         {
5889                 xml_node_type type_ = proto.type();
5890                 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5891
5892                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5893                 if (!alloc.reserve()) return xml_node();
5894
5895                 xml_node n(impl::allocate_node(alloc, type_));
5896                 if (!n) return xml_node();
5897
5898                 impl::append_node(n._root, _root);
5899                 impl::node_copy_tree(n._root, proto._root);
5900
5901                 return n;
5902         }
5903
5904         PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
5905         {
5906                 xml_node_type type_ = proto.type();
5907                 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5908
5909                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5910                 if (!alloc.reserve()) return xml_node();
5911
5912                 xml_node n(impl::allocate_node(alloc, type_));
5913                 if (!n) return xml_node();
5914
5915                 impl::prepend_node(n._root, _root);
5916                 impl::node_copy_tree(n._root, proto._root);
5917
5918                 return n;
5919         }
5920
5921         PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
5922         {
5923                 xml_node_type type_ = proto.type();
5924                 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5925                 if (!node._root || node._root->parent != _root) return xml_node();
5926
5927                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5928                 if (!alloc.reserve()) return xml_node();
5929
5930                 xml_node n(impl::allocate_node(alloc, type_));
5931                 if (!n) return xml_node();
5932
5933                 impl::insert_node_after(n._root, node._root);
5934                 impl::node_copy_tree(n._root, proto._root);
5935
5936                 return n;
5937         }
5938
5939         PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
5940         {
5941                 xml_node_type type_ = proto.type();
5942                 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5943                 if (!node._root || node._root->parent != _root) return xml_node();
5944
5945                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5946                 if (!alloc.reserve()) return xml_node();
5947
5948                 xml_node n(impl::allocate_node(alloc, type_));
5949                 if (!n) return xml_node();
5950
5951                 impl::insert_node_before(n._root, node._root);
5952                 impl::node_copy_tree(n._root, proto._root);
5953
5954                 return n;
5955         }
5956
5957         PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
5958         {
5959                 if (!impl::allow_move(*this, moved)) return xml_node();
5960
5961                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5962                 if (!alloc.reserve()) return xml_node();
5963
5964                 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5965                 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5966
5967                 impl::remove_node(moved._root);
5968                 impl::append_node(moved._root, _root);
5969
5970                 return moved;
5971         }
5972
5973         PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
5974         {
5975                 if (!impl::allow_move(*this, moved)) return xml_node();
5976
5977                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5978                 if (!alloc.reserve()) return xml_node();
5979
5980                 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5981                 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5982
5983                 impl::remove_node(moved._root);
5984                 impl::prepend_node(moved._root, _root);
5985
5986                 return moved;
5987         }
5988
5989         PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
5990         {
5991                 if (!impl::allow_move(*this, moved)) return xml_node();
5992                 if (!node._root || node._root->parent != _root) return xml_node();
5993                 if (moved._root == node._root) return xml_node();
5994
5995                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5996                 if (!alloc.reserve()) return xml_node();
5997
5998                 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5999                 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6000
6001                 impl::remove_node(moved._root);
6002                 impl::insert_node_after(moved._root, node._root);
6003
6004                 return moved;
6005         }
6006
6007         PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
6008         {
6009                 if (!impl::allow_move(*this, moved)) return xml_node();
6010                 if (!node._root || node._root->parent != _root) return xml_node();
6011                 if (moved._root == node._root) return xml_node();
6012
6013                 impl::xml_allocator& alloc = impl::get_allocator(_root);
6014                 if (!alloc.reserve()) return xml_node();
6015
6016                 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6017                 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6018
6019                 impl::remove_node(moved._root);
6020                 impl::insert_node_before(moved._root, node._root);
6021
6022                 return moved;
6023         }
6024
6025         PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
6026         {
6027                 return remove_attribute(attribute(name_));
6028         }
6029
6030         PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
6031         {
6032                 if (!_root || !a._attr) return false;
6033                 if (!impl::is_attribute_of(a._attr, _root)) return false;
6034
6035                 impl::xml_allocator& alloc = impl::get_allocator(_root);
6036                 if (!alloc.reserve()) return false;
6037
6038                 impl::remove_attribute(a._attr, _root);
6039                 impl::destroy_attribute(a._attr, alloc);
6040
6041                 return true;
6042         }
6043
6044         PUGI__FN bool xml_node::remove_child(const char_t* name_)
6045         {
6046                 return remove_child(child(name_));
6047         }
6048
6049         PUGI__FN bool xml_node::remove_child(const xml_node& n)
6050         {
6051                 if (!_root || !n._root || n._root->parent != _root) return false;
6052
6053                 impl::xml_allocator& alloc = impl::get_allocator(_root);
6054                 if (!alloc.reserve()) return false;
6055
6056                 impl::remove_node(n._root);
6057                 impl::destroy_node(n._root, alloc);
6058
6059                 return true;
6060         }
6061
6062         PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
6063         {
6064                 // append_buffer is only valid for elements/documents
6065                 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
6066
6067                 // get document node
6068                 impl::xml_document_struct* doc = &impl::get_document(_root);
6069
6070                 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
6071                 doc->header |= impl::xml_memory_page_contents_shared_mask;
6072
6073                 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
6074                 impl::xml_memory_page* page = 0;
6075                 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page));
6076                 (void)page;
6077
6078                 if (!extra) return impl::make_parse_result(status_out_of_memory);
6079
6080                 // add extra buffer to the list
6081                 extra->buffer = 0;
6082                 extra->next = doc->extra_buffers;
6083                 doc->extra_buffers = extra;
6084
6085                 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
6086                 impl::name_null_sentry sentry(_root);
6087
6088                 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
6089         }
6090
6091         PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
6092         {
6093                 if (!_root) return xml_node();
6094
6095                 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6096                         if (i->name && impl::strequal(name_, i->name))
6097                         {
6098                                 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6099                                         if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6100                                                 return xml_node(i);
6101                         }
6102
6103                 return xml_node();
6104         }
6105
6106         PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
6107         {
6108                 if (!_root) return xml_node();
6109
6110                 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6111                         for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6112                                 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6113                                         return xml_node(i);
6114
6115                 return xml_node();
6116         }
6117
6118 #ifndef PUGIXML_NO_STL
6119         PUGI__FN string_t xml_node::path(char_t delimiter) const
6120         {
6121                 if (!_root) return string_t();
6122
6123                 size_t offset = 0;
6124
6125                 for (xml_node_struct* i = _root; i; i = i->parent)
6126                 {
6127                         offset += (i != _root);
6128                         offset += i->name ? impl::strlength(i->name) : 0;
6129                 }
6130
6131                 string_t result;
6132                 result.resize(offset);
6133
6134                 for (xml_node_struct* j = _root; j; j = j->parent)
6135                 {
6136                         if (j != _root)
6137                                 result[--offset] = delimiter;
6138
6139                         if (j->name && *j->name)
6140                         {
6141                                 size_t length = impl::strlength(j->name);
6142
6143                                 offset -= length;
6144                                 memcpy(&result[offset], j->name, length * sizeof(char_t));
6145                         }
6146                 }
6147
6148                 assert(offset == 0);
6149
6150                 return result;
6151         }
6152 #endif
6153
6154         PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
6155         {
6156                 xml_node found = *this; // Current search context.
6157
6158                 if (!_root || !path_ || !path_[0]) return found;
6159
6160                 if (path_[0] == delimiter)
6161                 {
6162                         // Absolute path; e.g. '/foo/bar'
6163                         found = found.root();
6164                         ++path_;
6165                 }
6166
6167                 const char_t* path_segment = path_;
6168
6169                 while (*path_segment == delimiter) ++path_segment;
6170
6171                 const char_t* path_segment_end = path_segment;
6172
6173                 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
6174
6175                 if (path_segment == path_segment_end) return found;
6176
6177                 const char_t* next_segment = path_segment_end;
6178
6179                 while (*next_segment == delimiter) ++next_segment;
6180
6181                 if (*path_segment == '.' && path_segment + 1 == path_segment_end)
6182                         return found.first_element_by_path(next_segment, delimiter);
6183                 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
6184                         return found.parent().first_element_by_path(next_segment, delimiter);
6185                 else
6186                 {
6187                         if (found._root) {
6188                                 for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
6189                                 {
6190                                         if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
6191                                         {
6192                                                 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
6193
6194                                                 if (subsearch) return subsearch;
6195                                         }
6196                                 }
6197                         }
6198
6199                         return xml_node();
6200                 }
6201         }
6202
6203         PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
6204         {
6205                 walker._depth = -1;
6206
6207                 xml_node arg_begin = *this;
6208                 if (!walker.begin(arg_begin)) return false;
6209
6210                 xml_node cur = first_child();
6211
6212                 if (cur)
6213                 {
6214                         ++walker._depth;
6215
6216                         do
6217                         {
6218                                 xml_node arg_for_each = cur;
6219                                 if (!walker.for_each(arg_for_each))
6220                                         return false;
6221
6222                                 if (cur.first_child())
6223                                 {
6224                                         ++walker._depth;
6225                                         cur = cur.first_child();
6226                                 }
6227                                 else if (cur.next_sibling())
6228                                         cur = cur.next_sibling();
6229                                 else
6230                                 {
6231                                         // Borland C++ workaround
6232                                         while (!cur.next_sibling() && cur != *this && !cur.parent().empty())
6233                                         {
6234                                                 --walker._depth;
6235                                                 cur = cur.parent();
6236                                         }
6237
6238                                         if (cur != *this)
6239                                                 cur = cur.next_sibling();
6240                                 }
6241                         }
6242                         while (cur && cur != *this);
6243                 }
6244
6245                 assert(walker._depth == -1);
6246
6247                 xml_node arg_end = *this;
6248                 return walker.end(arg_end);
6249         }
6250
6251         PUGI__FN size_t xml_node::hash_value() const
6252         {
6253                 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
6254         }
6255
6256         PUGI__FN xml_node_struct* xml_node::internal_object() const
6257         {
6258                 return _root;
6259         }
6260
6261         PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6262         {
6263                 if (!_root) return;
6264
6265                 impl::xml_buffered_writer buffered_writer(writer, encoding);
6266
6267                 impl::node_output(buffered_writer, _root, indent, flags, depth);
6268
6269                 buffered_writer.flush();
6270         }
6271
6272 #ifndef PUGIXML_NO_STL
6273         PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6274         {
6275                 xml_writer_stream writer(stream);
6276
6277                 print(writer, indent, flags, encoding, depth);
6278         }
6279
6280         PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
6281         {
6282                 xml_writer_stream writer(stream);
6283
6284                 print(writer, indent, flags, encoding_wchar, depth);
6285         }
6286 #endif
6287
6288         PUGI__FN ptrdiff_t xml_node::offset_debug() const
6289         {
6290                 if (!_root) return -1;
6291
6292                 impl::xml_document_struct& doc = impl::get_document(_root);
6293
6294                 // we can determine the offset reliably only if there is exactly once parse buffer
6295                 if (!doc.buffer || doc.extra_buffers) return -1;
6296
6297                 switch (type())
6298                 {
6299                 case node_document:
6300                         return 0;
6301
6302                 case node_element:
6303                 case node_declaration:
6304                 case node_pi:
6305                         return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
6306
6307                 case node_pcdata:
6308                 case node_cdata:
6309                 case node_comment:
6310                 case node_doctype:
6311                         return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
6312
6313                 default:
6314                         return -1;
6315                 }
6316         }
6317
#ifdef __BORLANDC__
        // Borland-only helpers: combine a node handle with a bool by first
        // converting the handle to bool.
        PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
        {
                const bool lhs_set = (bool)lhs;
                return lhs_set && rhs;
        }

        PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
        {
                const bool lhs_set = (bool)lhs;
                return lhs_set || rhs;
        }
#endif
6329
6330         PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
6331         {
6332         }
6333
6334         PUGI__FN xml_node_struct* xml_text::_data() const
6335         {
6336                 if (!_root || impl::is_text_node(_root)) return _root;
6337
6338                 // element nodes can have value if parse_embed_pcdata was used
6339                 if (PUGI__NODETYPE(_root) == node_element && _root->value)
6340                         return _root;
6341
6342                 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
6343                         if (impl::is_text_node(node))
6344                                 return node;
6345
6346                 return 0;
6347         }
6348
6349         PUGI__FN xml_node_struct* xml_text::_data_new()
6350         {
6351                 xml_node_struct* d = _data();
6352                 if (d) return d;
6353
6354                 return xml_node(_root).append_child(node_pcdata).internal_object();
6355         }
6356
6357         PUGI__FN xml_text::xml_text(): _root(0)
6358         {
6359         }
6360
6361         PUGI__FN static void unspecified_bool_xml_text(xml_text***)
6362         {
6363         }
6364
6365         PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
6366         {
6367                 return _data() ? unspecified_bool_xml_text : 0;
6368         }
6369
6370         PUGI__FN bool xml_text::operator!() const
6371         {
6372                 return !_data();
6373         }
6374
6375         PUGI__FN bool xml_text::empty() const
6376         {
6377                 return _data() == 0;
6378         }
6379
6380         PUGI__FN const char_t* xml_text::get() const
6381         {
6382                 xml_node_struct* d = _data();
6383
6384                 return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
6385         }
6386
6387         PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
6388         {
6389                 xml_node_struct* d = _data();
6390
6391                 return (d && d->value) ? d->value + 0 : def;
6392         }
6393
6394         PUGI__FN int xml_text::as_int(int def) const
6395         {
6396                 xml_node_struct* d = _data();
6397
6398                 return (d && d->value) ? impl::get_value_int(d->value) : def;
6399         }
6400
6401         PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
6402         {
6403                 xml_node_struct* d = _data();
6404
6405                 return (d && d->value) ? impl::get_value_uint(d->value) : def;
6406         }
6407
6408         PUGI__FN double xml_text::as_double(double def) const
6409         {
6410                 xml_node_struct* d = _data();
6411
6412                 return (d && d->value) ? impl::get_value_double(d->value) : def;
6413         }
6414
6415         PUGI__FN float xml_text::as_float(float def) const
6416         {
6417                 xml_node_struct* d = _data();
6418
6419                 return (d && d->value) ? impl::get_value_float(d->value) : def;
6420         }
6421
6422         PUGI__FN bool xml_text::as_bool(bool def) const
6423         {
6424                 xml_node_struct* d = _data();
6425
6426                 return (d && d->value) ? impl::get_value_bool(d->value) : def;
6427         }
6428
#ifdef PUGIXML_HAS_LONG_LONG
        // Converts the stored text with impl::get_value_llong; def is returned
        // when there is no data node or no value.
        PUGI__FN long long xml_text::as_llong(long long def) const
        {
                xml_node_struct* text = _data();

                if (text && text->value) return impl::get_value_llong(text->value);

                return def;
        }

        // Converts the stored text with impl::get_value_ullong; def is returned
        // when there is no data node or no value.
        PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
        {
                xml_node_struct* text = _data();

                if (text && text->value) return impl::get_value_ullong(text->value);

                return def;
        }
#endif
6444
6445         PUGI__FN bool xml_text::set(const char_t* rhs)
6446         {
6447                 xml_node_struct* dn = _data_new();
6448
6449                 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
6450         }
6451
6452         PUGI__FN bool xml_text::set(int rhs)
6453         {
6454                 xml_node_struct* dn = _data_new();
6455
6456                 return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6457         }
6458
6459         PUGI__FN bool xml_text::set(unsigned int rhs)
6460         {
6461                 xml_node_struct* dn = _data_new();
6462
6463                 return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6464         }
6465
6466         PUGI__FN bool xml_text::set(long rhs)
6467         {
6468                 xml_node_struct* dn = _data_new();
6469
6470                 return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6471         }
6472
6473         PUGI__FN bool xml_text::set(unsigned long rhs)
6474         {
6475                 xml_node_struct* dn = _data_new();
6476
6477                 return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6478         }
6479
6480         PUGI__FN bool xml_text::set(float rhs)
6481         {
6482                 xml_node_struct* dn = _data_new();
6483
6484                 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6485         }
6486
6487         PUGI__FN bool xml_text::set(double rhs)
6488         {
6489                 xml_node_struct* dn = _data_new();
6490
6491                 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6492         }
6493
6494         PUGI__FN bool xml_text::set(bool rhs)
6495         {
6496                 xml_node_struct* dn = _data_new();
6497
6498                 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6499         }
6500
#ifdef PUGIXML_HAS_LONG_LONG
        // Stores a long long as the text value, creating the data node on demand.
        // Returns false when no data node could be obtained.
        PUGI__FN bool xml_text::set(long long rhs)
        {
                xml_node_struct* target = _data_new();

                if (!target) return false;

                return impl::set_value_integer(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
        }

        // Stores an unsigned long long as the text value, creating the data node on demand.
        // Returns false when no data node could be obtained.
        PUGI__FN bool xml_text::set(unsigned long long rhs)
        {
                xml_node_struct* target = _data_new();

                if (!target) return false;

                return impl::set_value_integer(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
        }
#endif
6516
6517         PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
6518         {
6519                 set(rhs);
6520                 return *this;
6521         }
6522
6523         PUGI__FN xml_text& xml_text::operator=(int rhs)
6524         {
6525                 set(rhs);
6526                 return *this;
6527         }
6528
6529         PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
6530         {
6531                 set(rhs);
6532                 return *this;
6533         }
6534
6535         PUGI__FN xml_text& xml_text::operator=(long rhs)
6536         {
6537                 set(rhs);
6538                 return *this;
6539         }
6540
6541         PUGI__FN xml_text& xml_text::operator=(unsigned long rhs)
6542         {
6543                 set(rhs);
6544                 return *this;
6545         }
6546
6547         PUGI__FN xml_text& xml_text::operator=(double rhs)
6548         {
6549                 set(rhs);
6550                 return *this;
6551         }
6552
6553         PUGI__FN xml_text& xml_text::operator=(float rhs)
6554         {
6555                 set(rhs);
6556                 return *this;
6557         }
6558
6559         PUGI__FN xml_text& xml_text::operator=(bool rhs)
6560         {
6561                 set(rhs);
6562                 return *this;
6563         }
6564
#ifdef PUGIXML_HAS_LONG_LONG
        // Assignment from long long: forwards to set(); its result is not reported.
        PUGI__FN xml_text& xml_text::operator=(long long rhs)
        {
                this->set(rhs);

                return *this;
        }

        // Assignment from unsigned long long: forwards to set(); its result is not reported.
        PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
        {
                this->set(rhs);

                return *this;
        }
#endif
6578
6579         PUGI__FN xml_node xml_text::data() const
6580         {
6581                 return xml_node(_data());
6582         }
6583
#ifdef __BORLANDC__
        // Borland-only helpers: combine a text accessor with a bool by first
        // converting the accessor to bool.
        PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
        {
                const bool lhs_set = (bool)lhs;
                return lhs_set && rhs;
        }

        PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
        {
                const bool lhs_set = (bool)lhs;
                return lhs_set || rhs;
        }
#endif
6595
6596         PUGI__FN xml_node_iterator::xml_node_iterator()
6597         {
6598         }
6599
6600         PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
6601         {
6602         }
6603
6604         PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6605         {
6606         }
6607
6608         PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
6609         {
6610                 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6611         }
6612
6613         PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
6614         {
6615                 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6616         }
6617
6618         PUGI__FN xml_node& xml_node_iterator::operator*() const
6619         {
6620                 assert(_wrap._root);
6621                 return _wrap;
6622         }
6623
6624         PUGI__FN xml_node* xml_node_iterator::operator->() const
6625         {
6626                 assert(_wrap._root);
6627                 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6628         }
6629
6630         PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
6631         {
6632                 assert(_wrap._root);
6633                 _wrap._root = _wrap._root->next_sibling;
6634                 return *this;
6635         }
6636
6637         PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
6638         {
6639                 xml_node_iterator temp = *this;
6640                 ++*this;
6641                 return temp;
6642         }
6643
6644         PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
6645         {
6646                 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
6647                 return *this;
6648         }
6649
6650         PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
6651         {
6652                 xml_node_iterator temp = *this;
6653                 --*this;
6654                 return temp;
6655         }
6656
6657         PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
6658         {
6659         }
6660
6661         PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
6662         {
6663         }
6664
6665         PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6666         {
6667         }
6668
6669         PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
6670         {
6671                 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
6672         }
6673
6674         PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
6675         {
6676                 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
6677         }
6678
6679         PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
6680         {
6681                 assert(_wrap._attr);
6682                 return _wrap;
6683         }
6684
6685         PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
6686         {
6687                 assert(_wrap._attr);
6688                 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
6689         }
6690
6691         PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
6692         {
6693                 assert(_wrap._attr);
6694                 _wrap._attr = _wrap._attr->next_attribute;
6695                 return *this;
6696         }
6697
6698         PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
6699         {
6700                 xml_attribute_iterator temp = *this;
6701                 ++*this;
6702                 return temp;
6703         }
6704
6705         PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
6706         {
6707                 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
6708                 return *this;
6709         }
6710
6711         PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
6712         {
6713                 xml_attribute_iterator temp = *this;
6714                 --*this;
6715                 return temp;
6716         }
6717
6718         PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
6719         {
6720         }
6721
6722         PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
6723         {
6724         }
6725
6726         PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
6727         {
6728         }
6729
6730         PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
6731         {
6732                 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6733         }
6734
6735         PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
6736         {
6737                 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6738         }
6739
6740         PUGI__FN xml_node& xml_named_node_iterator::operator*() const
6741         {
6742                 assert(_wrap._root);
6743                 return _wrap;
6744         }
6745
6746         PUGI__FN xml_node* xml_named_node_iterator::operator->() const
6747         {
6748                 assert(_wrap._root);
6749                 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6750         }
6751
6752         PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
6753         {
6754                 assert(_wrap._root);
6755                 _wrap = _wrap.next_sibling(_name);
6756                 return *this;
6757         }
6758
6759         PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
6760         {
6761                 xml_named_node_iterator temp = *this;
6762                 ++*this;
6763                 return temp;
6764         }
6765
6766         PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
6767         {
6768                 if (_wrap._root)
6769                         _wrap = _wrap.previous_sibling(_name);
6770                 else
6771                 {
6772                         _wrap = _parent.last_child();
6773
6774                         if (!impl::strequal(_wrap.name(), _name))
6775                                 _wrap = _wrap.previous_sibling(_name);
6776                 }
6777
6778                 return *this;
6779         }
6780
6781         PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
6782         {
6783                 xml_named_node_iterator temp = *this;
6784                 --*this;
6785                 return temp;
6786         }
6787
6788         PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
6789         {
6790         }
6791
6792         PUGI__FN xml_parse_result::operator bool() const
6793         {
6794                 return status == status_ok;
6795         }
6796
6797         PUGI__FN const char* xml_parse_result::description() const
6798         {
6799                 switch (status)
6800                 {
6801                 case status_ok: return "No error";
6802
6803                 case status_file_not_found: return "File was not found";
6804                 case status_io_error: return "Error reading from file/stream";
6805                 case status_out_of_memory: return "Could not allocate memory";
6806                 case status_internal_error: return "Internal error occurred";
6807
6808                 case status_unrecognized_tag: return "Could not determine tag type";
6809
6810                 case status_bad_pi: return "Error parsing document declaration/processing instruction";
6811                 case status_bad_comment: return "Error parsing comment";
6812                 case status_bad_cdata: return "Error parsing CDATA section";
6813                 case status_bad_doctype: return "Error parsing document type declaration";
6814                 case status_bad_pcdata: return "Error parsing PCDATA section";
6815                 case status_bad_start_element: return "Error parsing start element tag";
6816                 case status_bad_attribute: return "Error parsing element attribute";
6817                 case status_bad_end_element: return "Error parsing end element tag";
6818                 case status_end_element_mismatch: return "Start-end tags mismatch";
6819
6820                 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
6821
6822                 case status_no_document_element: return "No document element found";
6823
6824                 default: return "Unknown error";
6825                 }
6826         }
6827
6828         PUGI__FN xml_document::xml_document(): _buffer(0)
6829         {
6830                 create();
6831         }
6832
6833         PUGI__FN xml_document::~xml_document()
6834         {
6835                 destroy();
6836         }
6837
6838         PUGI__FN void xml_document::reset()
6839         {
6840                 destroy();
6841                 create();
6842         }
6843
6844         PUGI__FN void xml_document::reset(const xml_document& proto)
6845         {
6846                 reset();
6847
6848                 for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
6849                         append_copy(cur);
6850         }
6851
6852         PUGI__FN void xml_document::create()
6853         {
6854                 assert(!_root);
6855
6856         #ifdef PUGIXML_COMPACT
6857                 const size_t page_offset = sizeof(uint32_t);
6858         #else
6859                 const size_t page_offset = 0;
6860         #endif
6861
6862                 // initialize sentinel page
6863                 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));
6864
6865                 // prepare page structure
6866                 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
6867                 assert(page);
6868
6869                 page->busy_size = impl::xml_memory_page_size;
6870
6871                 // setup first page marker
6872         #ifdef PUGIXML_COMPACT
6873                 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
6874                 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
6875                 *page->compact_page_marker = sizeof(impl::xml_memory_page);
6876         #endif
6877
6878                 // allocate new root
6879                 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
6880                 _root->prev_sibling_c = _root;
6881
6882                 // setup sentinel page
6883                 page->allocator = static_cast<impl::xml_document_struct*>(_root);
6884
6885                 // verify the document allocation
6886                 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
6887         }
6888
6889         PUGI__FN void xml_document::destroy()
6890         {
6891                 assert(_root);
6892
6893                 // destroy static storage
6894                 if (_buffer)
6895                 {
6896                         impl::xml_memory::deallocate(_buffer);
6897                         _buffer = 0;
6898                 }
6899
6900                 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
6901                 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
6902                 {
6903                         if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
6904                 }
6905
6906                 // destroy dynamic storage, leave sentinel page (it's in static memory)
6907                 impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
6908                 assert(root_page && !root_page->prev);
6909                 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
6910
6911                 for (impl::xml_memory_page* page = root_page->next; page; )
6912                 {
6913                         impl::xml_memory_page* next = page->next;
6914
6915                         impl::xml_allocator::deallocate_page(page);
6916
6917                         page = next;
6918                 }
6919
6920         #ifdef PUGIXML_COMPACT
6921                 // destroy hash table
6922                 static_cast<impl::xml_document_struct*>(_root)->hash.clear();
6923         #endif
6924
6925                 _root = 0;
6926         }
6927
6928 #ifndef PUGIXML_NO_STL
6929         PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
6930         {
6931                 reset();
6932
6933                 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
6934         }
6935
6936         PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
6937         {
6938                 reset();
6939
6940                 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
6941         }
6942 #endif
6943
6944         PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
6945         {
6946                 // Force native encoding (skip autodetection)
6947         #ifdef PUGIXML_WCHAR_MODE
6948                 xml_encoding encoding = encoding_wchar;
6949         #else
6950                 xml_encoding encoding = encoding_utf8;
6951         #endif
6952
6953                 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
6954         }
6955
6956         PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
6957         {
6958                 return load_string(contents, options);
6959         }
6960
6961         PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
6962         {
6963                 reset();
6964
6965                 using impl::auto_deleter; // MSVC7 workaround
6966                 auto_deleter<FILE> file(fopen(path_, "rb"), impl::close_file);
6967
6968                 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
6969         }
6970
6971         PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
6972         {
6973                 reset();
6974
6975                 using impl::auto_deleter; // MSVC7 workaround
6976                 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
6977
6978                 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
6979         }
6980
6981         PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
6982         {
6983                 reset();
6984
6985                 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
6986         }
6987
6988         PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
6989         {
6990                 reset();
6991
6992                 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
6993         }
6994
6995         PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
6996         {
6997                 reset();
6998
6999                 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
7000         }
7001
7002         PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7003         {
7004                 impl::xml_buffered_writer buffered_writer(writer, encoding);
7005
7006                 if ((flags & format_write_bom) && encoding != encoding_latin1)
7007                 {
7008                         // BOM always represents the codepoint U+FEFF, so just write it in native encoding
7009                 #ifdef PUGIXML_WCHAR_MODE
7010                         unsigned int bom = 0xfeff;
7011                         buffered_writer.write(static_cast<wchar_t>(bom));
7012                 #else
7013                         buffered_writer.write('\xef', '\xbb', '\xbf');
7014                 #endif
7015                 }
7016
7017                 if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
7018                 {
7019                         buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
7020                         if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
7021                         buffered_writer.write('?', '>');
7022                         if (!(flags & format_raw)) buffered_writer.write('\n');
7023                 }
7024
7025                 impl::node_output(buffered_writer, _root, indent, flags, 0);
7026
7027                 buffered_writer.flush();
7028         }
7029
7030 #ifndef PUGIXML_NO_STL
7031         PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7032         {
7033                 xml_writer_stream writer(stream);
7034
7035                 save(writer, indent, flags, encoding);
7036         }
7037
7038         PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
7039         {
7040                 xml_writer_stream writer(stream);
7041
7042                 save(writer, indent, flags, encoding_wchar);
7043         }
7044 #endif
7045
7046         PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7047         {
7048                 using impl::auto_deleter; // MSVC7 workaround
7049                 auto_deleter<FILE> file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
7050
7051                 return impl::save_file_impl(*this, file.data, indent, flags, encoding);
7052         }
7053
7054         PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7055         {
7056                 using impl::auto_deleter; // MSVC7 workaround
7057                 wchar_t openMode[3] = {L'\0'};
7058                 if (flags & format_save_file_text) {
7059                         openMode[0] = L'w';
7060                 } else {
7061                         openMode[0] = L'w';
7062                         openMode[1] = L'b';
7063                 }
7064                 auto_deleter<FILE> file(impl::open_file_wide(path_, openMode), impl::close_file);
7065
7066                 return impl::save_file_impl(*this, file.data, indent, flags, encoding);
7067         }
7068
7069         PUGI__FN xml_node xml_document::document_element() const
7070         {
7071                 assert(_root);
7072
7073                 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
7074                         if (PUGI__NODETYPE(i) == node_element)
7075                                 return xml_node(i);
7076
7077                 return xml_node();
7078         }
7079
7080 #ifndef PUGIXML_NO_STL
7081         PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
7082         {
7083                 assert(str);
7084
7085                 return impl::as_utf8_impl(str, impl::strlength_wide(str));
7086         }
7087
7088         PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
7089         {
7090                 return impl::as_utf8_impl(str.c_str(), str.size());
7091         }
7092
7093         PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
7094         {
7095                 assert(str);
7096
7097                 return impl::as_wide_impl(str, strlen(str));
7098         }
7099
7100         PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
7101         {
7102                 return impl::as_wide_impl(str.c_str(), str.size());
7103         }
7104 #endif
7105
7106         PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
7107         {
7108                 impl::xml_memory::allocate = allocate;
7109                 impl::xml_memory::deallocate = deallocate;
7110         }
7111
7112         PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
7113         {
7114                 return impl::xml_memory::allocate;
7115         }
7116
7117         PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
7118         {
7119                 return impl::xml_memory::deallocate;
7120         }
7121 }
7122
#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
namespace std
{
        // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier).
        // Each overload reports the corresponding pugixml iterator as bidirectional.

        // category of pugi::xml_node_iterator
        PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
        {
                return bidirectional_iterator_tag();
        }

        // category of pugi::xml_attribute_iterator
        PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
        {
                return bidirectional_iterator_tag();
        }

        // category of pugi::xml_named_node_iterator
        PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
        {
                return bidirectional_iterator_tag();
        }
}
#endif
7143
#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
namespace std
{
        // Workarounds for (non-standard) iterator category detection.
        // Each overload reports the corresponding pugixml iterator as bidirectional.

        // category of pugi::xml_node_iterator
        PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
        {
                return bidirectional_iterator_tag();
        }

        // category of pugi::xml_attribute_iterator
        PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
        {
                return bidirectional_iterator_tag();
        }

        // category of pugi::xml_named_node_iterator
        PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
        {
                return bidirectional_iterator_tag();
        }
}
#endif
7164
7165 #ifndef PUGIXML_NO_XPATH
7166 // STL replacements
7167 PUGI__NS_BEGIN
7168         struct equal_to
7169         {
7170                 template <typename T> bool operator()(const T& lhs, const T& rhs) const
7171                 {
7172                         return lhs == rhs;
7173                 }
7174         };
7175
7176         struct not_equal_to
7177         {
7178                 template <typename T> bool operator()(const T& lhs, const T& rhs) const
7179                 {
7180                         return lhs != rhs;
7181                 }
7182         };
7183
7184         struct less
7185         {
7186                 template <typename T> bool operator()(const T& lhs, const T& rhs) const
7187                 {
7188                         return lhs < rhs;
7189                 }
7190         };
7191
7192         struct less_equal
7193         {
7194                 template <typename T> bool operator()(const T& lhs, const T& rhs) const
7195                 {
7196                         return lhs <= rhs;
7197                 }
7198         };
7199
7200         template <typename T> void swap(T& lhs, T& rhs)
7201         {
7202                 T temp = lhs;
7203                 lhs = rhs;
7204                 rhs = temp;
7205         }
7206
7207         template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
7208         {
7209                 I result = begin;
7210
7211                 for (I it = begin + 1; it != end; ++it)
7212                         if (pred(*it, *result))
7213                                 result = it;
7214
7215                 return result;
7216         }
7217
7218         template <typename I> void reverse(I begin, I end)
7219         {
7220                 while (end - begin > 1) swap(*begin++, *--end);
7221         }
7222
7223         template <typename I> I unique(I begin, I end)
7224         {
7225                 // fast skip head
7226                 while (end - begin > 1 && *begin != *(begin + 1)) begin++;
7227
7228                 if (begin == end) return begin;
7229
7230                 // last written element
7231                 I write = begin++;
7232
7233                 // merge unique elements
7234                 while (begin != end)
7235                 {
7236                         if (*begin != *write)
7237                                 *++write = *begin++;
7238                         else
7239                                 begin++;
7240                 }
7241
7242                 // past-the-end (write points to live element)
7243                 return write + 1;
7244         }
7245
7246         template <typename I> void copy_backwards(I begin, I end, I target)
7247         {
7248                 while (begin != end) *--target = *--end;
7249         }
7250
7251         template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
7252         {
7253                 assert(begin != end);
7254
7255                 for (I it = begin + 1; it != end; ++it)
7256                 {
7257                         T val = *it;
7258
7259                         if (pred(val, *begin))
7260                         {
7261                                 // move to front
7262                                 copy_backwards(begin, it, it + 1);
7263                                 *begin = val;
7264                         }
7265                         else
7266                         {
7267                                 I hole = it;
7268
7269                                 // move hole backwards
7270                                 while (pred(val, *(hole - 1)))
7271                                 {
7272                                         *hole = *(hole - 1);
7273                                         hole--;
7274                                 }
7275
7276                                 // fill hole with element
7277                                 *hole = val;
7278                         }
7279                 }
7280         }
7281
7282         // std variant for elements with ==
7283         template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
7284         {
7285                 I eqbeg = middle, eqend = middle + 1;
7286
7287                 // expand equal range
7288                 while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
7289                 while (eqend != end && *eqend == *eqbeg) ++eqend;
7290
7291                 // process outer elements
7292                 I ltend = eqbeg, gtbeg = eqend;
7293
7294                 for (;;)
7295                 {
7296                         // find the element from the right side that belongs to the left one
7297                         for (; gtbeg != end; ++gtbeg)
7298                                 if (!pred(*eqbeg, *gtbeg))
7299                                 {
7300                                         if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);
7301                                         else break;
7302                                 }
7303
7304                         // find the element from the left side that belongs to the right one
7305                         for (; ltend != begin; --ltend)
7306                                 if (!pred(*(ltend - 1), *eqbeg))
7307                                 {
7308                                         if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);
7309                                         else break;
7310                                 }
7311
7312                         // scanned all elements
7313                         if (gtbeg == end && ltend == begin)
7314                         {
7315                                 *out_eqbeg = eqbeg;
7316                                 *out_eqend = eqend;
7317                                 return;
7318                         }
7319
7320                         // make room for elements by moving equal area
7321                         if (gtbeg == end)
7322                         {
7323                                 if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
7324                                 swap(*eqbeg, *--eqend);
7325                         }
7326                         else if (ltend == begin)
7327                         {
7328                                 if (eqend != gtbeg) swap(*eqbeg, *eqend);
7329                                 ++eqend;
7330                                 swap(*gtbeg++, *eqbeg++);
7331                         }
7332                         else swap(*gtbeg++, *--ltend);
7333                 }
7334         }
7335
7336         template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
7337         {
7338                 if (pred(*middle, *first)) swap(*middle, *first);
7339                 if (pred(*last, *middle)) swap(*last, *middle);
7340                 if (pred(*middle, *first)) swap(*middle, *first);
7341         }
7342
7343         template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
7344         {
7345                 if (last - first <= 40)
7346                 {
7347                         // median of three for small chunks
7348                         median3(first, middle, last, pred);
7349                 }
7350                 else
7351                 {
7352                         // median of nine
7353                         size_t step = (last - first + 1) / 8;
7354
7355                         median3(first, first + step, first + 2 * step, pred);
7356                         median3(middle - step, middle, middle + step, pred);
7357                         median3(last - 2 * step, last - step, last, pred);
7358                         median3(first + step, middle, last - step, pred);
7359                 }
7360         }
7361
7362         template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
7363         {
7364                 // sort large chunks
7365                 while (end - begin > 32)
7366                 {
7367                         // find median element
7368                         I middle = begin + (end - begin) / 2;
7369                         median(begin, middle, end - 1, pred);
7370
7371                         // partition in three chunks (< = >)
7372                         I eqbeg, eqend;
7373                         partition(begin, middle, end, pred, &eqbeg, &eqend);
7374
7375                         // loop on larger half
7376                         if (eqbeg - begin > end - eqend)
7377                         {
7378                                 sort(eqend, end, pred);
7379                                 end = eqbeg;
7380                         }
7381                         else
7382                         {
7383                                 sort(begin, eqbeg, pred);
7384                                 begin = eqend;
7385                         }
7386                 }
7387
7388                 // insertion sort small chunk
7389                 if (begin != end) insertion_sort(begin, end, pred, &*begin);
7390         }
7391 PUGI__NS_END
7392
7393 // Allocator used for AST and evaluation stacks
7394 PUGI__NS_BEGIN
7395         static const size_t xpath_memory_page_size =
7396         #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
7397                 PUGIXML_MEMORY_XPATH_PAGE_SIZE
7398         #else
7399                 4096
7400         #endif
7401                 ;
7402
7403         static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*);
7404
7405         struct xpath_memory_block
7406         {
7407                 xpath_memory_block* next;
7408                 size_t capacity;
7409
7410                 union
7411                 {
7412                         char data[xpath_memory_page_size];
7413                         double alignment;
7414                 };
7415         };
7416
7417         class xpath_allocator
7418         {
7419                 xpath_memory_block* _root;
7420                 size_t _root_size;
7421
7422         public:
7423         #ifdef PUGIXML_NO_EXCEPTIONS
7424                 jmp_buf* error_handler;
7425         #endif
7426
7427                 xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
7428                 {
7429                 #ifdef PUGIXML_NO_EXCEPTIONS
7430                         error_handler = 0;
7431                 #endif
7432                 }
7433
7434                 void* allocate_nothrow(size_t size)
7435                 {
7436                         // round size up to block alignment boundary
7437                         size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7438
7439                         if (_root_size + size <= _root->capacity)
7440                         {
7441                                 void* buf = &_root->data[0] + _root_size;
7442                                 _root_size += size;
7443                                 return buf;
7444                         }
7445                         else
7446                         {
7447                                 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
7448                                 size_t block_capacity_base = sizeof(_root->data);
7449                                 size_t block_capacity_req = size + block_capacity_base / 4;
7450                                 size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
7451
7452                                 size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
7453
7454                                 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
7455                                 if (!block) return 0;
7456
7457                                 block->next = _root;
7458                                 block->capacity = block_capacity;
7459
7460                                 _root = block;
7461                                 _root_size = size;
7462
7463                                 return block->data;
7464                         }
7465                 }
7466
7467                 void* allocate(size_t size)
7468                 {
7469                         void* result = allocate_nothrow(size);
7470
7471                         if (!result)
7472                         {
7473                         #ifdef PUGIXML_NO_EXCEPTIONS
7474                                 assert(error_handler);
7475                                 longjmp(*error_handler, 1);
7476                         #else
7477                                 throw std::bad_alloc();
7478                         #endif
7479                         }
7480
7481                         return result;
7482                 }
7483
7484                 void* reallocate(void* ptr, size_t old_size, size_t new_size)
7485                 {
7486                         // round size up to block alignment boundary
7487                         old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7488                         new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7489
7490                         // we can only reallocate the last object
7491                         assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
7492
7493                         // adjust root size so that we have not allocated the object at all
7494                         bool only_object = (_root_size == old_size);
7495
7496                         if (ptr) _root_size -= old_size;
7497
7498                         // allocate a new version (this will obviously reuse the memory if possible)
7499                         void* result = allocate(new_size);
7500                         assert(result);
7501
7502                         // we have a new block
7503                         if (result != ptr && ptr)
7504                         {
7505                                 // copy old data
7506                                 assert(new_size >= old_size);
7507                                 memcpy(result, ptr, old_size);
7508
7509                                 // free the previous page if it had no other objects
7510                                 if (only_object)
7511                                 {
7512                                         assert(_root->data == result);
7513                                         assert(_root->next);
7514
7515                                         xpath_memory_block* next = _root->next->next;
7516
7517                                         if (next)
7518                                         {
7519                                                 // deallocate the whole page, unless it was the first one
7520                                                 xml_memory::deallocate(_root->next);
7521                                                 _root->next = next;
7522                                         }
7523                                 }
7524                         }
7525
7526                         return result;
7527                 }
7528
7529                 void revert(const xpath_allocator& state)
7530                 {
7531                         // free all new pages
7532                         xpath_memory_block* cur = _root;
7533
7534                         while (cur != state._root)
7535                         {
7536                                 xpath_memory_block* next = cur->next;
7537
7538                                 xml_memory::deallocate(cur);
7539
7540                                 cur = next;
7541                         }
7542
7543                         // restore state
7544                         _root = state._root;
7545                         _root_size = state._root_size;
7546                 }
7547
7548                 void release()
7549                 {
7550                         xpath_memory_block* cur = _root;
7551                         assert(cur);
7552
7553                         while (cur->next)
7554                         {
7555                                 xpath_memory_block* next = cur->next;
7556
7557                                 xml_memory::deallocate(cur);
7558
7559                                 cur = next;
7560                         }
7561                 }
7562         };
7563
7564         struct xpath_allocator_capture
7565         {
7566                 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
7567                 {
7568                 }
7569
7570                 ~xpath_allocator_capture()
7571                 {
7572                         _target->revert(_state);
7573                 }
7574
7575                 xpath_allocator* _target;
7576                 xpath_allocator _state;
7577         };
7578
7579         struct xpath_stack
7580         {
7581                 xpath_allocator* result;
7582                 xpath_allocator* temp;
7583         };
7584
7585         struct xpath_stack_data
7586         {
7587                 xpath_memory_block blocks[2];
7588                 xpath_allocator result;
7589                 xpath_allocator temp;
7590                 xpath_stack stack;
7591
7592         #ifdef PUGIXML_NO_EXCEPTIONS
7593                 jmp_buf error_handler;
7594         #endif
7595
7596                 xpath_stack_data(): result(blocks + 0), temp(blocks + 1)
7597                 {
7598                         blocks[0].next = blocks[1].next = 0;
7599                         blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
7600
7601                         stack.result = &result;
7602                         stack.temp = &temp;
7603
7604                 #ifdef PUGIXML_NO_EXCEPTIONS
7605                         result.error_handler = temp.error_handler = &error_handler;
7606                 #endif
7607                 }
7608
7609                 ~xpath_stack_data()
7610                 {
7611                         result.release();
7612                         temp.release();
7613                 }
7614         };
7615 PUGI__NS_END
7616
7617 // String class
7618 PUGI__NS_BEGIN
7619         class xpath_string
7620         {
7621                 const char_t* _buffer;
7622                 bool _uses_heap;
7623                 size_t _length_heap;
7624
7625                 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
7626                 {
7627                         char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
7628                         assert(result);
7629
7630                         memcpy(result, string, length * sizeof(char_t));
7631                         result[length] = 0;
7632
7633                         return result;
7634                 }
7635
7636                 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
7637                 {
7638                 }
7639
7640         public:
7641                 static xpath_string from_const(const char_t* str)
7642                 {
7643                         return xpath_string(str, false, 0);
7644                 }
7645
7646                 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
7647                 {
7648                         assert(begin <= end && *end == 0);
7649
7650                         return xpath_string(begin, true, static_cast<size_t>(end - begin));
7651                 }
7652
7653                 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
7654                 {
7655                         assert(begin <= end);
7656
7657                         size_t length = static_cast<size_t>(end - begin);
7658
7659                         return length == 0 ? xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length);
7660                 }
7661
7662                 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
7663                 {
7664                 }
7665
7666                 void append(const xpath_string& o, xpath_allocator* alloc)
7667                 {
7668                         // skip empty sources
7669                         if (!*o._buffer) return;
7670
7671                         // fast append for constant empty target and constant source
7672                         if (!*_buffer && !_uses_heap && !o._uses_heap)
7673                         {
7674                                 _buffer = o._buffer;
7675                         }
7676                         else
7677                         {
7678                                 // need to make heap copy
7679                                 size_t target_length = length();
7680                                 size_t source_length = o.length();
7681                                 size_t result_length = target_length + source_length;
7682
7683                                 // allocate new buffer
7684                                 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
7685                                 assert(result);
7686
7687                                 // append first string to the new buffer in case there was no reallocation
7688                                 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
7689
7690                                 // append second string to the new buffer
7691                                 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
7692                                 result[result_length] = 0;
7693
7694                                 // finalize
7695                                 _buffer = result;
7696                                 _uses_heap = true;
7697                                 _length_heap = result_length;
7698                         }
7699                 }
7700
7701                 const char_t* c_str() const
7702                 {
7703                         return _buffer;
7704                 }
7705
7706                 size_t length() const
7707                 {
7708                         return _uses_heap ? _length_heap : strlength(_buffer);
7709                 }
7710
7711                 char_t* data(xpath_allocator* alloc)
7712                 {
7713                         // make private heap copy
7714                         if (!_uses_heap)
7715                         {
7716                                 size_t length_ = strlength(_buffer);
7717
7718                                 _buffer = duplicate_string(_buffer, length_, alloc);
7719                                 _uses_heap = true;
7720                                 _length_heap = length_;
7721                         }
7722
7723                         return const_cast<char_t*>(_buffer);
7724                 }
7725
7726                 bool empty() const
7727                 {
7728                         return *_buffer == 0;
7729                 }
7730
7731                 bool operator==(const xpath_string& o) const
7732                 {
7733                         return strequal(_buffer, o._buffer);
7734                 }
7735
7736                 bool operator!=(const xpath_string& o) const
7737                 {
7738                         return !strequal(_buffer, o._buffer);
7739                 }
7740
7741                 bool uses_heap() const
7742                 {
7743                         return _uses_heap;
7744                 }
7745         };
7746 PUGI__NS_END
7747
7748 PUGI__NS_BEGIN
7749         PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
7750         {
7751                 while (*pattern && *string == *pattern)
7752                 {
7753                         string++;
7754                         pattern++;
7755                 }
7756
7757                 return *pattern == 0;
7758         }
7759
7760         PUGI__FN const char_t* find_char(const char_t* s, char_t c)
7761         {
7762         #ifdef PUGIXML_WCHAR_MODE
7763                 return wcschr(s, c);
7764         #else
7765                 return strchr(s, c);
7766         #endif
7767         }
7768
7769         PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
7770         {
7771         #ifdef PUGIXML_WCHAR_MODE
7772                 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
7773                 return (*p == 0) ? s : wcsstr(s, p);
7774         #else
7775                 return strstr(s, p);
7776         #endif
7777         }
7778
7779         // Converts symbol to lower case, if it is an ASCII one
7780         PUGI__FN char_t tolower_ascii(char_t ch)
7781         {
7782                 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
7783         }
7784
7785         PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
7786         {
7787                 if (na.attribute())
7788                         return xpath_string::from_const(na.attribute().value());
7789                 else
7790                 {
7791                         xml_node n = na.node();
7792
7793                         switch (n.type())
7794                         {
7795                         case node_pcdata:
7796                         case node_cdata:
7797                         case node_comment:
7798                         case node_pi:
7799                                 return xpath_string::from_const(n.value());
7800
7801                         case node_document:
7802                         case node_element:
7803                         {
7804                                 xpath_string result;
7805
7806                                 // element nodes can have value if parse_embed_pcdata was used
7807                                 if (n.value()[0])
7808                                         result.append(xpath_string::from_const(n.value()), alloc);
7809
7810                                 xml_node cur = n.first_child();
7811
7812                                 while (cur && cur != n)
7813                                 {
7814                                         if (cur.type() == node_pcdata || cur.type() == node_cdata)
7815                                                 result.append(xpath_string::from_const(cur.value()), alloc);
7816
7817                                         if (cur.first_child())
7818                                                 cur = cur.first_child();
7819                                         else if (cur.next_sibling())
7820                                                 cur = cur.next_sibling();
7821                                         else
7822                                         {
7823                                                 while (!cur.next_sibling() && cur != n)
7824                                                         cur = cur.parent();
7825
7826                                                 if (cur != n) cur = cur.next_sibling();
7827                                         }
7828                                 }
7829
7830                                 return result;
7831                         }
7832
7833                         default:
7834                                 return xpath_string();
7835                         }
7836                 }
7837         }
7838
7839         PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
7840         {
7841                 assert(ln->parent == rn->parent);
7842
7843                 // there is no common ancestor (the shared parent is null), nodes are from different documents
7844                 if (!ln->parent) return ln < rn;
7845
7846                 // determine sibling order
7847                 xml_node_struct* ls = ln;
7848                 xml_node_struct* rs = rn;
7849
7850                 while (ls && rs)
7851                 {
7852                         if (ls == rn) return true;
7853                         if (rs == ln) return false;
7854
7855                         ls = ls->next_sibling;
7856                         rs = rs->next_sibling;
7857                 }
7858
7859                 // if rn sibling chain ended ln must be before rn
7860                 return !rs;
7861         }
7862
7863         PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
7864         {
7865                 // find common ancestor at the same depth, if any
7866                 xml_node_struct* lp = ln;
7867                 xml_node_struct* rp = rn;
7868
7869                 while (lp && rp && lp->parent != rp->parent)
7870                 {
7871                         lp = lp->parent;
7872                         rp = rp->parent;
7873                 }
7874
7875                 // parents are the same!
7876                 if (lp && rp) return node_is_before_sibling(lp, rp);
7877
7878                 // nodes are at different depths, need to normalize heights
7879                 bool left_higher = !lp;
7880
7881                 while (lp)
7882                 {
7883                         lp = lp->parent;
7884                         ln = ln->parent;
7885                 }
7886
7887                 while (rp)
7888                 {
7889                         rp = rp->parent;
7890                         rn = rn->parent;
7891                 }
7892
7893                 // one node is the ancestor of the other
7894                 if (ln == rn) return left_higher;
7895
7896                 // find common ancestor... again
7897                 while (ln->parent != rn->parent)
7898                 {
7899                         ln = ln->parent;
7900                         rn = rn->parent;
7901                 }
7902
7903                 return node_is_before_sibling(ln, rn);
7904         }
7905
7906         PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
7907         {
7908                 while (node && node != parent) node = node->parent;
7909
7910                 return parent && node == parent;
7911         }
7912
7913         PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
7914         {
7915                 xml_node_struct* node = xnode.node().internal_object();
7916
7917                 if (node)
7918                 {
7919                         if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
7920                         {
7921                                 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
7922                                 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
7923                         }
7924
7925                         return 0;
7926                 }
7927
7928                 xml_attribute_struct* attr = xnode.attribute().internal_object();
7929
7930                 if (attr)
7931                 {
7932                         if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
7933                         {
7934                                 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
7935                                 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
7936                         }
7937
7938                         return 0;
7939                 }
7940
7941                 return 0;
7942         }
7943
7944         struct document_order_comparator
7945         {
7946                 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
7947                 {
7948                         // optimized document order based check
7949                         const void* lo = document_buffer_order(lhs);
7950                         const void* ro = document_buffer_order(rhs);
7951
7952                         if (lo && ro) return lo < ro;
7953
7954                         // slow comparison
7955                         xml_node ln = lhs.node(), rn = rhs.node();
7956
7957                         // compare attributes
7958                         if (lhs.attribute() && rhs.attribute())
7959                         {
7960                                 // shared parent
7961                                 if (lhs.parent() == rhs.parent())
7962                                 {
7963                                         // determine sibling order
7964                                         for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
7965                                                 if (a == rhs.attribute())
7966                                                         return true;
7967
7968                                         return false;
7969                                 }
7970
7971                                 // compare attribute parents
7972                                 ln = lhs.parent();
7973                                 rn = rhs.parent();
7974                         }
7975                         else if (lhs.attribute())
7976                         {
7977                                 // attributes go after the parent element
7978                                 if (lhs.parent() == rhs.node()) return false;
7979
7980                                 ln = lhs.parent();
7981                         }
7982                         else if (rhs.attribute())
7983                         {
7984                                 // attributes go after the parent element
7985                                 if (rhs.parent() == lhs.node()) return true;
7986
7987                                 rn = rhs.parent();
7988                         }
7989
7990                         if (ln == rn) return false;
7991
7992                         if (!ln || !rn) return ln < rn;
7993
7994                         return node_is_before(ln.internal_object(), rn.internal_object());
7995                 }
7996         };
7997
7998         struct duplicate_comparator
7999         {
8000                 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
8001                 {
8002                         if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
8003                         else return rhs.attribute() ? false : lhs.node() < rhs.node();
8004                 }
8005         };
8006
8007         PUGI__FN double gen_nan()
8008         {
8009         #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
8010                 PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
8011                 typedef uint32_t UI; // BCC5 workaround
8012                 union { float f; UI i; } u;
8013                 u.i = 0x7fc00000;
8014                 return u.f;
8015         #else
8016                 // fallback
8017                 const volatile double zero = 0.0;
8018                 return zero / zero;
8019         #endif
8020         }
8021
8022         PUGI__FN bool is_nan(double value)
8023         {
8024         #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8025                 return !!_isnan(value);
8026         #elif defined(fpclassify) && defined(FP_NAN)
8027                 return fpclassify(value) == FP_NAN;
8028         #else
8029                 // fallback
8030                 const volatile double v = value;
8031                 return v != v;
8032         #endif
8033         }
8034
8035         PUGI__FN const char_t* convert_number_to_string_special(double value)
8036         {
8037         #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8038                 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
8039                 if (_isnan(value)) return PUGIXML_TEXT("NaN");
8040                 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8041         #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
8042                 switch (fpclassify(value))
8043                 {
8044                 case FP_NAN:
8045                         return PUGIXML_TEXT("NaN");
8046
8047                 case FP_INFINITE:
8048                         return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8049
8050                 case FP_ZERO:
8051                         return PUGIXML_TEXT("0");
8052
8053                 default:
8054                         return 0;
8055                 }
8056         #else
8057                 // fallback
8058                 const volatile double v = value;
8059
8060                 if (v == 0) return PUGIXML_TEXT("0");
8061                 if (v != v) return PUGIXML_TEXT("NaN");
8062                 if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8063                 return 0;
8064         #endif
8065         }
8066
8067         PUGI__FN bool convert_number_to_boolean(double value)
8068         {
8069                 return (value != 0 && !is_nan(value));
8070         }
8071
8072         PUGI__FN void truncate_zeros(char* begin, char* end)
8073         {
8074                 while (begin != end && end[-1] == '0') end--;
8075
8076                 *end = 0;
8077         }
8078
8079         // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
8080 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
8081         PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
8082         {
8083                 // get base values
8084                 int sign, exponent;
8085                 _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
8086
8087                 // truncate redundant zeros
8088                 truncate_zeros(buffer, buffer + strlen(buffer));
8089
8090                 // fill results
8091                 *out_mantissa = buffer;
8092                 *out_exponent = exponent;
8093         }
8094 #else
8095         PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
8096         {
8097                 // get a scientific notation value with IEEE DBL_DIG decimals
8098                 sprintf(buffer, "%.*e", DBL_DIG, value);
8099                 assert(strlen(buffer) < buffer_size);
8100                 (void)!buffer_size;
8101
8102                 // get the exponent (possibly negative)
8103                 char* exponent_string = strchr(buffer, 'e');
8104                 assert(exponent_string);
8105
8106                 int exponent = atoi(exponent_string + 1);
8107
8108                 // extract mantissa string: skip sign
8109                 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
8110                 assert(mantissa[0] != '0' && mantissa[1] == '.');
8111
8112                 // divide mantissa by 10 to eliminate integer part
8113                 mantissa[1] = mantissa[0];
8114                 mantissa++;
8115                 exponent++;
8116
8117                 // remove extra mantissa digits and zero-terminate mantissa
8118                 truncate_zeros(mantissa, exponent_string);
8119
8120                 // fill results
8121                 *out_mantissa = mantissa;
8122                 *out_exponent = exponent;
8123         }
8124 #endif
8125
8126         PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
8127         {
8128                 // try special number conversion
8129                 const char_t* special = convert_number_to_string_special(value);
8130                 if (special) return xpath_string::from_const(special);
8131
8132                 // get mantissa + exponent form
8133                 char mantissa_buffer[32] = {};
8134
8135                 char* mantissa = nullptr;
8136                 int exponent = 0;
8137                 convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
8138
8139                 // allocate a buffer of suitable length for the number
8140                 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
8141                 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
8142                 assert(result);
8143
8144                 // make the number!
8145                 char_t* s = result;
8146
8147                 // sign
8148                 if (value < 0) *s++ = '-';
8149
8150                 // integer part
8151                 if (exponent <= 0)
8152                 {
8153                         *s++ = '0';
8154                 }
8155                 else
8156                 {
8157                         while (exponent > 0)
8158                         {
8159                                 assert(*mantissa == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*mantissa) - '0') <= 9);
8160                                 *s++ = *mantissa ? *mantissa++ : '0';
8161                                 exponent--;
8162                         }
8163                 }
8164
8165                 // fractional part
8166                 if (*mantissa)
8167                 {
8168                         // decimal point
8169                         *s++ = '.';
8170
8171                         // extra zeroes from negative exponent
8172                         while (exponent < 0)
8173                         {
8174                                 *s++ = '0';
8175                                 exponent++;
8176                         }
8177
8178                         // extra mantissa digits
8179                         while (*mantissa)
8180                         {
8181                                 assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
8182                                 *s++ = *mantissa++;
8183                         }
8184                 }
8185
8186                 // zero-terminate
8187                 assert(s < result + result_size);
8188                 *s = 0;
8189
8190                 return xpath_string::from_heap_preallocated(result, s);
8191         }
8192
8193         PUGI__FN bool check_string_to_number_format(const char_t* string)
8194         {
8195                 // parse leading whitespace
8196                 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8197
8198                 // parse sign
8199                 if (*string == '-') ++string;
8200
8201                 if (!*string) return false;
8202
8203                 // if there is no integer part, there should be a decimal part with at least one digit
8204                 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
8205
8206                 // parse integer part
8207                 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8208
8209                 // parse decimal part
8210                 if (*string == '.')
8211                 {
8212                         ++string;
8213
8214                         while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8215                 }
8216
8217                 // parse trailing whitespace
8218                 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8219
8220                 return *string == 0;
8221         }
8222
8223         PUGI__FN double convert_string_to_number(const char_t* string)
8224         {
8225                 // check string format
8226                 if (!check_string_to_number_format(string)) return gen_nan();
8227
8228                 // parse string
8229         #ifdef PUGIXML_WCHAR_MODE
8230                 return wcstod(string, 0);
8231         #else
8232                 return strtod(string, 0);
8233         #endif
8234         }
8235
8236         PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
8237         {
8238                 size_t length = static_cast<size_t>(end - begin);
8239                 char_t* scratch = buffer;
8240
8241                 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8242                 {
8243                         // need to make dummy on-heap copy
8244                         scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8245                         if (!scratch) return false;
8246                 }
8247
8248                 // copy string to zero-terminated buffer and perform conversion
8249                 memcpy(scratch, begin, length * sizeof(char_t));
8250                 scratch[length] = 0;
8251
8252                 *out_result = convert_string_to_number(scratch);
8253
8254                 // free dummy buffer
8255                 if (scratch != buffer) xml_memory::deallocate(scratch);
8256
8257                 return true;
8258         }
8259
8260         PUGI__FN double round_nearest(double value)
8261         {
8262                 return floor(value + 0.5);
8263         }
8264
8265         PUGI__FN double round_nearest_nzero(double value)
8266         {
8267                 // same as round_nearest, but returns -0 for [-0.5, -0]
8268                 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
8269                 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
8270         }
8271
8272         PUGI__FN const char_t* qualified_name(const xpath_node& node)
8273         {
8274                 return node.attribute() ? node.attribute().name() : node.node().name();
8275         }
8276
8277         PUGI__FN const char_t* local_name(const xpath_node& node)
8278         {
8279                 const char_t* name = qualified_name(node);
8280                 const char_t* p = find_char(name, ':');
8281
8282                 return p ? p + 1 : name;
8283         }
8284
8285         struct namespace_uri_predicate
8286         {
8287                 const char_t* prefix;
8288                 size_t prefix_length;
8289
8290                 namespace_uri_predicate(const char_t* name)
8291                 {
8292                         const char_t* pos = find_char(name, ':');
8293
8294                         prefix = pos ? name : 0;
8295                         prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
8296                 }
8297
8298                 bool operator()(xml_attribute a) const
8299                 {
8300                         const char_t* name = a.name();
8301
8302                         if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
8303
8304                         return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
8305                 }
8306         };
8307
8308         PUGI__FN const char_t* namespace_uri(xml_node node)
8309         {
8310                 namespace_uri_predicate pred = node.name();
8311
8312                 xml_node p = node;
8313
8314                 while (p)
8315                 {
8316                         xml_attribute a = p.find_attribute(pred);
8317
8318                         if (a) return a.value();
8319
8320                         p = p.parent();
8321                 }
8322
8323                 return PUGIXML_TEXT("");
8324         }
8325
8326         PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
8327         {
8328                 namespace_uri_predicate pred = attr.name();
8329
8330                 // Default namespace does not apply to attributes
8331                 if (!pred.prefix) return PUGIXML_TEXT("");
8332
8333                 xml_node p = parent;
8334
8335                 while (p)
8336                 {
8337                         xml_attribute a = p.find_attribute(pred);
8338
8339                         if (a) return a.value();
8340
8341                         p = p.parent();
8342                 }
8343
8344                 return PUGIXML_TEXT("");
8345         }
8346
8347         PUGI__FN const char_t* namespace_uri(const xpath_node& node)
8348         {
8349                 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
8350         }
8351
8352         PUGI__FN char_t* normalize_space(char_t* buffer)
8353         {
8354                 char_t* write = buffer;
8355
8356                 for (char_t* it = buffer; *it; )
8357                 {
8358                         char_t ch = *it++;
8359
8360                         if (PUGI__IS_CHARTYPE(ch, ct_space))
8361                         {
8362                                 // replace whitespace sequence with single space
8363                                 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
8364
8365                                 // avoid leading spaces
8366                                 if (write != buffer) *write++ = ' ';
8367                         }
8368                         else *write++ = ch;
8369                 }
8370
8371                 // remove trailing space
8372                 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
8373
8374                 // zero-terminate
8375                 *write = 0;
8376
8377                 return write;
8378         }
8379
8380         PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
8381         {
8382                 char_t* write = buffer;
8383
8384                 while (*buffer)
8385                 {
8386                         PUGI__DMC_VOLATILE char_t ch = *buffer++;
8387
8388                         const char_t* pos = find_char(from, ch);
8389
8390                         if (!pos)
8391                                 *write++ = ch; // do not process
8392                         else if (static_cast<size_t>(pos - from) < to_length)
8393                                 *write++ = to[pos - from]; // replace
8394                 }
8395
8396                 // zero-terminate
8397                 *write = 0;
8398
8399                 return write;
8400         }
8401
8402         PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
8403         {
8404                 unsigned char table[128] = {0};
8405
8406                 while (*from)
8407                 {
8408                         unsigned int fc = static_cast<unsigned int>(*from);
8409                         unsigned int tc = static_cast<unsigned int>(*to);
8410
8411                         if (fc >= 128 || tc >= 128)
8412                                 return 0;
8413
8414                         // code=128 means "skip character"
8415                         if (!table[fc])
8416                                 table[fc] = static_cast<unsigned char>(tc ? tc : 128);
8417
8418                         from++;
8419                         if (tc) to++;
8420                 }
8421
8422                 for (int i = 0; i < 128; ++i)
8423                         if (!table[i])
8424                                 table[i] = static_cast<unsigned char>(i);
8425
8426                 void* result = alloc->allocate_nothrow(sizeof(table));
8427
8428                 if (result)
8429                 {
8430                         memcpy(result, table, sizeof(table));
8431                 }
8432
8433                 return static_cast<unsigned char*>(result);
8434         }
8435
8436         PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
8437         {
8438                 char_t* write = buffer;
8439
8440                 while (*buffer)
8441                 {
8442                         char_t ch = *buffer++;
8443                         unsigned int index = static_cast<unsigned int>(ch);
8444
8445                         if (index < 128)
8446                         {
8447                                 unsigned char code = table[index];
8448
8449                                 // code=128 means "skip character" (table size is 128 so 128 can be a special value)
8450                                 // this code skips these characters without extra branches
8451                                 *write = static_cast<char_t>(code);
8452                                 write += 1 - (code >> 7);
8453                         }
8454                         else
8455                         {
8456                                 *write++ = ch;
8457                         }
8458                 }
8459
8460                 // zero-terminate
8461                 *write = 0;
8462
8463                 return write;
8464         }
8465
8466         inline bool is_xpath_attribute(const char_t* name)
8467         {
8468                 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
8469         }
8470
8471         struct xpath_variable_boolean: xpath_variable
8472         {
8473                 xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
8474                 {
8475                 }
8476
8477                 bool value;
8478                 char_t name[1];
8479         };
8480
8481         struct xpath_variable_number: xpath_variable
8482         {
8483                 xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
8484                 {
8485                 }
8486
8487                 double value;
8488                 char_t name[1];
8489         };
8490
8491         struct xpath_variable_string: xpath_variable
8492         {
8493                 xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
8494                 {
8495                 }
8496
8497                 ~xpath_variable_string()
8498                 {
8499                         if (value) xml_memory::deallocate(value);
8500                 }
8501
8502                 char_t* value;
8503                 char_t name[1];
8504         };
8505
8506         struct xpath_variable_node_set: xpath_variable
8507         {
8508                 xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
8509                 {
8510                 }
8511
8512                 xpath_node_set value;
8513                 char_t name[1];
8514         };
8515
8516         static const xpath_node_set dummy_node_set; // default-constructed, immutable, file-local node set; presumably a shared placeholder for "no node set" - TODO confirm at use sites
8517
8518         PUGI__FN unsigned int hash_string(const char_t* str)
8519         {
8520                 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
8521                 unsigned int result = 0;
8522
8523                 while (*str)
8524                 {
8525                         result += static_cast<unsigned int>(*str++);
8526                         result += result << 10;
8527                         result ^= result >> 6;
8528                 }
8529
8530                 result += result << 3;
8531                 result ^= result >> 11;
8532                 result += result << 15;
8533
8534                 return result;
8535         }
8536
8537         template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
8538         {
8539                 size_t length = strlength(name);
8540                 if (length == 0) return 0; // empty variable names are invalid
8541
8542                 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
8543                 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
8544                 if (!memory) return 0;
8545
8546                 T* result = new (memory) T();
8547
8548                 memcpy(result->name, name, (length + 1) * sizeof(char_t));
8549
8550                 return result;
8551         }
8552
8553         PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
8554         {
8555                 switch (type)
8556                 {
8557                 case xpath_type_node_set:
8558                         return new_xpath_variable<xpath_variable_node_set>(name);
8559
8560                 case xpath_type_number:
8561                         return new_xpath_variable<xpath_variable_number>(name);
8562
8563                 case xpath_type_string:
8564                         return new_xpath_variable<xpath_variable_string>(name);
8565
8566                 case xpath_type_boolean:
8567                         return new_xpath_variable<xpath_variable_boolean>(name);
8568
8569                 default:
8570                         return 0;
8571                 }
8572         }
8573
8574         template <typename T> PUGI__FN void delete_xpath_variable(T* var)
8575         {
8576                 var->~T();
8577                 xml_memory::deallocate(var);
8578         }
8579
8580         PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
8581         {
8582                 switch (type)
8583                 {
8584                 case xpath_type_node_set:
8585                         delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
8586                         break;
8587
8588                 case xpath_type_number:
8589                         delete_xpath_variable(static_cast<xpath_variable_number*>(var));
8590                         break;
8591
8592                 case xpath_type_string:
8593                         delete_xpath_variable(static_cast<xpath_variable_string*>(var));
8594                         break;
8595
8596                 case xpath_type_boolean:
8597                         delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
8598                         break;
8599
8600                 default:
8601                         assert(false && "Invalid variable type");
8602                 }
8603         }
8604
8605         PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
8606         {
8607                 switch (rhs->type())
8608                 {
8609                 case xpath_type_node_set:
8610                         return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
8611
8612                 case xpath_type_number:
8613                         return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
8614
8615                 case xpath_type_string:
8616                         return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
8617
8618                 case xpath_type_boolean:
8619                         return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
8620
8621                 default:
8622                         assert(false && "Invalid variable type");
8623                         return false;
8624                 }
8625         }
8626
8627         PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
8628         {
8629                 size_t length = static_cast<size_t>(end - begin);
8630                 char_t* scratch = buffer;
8631
8632                 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8633                 {
8634                         // need to make dummy on-heap copy
8635                         scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8636                         if (!scratch) return false;
8637                 }
8638
8639                 // copy string to zero-terminated buffer and perform lookup
8640                 memcpy(scratch, begin, length * sizeof(char_t));
8641                 scratch[length] = 0;
8642
8643                 *out_result = set->get(scratch);
8644
8645                 // free dummy buffer
8646                 if (scratch != buffer) xml_memory::deallocate(scratch);
8647
8648                 return true;
8649         }
8650 PUGI__NS_END
8651
8652 // Internal node set class
8653 PUGI__NS_BEGIN
8654         PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
8655         {
8656                 if (end - begin < 2)
8657                         return xpath_node_set::type_sorted;
8658
8659                 document_order_comparator cmp;
8660
8661                 bool first = cmp(begin[0], begin[1]);
8662
8663                 for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
8664                         if (cmp(it[0], it[1]) != first)
8665                                 return xpath_node_set::type_unsorted;
8666
8667                 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
8668         }
8669
8670         PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
8671         {
8672                 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
8673
8674                 if (type == xpath_node_set::type_unsorted)
8675                 {
8676                         xpath_node_set::type_t sorted = xpath_get_order(begin, end);
8677
8678                         if (sorted == xpath_node_set::type_unsorted)
8679                         {
8680                                 sort(begin, end, document_order_comparator());
8681
8682                                 type = xpath_node_set::type_sorted;
8683                         }
8684                         else
8685                                 type = sorted;
8686                 }
8687
8688                 if (type != order) reverse(begin, end);
8689
8690                 return order;
8691         }
8692
8693         PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
8694         {
8695                 if (begin == end) return xpath_node();
8696
8697                 switch (type)
8698                 {
8699                 case xpath_node_set::type_sorted:
8700                         return *begin;
8701
8702                 case xpath_node_set::type_sorted_reverse:
8703                         return *(end - 1);
8704
8705                 case xpath_node_set::type_unsorted:
8706                         return *min_element(begin, end, document_order_comparator());
8707
8708                 default:
8709                         assert(false && "Invalid node set type");
8710                         return xpath_node();
8711                 }
8712         }
8713
8714         class xpath_node_set_raw
8715         {
8716                 xpath_node_set::type_t _type;
8717
8718                 xpath_node* _begin;
8719                 xpath_node* _end;
8720                 xpath_node* _eos;
8721
8722         public:
8723                 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
8724                 {
8725                 }
8726
8727                 xpath_node* begin() const
8728                 {
8729                         return _begin;
8730                 }
8731
8732                 xpath_node* end() const
8733                 {
8734                         return _end;
8735                 }
8736
8737                 bool empty() const
8738                 {
8739                         return _begin == _end;
8740                 }
8741
8742                 size_t size() const
8743                 {
8744                         return static_cast<size_t>(_end - _begin);
8745                 }
8746
8747                 xpath_node first() const
8748                 {
8749                         return xpath_first(_begin, _end, _type);
8750                 }
8751
8752                 void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
8753
8754                 void push_back(const xpath_node& node, xpath_allocator* alloc)
8755                 {
8756                         if (_end != _eos)
8757                                 *_end++ = node;
8758                         else
8759                                 push_back_grow(node, alloc);
8760                 }
8761
8762                 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
8763                 {
8764                         if (begin_ == end_) return;
8765
8766                         size_t size_ = static_cast<size_t>(_end - _begin);
8767                         size_t capacity = static_cast<size_t>(_eos - _begin);
8768                         size_t count = static_cast<size_t>(end_ - begin_);
8769
8770                         if (size_ + count > capacity)
8771                         {
8772                                 // reallocate the old array or allocate a new one
8773                                 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
8774                                 assert(data);
8775
8776                                 // finalize
8777                                 _begin = data;
8778                                 _end = data + size_;
8779                                 _eos = data + size_ + count;
8780                         }
8781
8782                         memcpy(_end, begin_, count * sizeof(xpath_node));
8783                         _end += count;
8784                 }
8785
8786                 void sort_do()
8787                 {
8788                         _type = xpath_sort(_begin, _end, _type, false);
8789                 }
8790
8791                 void truncate(xpath_node* pos)
8792                 {
8793                         assert(_begin <= pos && pos <= _end);
8794
8795                         _end = pos;
8796                 }
8797
8798                 void remove_duplicates()
8799                 {
8800                         if (_type == xpath_node_set::type_unsorted)
8801                                 sort(_begin, _end, duplicate_comparator());
8802
8803                         _end = unique(_begin, _end);
8804                 }
8805
8806                 xpath_node_set::type_t type() const
8807                 {
8808                         return _type;
8809                 }
8810
8811                 void set_type(xpath_node_set::type_t value)
8812                 {
8813                         _type = value;
8814                 }
8815         };
8816
8817         PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
8818         {
8819                 size_t capacity = static_cast<size_t>(_eos - _begin);
8820
8821                 // get new capacity (1.5x rule)
8822                 size_t new_capacity = capacity + capacity / 2 + 1;
8823
8824                 // reallocate the old array or allocate a new one
8825                 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
8826                 assert(data);
8827
8828                 // finalize
8829                 _begin = data;
8830                 _end = data + capacity;
8831                 _eos = data + new_capacity;
8832
8833                 // push
8834                 *_end++ = node;
8835         }
8836 PUGI__NS_END
8837
8838 PUGI__NS_BEGIN
8839         struct xpath_context
8840         {
8841                 xpath_node n;
8842                 size_t position, size;
8843
8844                 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
8845                 {
8846                 }
8847         };
8848
8849         enum lexeme_t
8850         {
8851                 lex_none = 0,
8852                 lex_equal,
8853                 lex_not_equal,
8854                 lex_less,
8855                 lex_greater,
8856                 lex_less_or_equal,
8857                 lex_greater_or_equal,
8858                 lex_plus,
8859                 lex_minus,
8860                 lex_multiply,
8861                 lex_union,
8862                 lex_var_ref,
8863                 lex_open_brace,
8864                 lex_close_brace,
8865                 lex_quoted_string,
8866                 lex_number,
8867                 lex_slash,
8868                 lex_double_slash,
8869                 lex_open_square_brace,
8870                 lex_close_square_brace,
8871                 lex_string,
8872                 lex_comma,
8873                 lex_axis_attribute,
8874                 lex_dot,
8875                 lex_double_dot,
8876                 lex_double_colon,
8877                 lex_eof
8878         };
8879
8880         struct xpath_lexer_string
8881         {
8882                 const char_t* begin;
8883                 const char_t* end;
8884
8885                 xpath_lexer_string(): begin(0), end(0)
8886                 {
8887                 }
8888
8889                 bool operator==(const char_t* other) const
8890                 {
8891                         size_t length = static_cast<size_t>(end - begin);
8892
8893                         return strequalrange(other, begin, length);
8894                 }
8895         };
8896
8897         class xpath_lexer
8898         {
8899                 const char_t* _cur;
8900                 const char_t* _cur_lexeme_pos;
8901                 xpath_lexer_string _cur_lexeme_contents;
8902
8903                 lexeme_t _cur_lexeme;
8904
8905         public:
8906                 explicit xpath_lexer(const char_t* query): _cur(query)
8907                 {
8908                         next();
8909                 }
8910
8911                 const char_t* state() const
8912                 {
8913                         return _cur;
8914                 }
8915
8916                 void next()
8917                 {
8918                         const char_t* cur = _cur;
8919
8920                         while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
8921
8922                         // save lexeme position for error reporting
8923                         _cur_lexeme_pos = cur;
8924
8925                         switch (*cur)
8926                         {
8927                         case 0:
8928                                 _cur_lexeme = lex_eof;
8929                                 break;
8930
8931                         case '>':
8932                                 if (*(cur+1) == '=')
8933                                 {
8934                                         cur += 2;
8935                                         _cur_lexeme = lex_greater_or_equal;
8936                                 }
8937                                 else
8938                                 {
8939                                         cur += 1;
8940                                         _cur_lexeme = lex_greater;
8941                                 }
8942                                 break;
8943
8944                         case '<':
8945                                 if (*(cur+1) == '=')
8946                                 {
8947                                         cur += 2;
8948                                         _cur_lexeme = lex_less_or_equal;
8949                                 }
8950                                 else
8951                                 {
8952                                         cur += 1;
8953                                         _cur_lexeme = lex_less;
8954                                 }
8955                                 break;
8956
8957                         case '!':
8958                                 if (*(cur+1) == '=')
8959                                 {
8960                                         cur += 2;
8961                                         _cur_lexeme = lex_not_equal;
8962                                 }
8963                                 else
8964                                 {
8965                                         _cur_lexeme = lex_none;
8966                                 }
8967                                 break;
8968
8969                         case '=':
8970                                 cur += 1;
8971                                 _cur_lexeme = lex_equal;
8972
8973                                 break;
8974
8975                         case '+':
8976                                 cur += 1;
8977                                 _cur_lexeme = lex_plus;
8978
8979                                 break;
8980
8981                         case '-':
8982                                 cur += 1;
8983                                 _cur_lexeme = lex_minus;
8984
8985                                 break;
8986
8987                         case '*':
8988                                 cur += 1;
8989                                 _cur_lexeme = lex_multiply;
8990
8991                                 break;
8992
8993                         case '|':
8994                                 cur += 1;
8995                                 _cur_lexeme = lex_union;
8996
8997                                 break;
8998
8999                         case '$':
9000                                 cur += 1;
9001
9002                                 if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
9003                                 {
9004                                         _cur_lexeme_contents.begin = cur;
9005
9006                                         while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9007
9008                                         if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
9009                                         {
9010                                                 cur++; // :
9011
9012                                                 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9013                                         }
9014
9015                                         _cur_lexeme_contents.end = cur;
9016
9017                                         _cur_lexeme = lex_var_ref;
9018                                 }
9019                                 else
9020                                 {
9021                                         _cur_lexeme = lex_none;
9022                                 }
9023
9024                                 break;
9025
9026                         case '(':
9027                                 cur += 1;
9028                                 _cur_lexeme = lex_open_brace;
9029
9030                                 break;
9031
9032                         case ')':
9033                                 cur += 1;
9034                                 _cur_lexeme = lex_close_brace;
9035
9036                                 break;
9037
9038                         case '[':
9039                                 cur += 1;
9040                                 _cur_lexeme = lex_open_square_brace;
9041
9042                                 break;
9043
9044                         case ']':
9045                                 cur += 1;
9046                                 _cur_lexeme = lex_close_square_brace;
9047
9048                                 break;
9049
9050                         case ',':
9051                                 cur += 1;
9052                                 _cur_lexeme = lex_comma;
9053
9054                                 break;
9055
9056                         case '/':
9057                                 if (*(cur+1) == '/')
9058                                 {
9059                                         cur += 2;
9060                                         _cur_lexeme = lex_double_slash;
9061                                 }
9062                                 else
9063                                 {
9064                                         cur += 1;
9065                                         _cur_lexeme = lex_slash;
9066                                 }
9067                                 break;
9068
9069                         case '.':
9070                                 if (*(cur+1) == '.')
9071                                 {
9072                                         cur += 2;
9073                                         _cur_lexeme = lex_double_dot;
9074                                 }
9075                                 else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
9076                                 {
9077                                         _cur_lexeme_contents.begin = cur; // .
9078
9079                                         ++cur;
9080
9081                                         while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9082
9083                                         _cur_lexeme_contents.end = cur;
9084
9085                                         _cur_lexeme = lex_number;
9086                                 }
9087                                 else
9088                                 {
9089                                         cur += 1;
9090                                         _cur_lexeme = lex_dot;
9091                                 }
9092                                 break;
9093
9094                         case '@':
9095                                 cur += 1;
9096                                 _cur_lexeme = lex_axis_attribute;
9097
9098                                 break;
9099
9100                         case '"':
9101                         case '\'':
9102                         {
9103                                 char_t terminator = *cur;
9104
9105                                 ++cur;
9106
9107                                 _cur_lexeme_contents.begin = cur;
9108                                 while (*cur && *cur != terminator) cur++;
9109                                 _cur_lexeme_contents.end = cur;
9110
9111                                 if (!*cur)
9112                                         _cur_lexeme = lex_none;
9113                                 else
9114                                 {
9115                                         cur += 1;
9116                                         _cur_lexeme = lex_quoted_string;
9117                                 }
9118
9119                                 break;
9120                         }
9121
9122                         case ':':
9123                                 if (*(cur+1) == ':')
9124                                 {
9125                                         cur += 2;
9126                                         _cur_lexeme = lex_double_colon;
9127                                 }
9128                                 else
9129                                 {
9130                                         _cur_lexeme = lex_none;
9131                                 }
9132                                 break;
9133
9134                         default:
9135                                 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
9136                                 {
9137                                         _cur_lexeme_contents.begin = cur;
9138
9139                                         while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9140
9141                                         if (*cur == '.')
9142                                         {
9143                                                 cur++;
9144
9145                                                 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9146                                         }
9147
9148                                         _cur_lexeme_contents.end = cur;
9149
9150                                         _cur_lexeme = lex_number;
9151                                 }
9152                                 else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
9153                                 {
9154                                         _cur_lexeme_contents.begin = cur;
9155
9156                                         while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9157
9158                                         if (cur[0] == ':')
9159                                         {
9160                                                 if (cur[1] == '*') // namespace test ncname:*
9161                                                 {
9162                                                         cur += 2; // :*
9163                                                 }
9164                                                 else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
9165                                                 {
9166                                                         cur++; // :
9167
9168                                                         while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9169                                                 }
9170                                         }
9171
9172                                         _cur_lexeme_contents.end = cur;
9173
9174                                         _cur_lexeme = lex_string;
9175                                 }
9176                                 else
9177                                 {
9178                                         _cur_lexeme = lex_none;
9179                                 }
9180                         }
9181
9182                         _cur = cur;
9183                 }
9184
9185                 lexeme_t current() const
9186                 {
9187                         return _cur_lexeme;
9188                 }
9189
9190                 const char_t* current_pos() const
9191                 {
9192                         return _cur_lexeme_pos;
9193                 }
9194
9195                 const xpath_lexer_string& contents() const
9196                 {
9197                         assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
9198
9199                         return _cur_lexeme_contents;
9200                 }
9201         };
9202
9203         enum ast_type_t
9204         {
9205                 ast_unknown,
9206                 ast_op_or,                                              // left or right
9207                 ast_op_and,                                             // left and right
9208                 ast_op_equal,                                   // left = right
9209                 ast_op_not_equal,                               // left != right
9210                 ast_op_less,                                    // left < right
9211                 ast_op_greater,                                 // left > right
9212                 ast_op_less_or_equal,                   // left <= right
9213                 ast_op_greater_or_equal,                // left >= right
9214                 ast_op_add,                                             // left + right
9215                 ast_op_subtract,                                // left - right
9216                 ast_op_multiply,                                // left * right
9217                 ast_op_divide,                                  // left / right
9218                 ast_op_mod,                                             // left % right
9219                 ast_op_negate,                                  // left - right
9220                 ast_op_union,                                   // left | right
9221                 ast_predicate,                                  // apply predicate to set; next points to next predicate
9222                 ast_filter,                                             // select * from left where right
9223                 ast_string_constant,                    // string constant
9224                 ast_number_constant,                    // number constant
9225                 ast_variable,                                   // variable
9226                 ast_func_last,                                  // last()
9227                 ast_func_position,                              // position()
9228                 ast_func_count,                                 // count(left)
9229                 ast_func_id,                                    // id(left)
9230                 ast_func_local_name_0,                  // local-name()
9231                 ast_func_local_name_1,                  // local-name(left)
9232                 ast_func_namespace_uri_0,               // namespace-uri()
9233                 ast_func_namespace_uri_1,               // namespace-uri(left)
9234                 ast_func_name_0,                                // name()
9235                 ast_func_name_1,                                // name(left)
9236                 ast_func_string_0,                              // string()
9237                 ast_func_string_1,                              // string(left)
9238                 ast_func_concat,                                // concat(left, right, siblings)
9239                 ast_func_starts_with,                   // starts_with(left, right)
9240                 ast_func_contains,                              // contains(left, right)
9241                 ast_func_substring_before,              // substring-before(left, right)
9242                 ast_func_substring_after,               // substring-after(left, right)
9243                 ast_func_substring_2,                   // substring(left, right)
9244                 ast_func_substring_3,                   // substring(left, right, third)
9245                 ast_func_string_length_0,               // string-length()
9246                 ast_func_string_length_1,               // string-length(left)
9247                 ast_func_normalize_space_0,             // normalize-space()
9248                 ast_func_normalize_space_1,             // normalize-space(left)
9249                 ast_func_translate,                             // translate(left, right, third)
9250                 ast_func_boolean,                               // boolean(left)
9251                 ast_func_not,                                   // not(left)
9252                 ast_func_true,                                  // true()
9253                 ast_func_false,                                 // false()
9254                 ast_func_lang,                                  // lang(left)
9255                 ast_func_number_0,                              // number()
9256                 ast_func_number_1,                              // number(left)
9257                 ast_func_sum,                                   // sum(left)
9258                 ast_func_floor,                                 // floor(left)
9259                 ast_func_ceiling,                               // ceiling(left)
9260                 ast_func_round,                                 // round(left)
9261                 ast_step,                                               // process set left with step
9262                 ast_step_root,                                  // select root node
9263
9264                 ast_opt_translate_table,                // translate(left, right, third) where right/third are constants
9265                 ast_opt_compare_attribute               // @name = 'string'
9266         };
9267
9268         enum axis_t
9269         {
9270                 axis_ancestor,
9271                 axis_ancestor_or_self,
9272                 axis_attribute,
9273                 axis_child,
9274                 axis_descendant,
9275                 axis_descendant_or_self,
9276                 axis_following,
9277                 axis_following_sibling,
9278                 axis_namespace,
9279                 axis_parent,
9280                 axis_preceding,
9281                 axis_preceding_sibling,
9282                 axis_self
9283         };
9284
9285         enum nodetest_t
9286         {
9287                 nodetest_none,
9288                 nodetest_name,
9289                 nodetest_type_node,
9290                 nodetest_type_comment,
9291                 nodetest_type_pi,
9292                 nodetest_type_text,
9293                 nodetest_pi,
9294                 nodetest_all,
9295                 nodetest_all_in_namespace
9296         };
9297
9298         enum predicate_t
9299         {
9300                 predicate_default,
9301                 predicate_posinv,
9302                 predicate_constant,
9303                 predicate_constant_one
9304         };
9305
9306         enum nodeset_eval_t
9307         {
9308                 nodeset_eval_all,
9309                 nodeset_eval_any,
9310                 nodeset_eval_first
9311         };
9312
9313         template <axis_t N> struct axis_to_type
9314         {
9315                 static const axis_t axis;
9316         };
9317
9318         template <axis_t N> const axis_t axis_to_type<N>::axis = N;
9319
9320         class xpath_ast_node
9321         {
9322         private:
9323                 // node type
9324                 char _type;
9325                 char _rettype;
9326
9327                 // for ast_step
9328                 char _axis;
9329
9330                 // for ast_step/ast_predicate/ast_filter
9331                 char _test;
9332
9333                 // tree node structure
9334                 xpath_ast_node* _left;
9335                 xpath_ast_node* _right;
9336                 xpath_ast_node* _next;
9337
9338                 union
9339                 {
9340                         // value for ast_string_constant
9341                         const char_t* string;
9342                         // value for ast_number_constant
9343                         double number;
9344                         // variable for ast_variable
9345                         xpath_variable* variable;
9346                         // node test for ast_step (node name/namespace/node type/pi target)
9347                         const char_t* nodetest;
9348                         // table for ast_opt_translate_table
9349                         const unsigned char* table;
9350                 } _data;
9351
9352                 xpath_ast_node(const xpath_ast_node&);
9353                 xpath_ast_node& operator=(const xpath_ast_node&);
9354
9355                 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9356                 {
9357                         xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9358
9359                         if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9360                         {
9361                                 if (lt == xpath_type_boolean || rt == xpath_type_boolean)
9362                                         return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9363                                 else if (lt == xpath_type_number || rt == xpath_type_number)
9364                                         return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9365                                 else if (lt == xpath_type_string || rt == xpath_type_string)
9366                                 {
9367                                         xpath_allocator_capture cr(stack.result);
9368
9369                                         xpath_string ls = lhs->eval_string(c, stack);
9370                                         xpath_string rs = rhs->eval_string(c, stack);
9371
9372                                         return comp(ls, rs);
9373                                 }
9374                         }
9375                         else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9376                         {
9377                                 xpath_allocator_capture cr(stack.result);
9378
9379                                 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9380                                 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9381
9382                                 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9383                                         for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9384                                         {
9385                                                 xpath_allocator_capture cri(stack.result);
9386
9387                                                 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
9388                                                         return true;
9389                                         }
9390
9391                                 return false;
9392                         }
9393                         else
9394                         {
9395                                 if (lt == xpath_type_node_set)
9396                                 {
9397                                         swap(lhs, rhs);
9398                                         swap(lt, rt);
9399                                 }
9400
9401                                 if (lt == xpath_type_boolean)
9402                                         return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9403                                 else if (lt == xpath_type_number)
9404                                 {
9405                                         xpath_allocator_capture cr(stack.result);
9406
9407                                         double l = lhs->eval_number(c, stack);
9408                                         xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9409
9410                                         for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9411                                         {
9412                                                 xpath_allocator_capture cri(stack.result);
9413
9414                                                 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9415                                                         return true;
9416                                         }
9417
9418                                         return false;
9419                                 }
9420                                 else if (lt == xpath_type_string)
9421                                 {
9422                                         xpath_allocator_capture cr(stack.result);
9423
9424                                         xpath_string l = lhs->eval_string(c, stack);
9425                                         xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9426
9427                                         for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9428                                         {
9429                                                 xpath_allocator_capture cri(stack.result);
9430
9431                                                 if (comp(l, string_value(*ri, stack.result)))
9432                                                         return true;
9433                                         }
9434
9435                                         return false;
9436                                 }
9437                         }
9438
9439                         assert(false && "Wrong types");
9440                         return false;
9441                 }
9442
9443                 static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
9444                 {
9445                         return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
9446                 }
9447
9448                 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9449                 {
9450                         xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9451
9452                         if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9453                                 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9454                         else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9455                         {
9456                                 xpath_allocator_capture cr(stack.result);
9457
9458                                 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9459                                 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9460
9461                                 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9462                                 {
9463                                         xpath_allocator_capture cri(stack.result);
9464
9465                                         double l = convert_string_to_number(string_value(*li, stack.result).c_str());
9466
9467                                         for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9468                                         {
9469                                                 xpath_allocator_capture crii(stack.result);
9470
9471                                                 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9472                                                         return true;
9473                                         }
9474                                 }
9475
9476                                 return false;
9477                         }
9478                         else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
9479                         {
9480                                 xpath_allocator_capture cr(stack.result);
9481
9482                                 double l = lhs->eval_number(c, stack);
9483                                 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9484
9485                                 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9486                                 {
9487                                         xpath_allocator_capture cri(stack.result);
9488
9489                                         if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9490                                                 return true;
9491                                 }
9492
9493                                 return false;
9494                         }
9495                         else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
9496                         {
9497                                 xpath_allocator_capture cr(stack.result);
9498
9499                                 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9500                                 double r = rhs->eval_number(c, stack);
9501
9502                                 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9503                                 {
9504                                         xpath_allocator_capture cri(stack.result);
9505
9506                                         if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
9507                                                 return true;
9508                                 }
9509
9510                                 return false;
9511                         }
9512                         else
9513                         {
9514                                 assert(false && "Wrong types");
9515                                 return false;
9516                         }
9517                 }
9518
9519                 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9520                 {
9521                         assert(ns.size() >= first);
9522                         assert(expr->rettype() != xpath_type_number);
9523
9524                         size_t i = 1;
9525                         size_t size = ns.size() - first;
9526
9527                         xpath_node* last = ns.begin() + first;
9528
9529                         // remove_if... or well, sort of
9530                         for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9531                         {
9532                                 xpath_context c(*it, i, size);
9533
9534                                 if (expr->eval_boolean(c, stack))
9535                                 {
9536                                         *last++ = *it;
9537
9538                                         if (once) break;
9539                                 }
9540                         }
9541
9542                         ns.truncate(last);
9543                 }
9544
9545                 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9546                 {
9547                         assert(ns.size() >= first);
9548                         assert(expr->rettype() == xpath_type_number);
9549
9550                         size_t i = 1;
9551                         size_t size = ns.size() - first;
9552
9553                         xpath_node* last = ns.begin() + first;
9554
9555                         // remove_if... or well, sort of
9556                         for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9557                         {
9558                                 xpath_context c(*it, i, size);
9559
9560                                 if (expr->eval_number(c, stack) == i)
9561                                 {
9562                                         *last++ = *it;
9563
9564                                         if (once) break;
9565                                 }
9566                         }
9567
9568                         ns.truncate(last);
9569                 }
9570
9571                 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
9572                 {
9573                         assert(ns.size() >= first);
9574                         assert(expr->rettype() == xpath_type_number);
9575
9576                         size_t size = ns.size() - first;
9577
9578                         xpath_node* last = ns.begin() + first;
9579
9580                         xpath_context c(xpath_node(), 1, size);
9581
9582                         double er = expr->eval_number(c, stack);
9583
9584                         if (er >= 1.0 && er <= size)
9585                         {
9586                                 size_t eri = static_cast<size_t>(er);
9587
9588                                 if (er == eri)
9589                                 {
9590                                         xpath_node r = last[eri - 1];
9591
9592                                         *last++ = r;
9593                                 }
9594                         }
9595
9596                         ns.truncate(last);
9597                 }
9598
9599                 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
9600                 {
9601                         if (ns.size() == first) return;
9602
9603                         assert(_type == ast_filter || _type == ast_predicate);
9604
9605                         if (_test == predicate_constant || _test == predicate_constant_one)
9606                                 apply_predicate_number_const(ns, first, _right, stack);
9607                         else if (_right->rettype() == xpath_type_number)
9608                                 apply_predicate_number(ns, first, _right, stack, once);
9609                         else
9610                                 apply_predicate_boolean(ns, first, _right, stack, once);
9611                 }
9612
9613                 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
9614                 {
9615                         if (ns.size() == first) return;
9616
9617                         bool last_once = eval_once(ns.type(), eval);
9618
9619                         for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
9620                                 pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
9621                 }
9622
9623                 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
9624                 {
9625                         assert(a);
9626
9627                         const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
9628
9629                         switch (_test)
9630                         {
9631                         case nodetest_name:
9632                                 if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
9633                                 {
9634                                         ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9635                                         return true;
9636                                 }
9637                                 break;
9638
9639                         case nodetest_type_node:
9640                         case nodetest_all:
9641                                 if (is_xpath_attribute(name))
9642                                 {
9643                                         ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9644                                         return true;
9645                                 }
9646                                 break;
9647
9648                         case nodetest_all_in_namespace:
9649                                 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
9650                                 {
9651                                         ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9652                                         return true;
9653                                 }
9654                                 break;
9655
9656                         default:
9657                                 ;
9658                         }
9659
9660                         return false;
9661                 }
9662
9663                 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
9664                 {
9665                         assert(n);
9666
9667                         xml_node_type type = PUGI__NODETYPE(n);
9668
9669                         switch (_test)
9670                         {
9671                         case nodetest_name:
9672                                 if (type == node_element && n->name && strequal(n->name, _data.nodetest))
9673                                 {
9674                                         ns.push_back(xml_node(n), alloc);
9675                                         return true;
9676                                 }
9677                                 break;
9678
9679                         case nodetest_type_node:
9680                                 ns.push_back(xml_node(n), alloc);
9681                                 return true;
9682
9683                         case nodetest_type_comment:
9684                                 if (type == node_comment)
9685                                 {
9686                                         ns.push_back(xml_node(n), alloc);
9687                                         return true;
9688                                 }
9689                                 break;
9690
9691                         case nodetest_type_text:
9692                                 if (type == node_pcdata || type == node_cdata)
9693                                 {
9694                                         ns.push_back(xml_node(n), alloc);
9695                                         return true;
9696                                 }
9697                                 break;
9698
9699                         case nodetest_type_pi:
9700                                 if (type == node_pi)
9701                                 {
9702                                         ns.push_back(xml_node(n), alloc);
9703                                         return true;
9704                                 }
9705                                 break;
9706
9707                         case nodetest_pi:
9708                                 if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
9709                                 {
9710                                         ns.push_back(xml_node(n), alloc);
9711                                         return true;
9712                                 }
9713                                 break;
9714
9715                         case nodetest_all:
9716                                 if (type == node_element)
9717                                 {
9718                                         ns.push_back(xml_node(n), alloc);
9719                                         return true;
9720                                 }
9721                                 break;
9722
9723                         case nodetest_all_in_namespace:
9724                                 if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
9725                                 {
9726                                         ns.push_back(xml_node(n), alloc);
9727                                         return true;
9728                                 }
9729                                 break;
9730
9731                         default:
9732                                 assert(false && "Unknown axis");
9733                         }
9734
9735                         return false;
9736                 }
9737
9738                 template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
9739                 {
9740                         const axis_t axis = T::axis;
9741
9742                         switch (axis)
9743                         {
9744                         case axis_attribute:
9745                         {
9746                                 for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
9747                                         if (step_push(ns, a, n, alloc) & once)
9748                                                 return;
9749
9750                                 break;
9751                         }
9752
9753                         case axis_child:
9754                         {
9755                                 for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
9756                                         if (step_push(ns, c, alloc) & once)
9757                                                 return;
9758
9759                                 break;
9760                         }
9761
9762                         case axis_descendant:
9763                         case axis_descendant_or_self:
9764                         {
9765                                 if (axis == axis_descendant_or_self)
9766                                         if (step_push(ns, n, alloc) & once)
9767                                                 return;
9768
9769                                 xml_node_struct* cur = n->first_child;
9770
9771                                 while (cur)
9772                                 {
9773                                         if (step_push(ns, cur, alloc) & once)
9774                                                 return;
9775
9776                                         if (cur->first_child)
9777                                                 cur = cur->first_child;
9778                                         else
9779                                         {
9780                                                 while (!cur->next_sibling)
9781                                                 {
9782                                                         cur = cur->parent;
9783
9784                                                         if (cur == n) return;
9785                                                 }
9786
9787                                                 cur = cur->next_sibling;
9788                                         }
9789                                 }
9790
9791                                 break;
9792                         }
9793
9794                         case axis_following_sibling:
9795                         {
9796                                 for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
9797                                         if (step_push(ns, c, alloc) & once)
9798                                                 return;
9799
9800                                 break;
9801                         }
9802
9803                         case axis_preceding_sibling:
9804                         {
9805                                 for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
9806                                         if (step_push(ns, c, alloc) & once)
9807                                                 return;
9808
9809                                 break;
9810                         }
9811
9812                         case axis_following:
9813                         {
9814                                 xml_node_struct* cur = n;
9815
9816                                 // exit from this node so that we don't include descendants
9817                                 while (!cur->next_sibling)
9818                                 {
9819                                         cur = cur->parent;
9820
9821                                         if (!cur) return;
9822                                 }
9823
9824                                 cur = cur->next_sibling;
9825
9826                                 while (cur)
9827                                 {
9828                                         if (step_push(ns, cur, alloc) & once)
9829                                                 return;
9830
9831                                         if (cur->first_child)
9832                                                 cur = cur->first_child;
9833                                         else
9834                                         {
9835                                                 while (!cur->next_sibling)
9836                                                 {
9837                                                         cur = cur->parent;
9838
9839                                                         if (!cur) return;
9840                                                 }
9841
9842                                                 cur = cur->next_sibling;
9843                                         }
9844                                 }
9845
9846                                 break;
9847                         }
9848
9849                         case axis_preceding:
9850                         {
9851                                 xml_node_struct* cur = n;
9852
9853                                 // exit from this node so that we don't include descendants
9854                                 while (!cur->prev_sibling_c->next_sibling)
9855                                 {
9856                                         cur = cur->parent;
9857
9858                                         if (!cur) return;
9859                                 }
9860
9861                                 cur = cur->prev_sibling_c;
9862
9863                                 while (cur)
9864                                 {
9865                                         if (cur->first_child)
9866                                                 cur = cur->first_child->prev_sibling_c;
9867                                         else
9868                                         {
9869                                                 // leaf node, can't be ancestor
9870                                                 if (step_push(ns, cur, alloc) & once)
9871                                                         return;
9872
9873                                                 while (!cur->prev_sibling_c->next_sibling)
9874                                                 {
9875                                                         cur = cur->parent;
9876
9877                                                         if (!cur) return;
9878
9879                                                         if (!node_is_ancestor(cur, n))
9880                                                                 if (step_push(ns, cur, alloc) & once)
9881                                                                         return;
9882                                                 }
9883
9884                                                 cur = cur->prev_sibling_c;
9885                                         }
9886                                 }
9887
9888                                 break;
9889                         }
9890
9891                         case axis_ancestor:
9892                         case axis_ancestor_or_self:
9893                         {
9894                                 if (axis == axis_ancestor_or_self)
9895                                         if (step_push(ns, n, alloc) & once)
9896                                                 return;
9897
9898                                 xml_node_struct* cur = n->parent;
9899
9900                                 while (cur)
9901                                 {
9902                                         if (step_push(ns, cur, alloc) & once)
9903                                                 return;
9904
9905                                         cur = cur->parent;
9906                                 }
9907
9908                                 break;
9909                         }
9910
9911                         case axis_self:
9912                         {
9913                                 step_push(ns, n, alloc);
9914
9915                                 break;
9916                         }
9917
9918                         case axis_parent:
9919                         {
9920                                 if (n->parent)
9921                                         step_push(ns, n->parent, alloc);
9922
9923                                 break;
9924                         }
9925
9926                         default:
9927                                 assert(false && "Unimplemented axis");
9928                         }
9929                 }
9930
9931                 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
9932                 {
9933                         const axis_t axis = T::axis;
9934
9935                         switch (axis)
9936                         {
9937                         case axis_ancestor:
9938                         case axis_ancestor_or_self:
9939                         {
9940                                 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
9941                                         if (step_push(ns, a, p, alloc) & once)
9942                                                 return;
9943
9944                                 xml_node_struct* cur = p;
9945
9946                                 while (cur)
9947                                 {
9948                                         if (step_push(ns, cur, alloc) & once)
9949                                                 return;
9950
9951                                         cur = cur->parent;
9952                                 }
9953
9954                                 break;
9955                         }
9956
9957                         case axis_descendant_or_self:
9958                         case axis_self:
9959                         {
9960                                 if (_test == nodetest_type_node) // reject attributes based on principal node type test
9961                                         step_push(ns, a, p, alloc);
9962
9963                                 break;
9964                         }
9965
9966                         case axis_following:
9967                         {
9968                                 xml_node_struct* cur = p;
9969
9970                                 while (cur)
9971                                 {
9972                                         if (cur->first_child)
9973                                                 cur = cur->first_child;
9974                                         else
9975                                         {
9976                                                 while (!cur->next_sibling)
9977                                                 {
9978                                                         cur = cur->parent;
9979
9980                                                         if (!cur) return;
9981                                                 }
9982
9983                                                 cur = cur->next_sibling;
9984                                         }
9985
9986                                         if (step_push(ns, cur, alloc) & once)
9987                                                 return;
9988                                 }
9989
9990                                 break;
9991                         }
9992
9993                         case axis_parent:
9994                         {
9995                                 step_push(ns, p, alloc);
9996
9997                                 break;
9998                         }
9999
10000                         case axis_preceding:
10001                         {
10002                                 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
10003                                 step_fill(ns, p, alloc, once, v);
10004                                 break;
10005                         }
10006
10007                         default:
10008                                 assert(false && "Unimplemented axis");
10009                         }
10010                 }
10011
10012                 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
10013                 {
10014                         const axis_t axis = T::axis;
10015                         const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
10016
10017                         if (xn.node())
10018                                 step_fill(ns, xn.node().internal_object(), alloc, once, v);
10019                         else if (axis_has_attributes && xn.attribute() && xn.parent())
10020                                 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
10021                 }
10022
10023                 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
10024                 {
10025                         const axis_t axis = T::axis;
10026                         const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
10027                         const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
10028
10029                         bool once =
10030                                 (axis == axis_attribute && _test == nodetest_name) ||
10031                                 (!_right && eval_once(axis_type, eval)) ||
10032                                 (_right && !_right->_next && _right->_test == predicate_constant_one);
10033
10034                         xpath_node_set_raw ns;
10035                         ns.set_type(axis_type);
10036
10037                         if (_left)
10038                         {
10039                                 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
10040
10041                                 // self axis preserves the original order
10042                                 if (axis == axis_self) ns.set_type(s.type());
10043
10044                                 for (const xpath_node* it = s.begin(); it != s.end(); ++it)
10045                                 {
10046                                         size_t size = ns.size();
10047
10048                                         // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
10049                                         if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
10050
10051                                         step_fill(ns, *it, stack.result, once, v);
10052                                         if (_right) apply_predicates(ns, size, stack, eval);
10053                                 }
10054                         }
10055                         else
10056                         {
10057                                 step_fill(ns, c.n, stack.result, once, v);
10058                                 if (_right) apply_predicates(ns, 0, stack, eval);
10059                         }
10060
10061                         // child, attribute and self axes always generate unique set of nodes
10062                         // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
10063                         if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
10064                                 ns.remove_duplicates();
10065
10066                         return ns;
10067                 }
10068
10069         public:
10070                 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
10071                         _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10072                 {
10073                         assert(type == ast_string_constant);
10074                         _data.string = value;
10075                 }
10076
10077                 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
10078                         _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10079                 {
10080                         assert(type == ast_number_constant);
10081                         _data.number = value;
10082                 }
10083
10084                 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
10085                         _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10086                 {
10087                         assert(type == ast_variable);
10088                         _data.variable = value;
10089                 }
10090
10091                 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
10092                         _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
10093                 {
                              // generic node with up to two children; axis and test are zeroed, _next is null and the
                              // _data payload is not assigned by this overload
10094                 }
10095
10096                 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
10097                         _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
10098                 {
10099                         assert(type == ast_step);
10100                         _data.nodetest = contents;
10101                 }
10102
10103                 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
10104                         _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
10105                 {
                              // filter/predicate node: always typed as a node set; the predicate kind is kept in _test
10106                         assert(type == ast_filter || type == ast_predicate);
10107                 }
10108
10109                 void set_next(xpath_ast_node* value)
10110                 {
                              // replaces the _next link of this node
10111                         this->_next = value;
10112                 }
10113
10114                 void set_right(xpath_ast_node* value)
10115                 {
                              // replaces the _right child of this node
10116                         this->_right = value;
10117                 }
10118
10119                 bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
10120                 {
                              // Evaluates this node as a boolean in context c. Node kinds with a dedicated boolean
                              // implementation are dispatched on _type; everything else is evaluated according to
                              // _rettype and the result is converted to boolean in the default branch.
10121                         switch (_type)
10122                         {
10123                         case ast_op_or:
10124                                 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
10125
10126                         case ast_op_and:
10127                                 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
10128
10129                         case ast_op_equal:
10130                                 return compare_eq(_left, _right, c, stack, equal_to());
10131
10132                         case ast_op_not_equal:
10133                                 return compare_eq(_left, _right, c, stack, not_equal_to());
10134
10135                         case ast_op_less:
10136                                 return compare_rel(_left, _right, c, stack, less());
10137
                              // a > b is evaluated as b < a: same comparator, operands swapped
10138                         case ast_op_greater:
10139                                 return compare_rel(_right, _left, c, stack, less());
10140
10141                         case ast_op_less_or_equal:
10142                                 return compare_rel(_left, _right, c, stack, less_equal());
10143
                              // a >= b is evaluated as b <= a: same comparator, operands swapped
10144                         case ast_op_greater_or_equal:
10145                                 return compare_rel(_right, _left, c, stack, less_equal());
10146
10147                         case ast_func_starts_with:
10148                         {
                                      // NOTE(review): the capture presumably releases result-stack memory used by the two
                                      // temporary strings when the scope ends - confirm against xpath_allocator_capture
10149                                 xpath_allocator_capture cr(stack.result);
10150
10151                                 xpath_string lr = _left->eval_string(c, stack);
10152                                 xpath_string rr = _right->eval_string(c, stack);
10153
10154                                 return starts_with(lr.c_str(), rr.c_str());
10155                         }
10156
10157                         case ast_func_contains:
10158                         {
10159                                 xpath_allocator_capture cr(stack.result);
10160
10161                                 xpath_string lr = _left->eval_string(c, stack);
10162                                 xpath_string rr = _right->eval_string(c, stack);
10163
10164                                 return find_substring(lr.c_str(), rr.c_str()) != 0;
10165                         }
10166
10167                         case ast_func_boolean:
10168                                 return _left->eval_boolean(c, stack);
10169
10170                         case ast_func_not:
10171                                 return !_left->eval_boolean(c, stack);
10172
10173                         case ast_func_true:
10174                                 return true;
10175
10176                         case ast_func_false:
10177                                 return false;
10178
10179                         case ast_func_lang:
10180                         {
                                      // lang() is always false when the context is an attribute
10181                                 if (c.n.attribute()) return false;
10182
10183                                 xpath_allocator_capture cr(stack.result);
10184
10185                                 xpath_string lang = _left->eval_string(c, stack);
10186
                                      // walk from the context node towards the root; the first node that carries an
                                      // xml:lang attribute decides the result
10187                                 for (xml_node n = c.n.node(); n; n = n.parent())
10188                                 {
10189                                         xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
10190
10191                                         if (a)
10192                                         {
10193                                                 const char_t* value = a.value();
10194
10195                                                 // strnicmp / strncasecmp is not portable
                                                      // the argument must be a prefix of the attribute value (compared through tolower_ascii);
                                                      // a shorter attribute value hits its terminator here and fails the comparison
10196                                                 for (const char_t* lit = lang.c_str(); *lit; ++lit)
10197                                                 {
10198                                                         if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
10199                                                         ++value;
10200                                                 }
10201
                                                      // match only if the attribute value ends here or continues with a '-' suffix
10202                                                 return *value == 0 || *value == '-';
10203                                         }
10204                                 }
10205
10206                                 return false;
10207                         }
10208
10209                         case ast_opt_compare_attribute:
10210                         {
                                      // the right operand is either a string constant or a variable read through get_string()
10211                                 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
10212
                                      // the attribute is looked up on the context node by the name stored in the left step
10213                                 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
10214
10215                                 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
10216                         }
10217
10218                         case ast_variable:
10219                         {
10220                                 assert(_rettype == _data.variable->type());
10221
10222                                 if (_rettype == xpath_type_boolean)
10223                                         return _data.variable->get_boolean();
10224
10225                                 // fallthrough to type conversion
10226                         }
10227
10228                         default:
10229                         {
                                      // no dedicated boolean form: evaluate in the node's own type and convert
10230                                 switch (_rettype)
10231                                 {
10232                                 case xpath_type_number:
10233                                         return convert_number_to_boolean(eval_number(c, stack));
10234
10235                                 case xpath_type_string:
10236                                 {
10237                                         xpath_allocator_capture cr(stack.result);
10238
                                              // a string is true when it is not empty
10239                                         return !eval_string(c, stack).empty();
10240                                 }
10241
10242                                 case xpath_type_node_set:
10243                                 {
10244                                         xpath_allocator_capture cr(stack.result);
10245
                                              // a node set is true when it is not empty; nodeset_eval_any is requested because
                                              // only emptiness is inspected
10246                                         return !eval_node_set(c, stack, nodeset_eval_any).empty();
10247                                 }
10248
10249                                 default:
10250                                         assert(false && "Wrong expression for return type boolean");
10251                                         return false;
10252                                 }
10253                         }
10254                         }
10255                 }
10256
10257                 double eval_number(const xpath_context& c, const xpath_stack& stack)
10258                 {
                              // Evaluates this node as a number in context c. Arithmetic operators and numeric functions
                              // are dispatched on _type; everything else is evaluated according to _rettype and the
                              // result is converted to a number in the default branch.
10259                         switch (_type)
10260                         {
10261                         case ast_op_add:
10262                                 return _left->eval_number(c, stack) + _right->eval_number(c, stack);
10263
10264                         case ast_op_subtract:
10265                                 return _left->eval_number(c, stack) - _right->eval_number(c, stack);
10266
10267                         case ast_op_multiply:
10268                                 return _left->eval_number(c, stack) * _right->eval_number(c, stack);
10269
10270                         case ast_op_divide:
10271                                 return _left->eval_number(c, stack) / _right->eval_number(c, stack);
10272
10273                         case ast_op_mod:
10274                                 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
10275
10276                         case ast_op_negate:
10277                                 return -_left->eval_number(c, stack);
10278
10279                         case ast_number_constant:
10280                                 return _data.number;
10281
                              // last() and position() read the context size and position directly
10282                         case ast_func_last:
10283                                 return static_cast<double>(c.size);
10284
10285                         case ast_func_position:
10286                                 return static_cast<double>(c.position);
10287
10288                         case ast_func_count:
10289                         {
10290                                 xpath_allocator_capture cr(stack.result);
10291
                                      // the complete set is requested (nodeset_eval_all) because every node is counted
10292                                 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
10293                         }
10294
                              // string-length() without arguments measures the string value of the context node
10295                         case ast_func_string_length_0:
10296                         {
10297                                 xpath_allocator_capture cr(stack.result);
10298
10299                                 return static_cast<double>(string_value(c.n, stack.result).length());
10300                         }
10301
10302                         case ast_func_string_length_1:
10303                         {
10304                                 xpath_allocator_capture cr(stack.result);
10305
10306                                 return static_cast<double>(_left->eval_string(c, stack).length());
10307                         }
10308
                              // number() without arguments converts the string value of the context node
10309                         case ast_func_number_0:
10310                         {
10311                                 xpath_allocator_capture cr(stack.result);
10312
10313                                 return convert_string_to_number(string_value(c.n, stack.result).c_str());
10314                         }
10315
10316                         case ast_func_number_1:
10317                                 return _left->eval_number(c, stack);
10318
10319                         case ast_func_sum:
10320                         {
10321                                 xpath_allocator_capture cr(stack.result);
10322
10323                                 double r = 0;
10324
10325                                 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
10326
                                      // each node's string value is converted to a number and accumulated
10327                                 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
10328                                 {
                                              // NOTE(review): a per-iteration capture, presumably so that the temporary string
                                              // of one node is released before the next node is processed - confirm
10329                                         xpath_allocator_capture cri(stack.result);
10330
10331                                         r += convert_string_to_number(string_value(*it, stack.result).c_str());
10332                                 }
10333
10334                                 return r;
10335                         }
10336
10337                         case ast_func_floor:
10338                         {
10339                                 double r = _left->eval_number(c, stack);
10340
                                      // r == r is false only for NaN, which is returned unchanged without calling floor()
10341                                 return r == r ? floor(r) : r;
10342                         }
10343
10344                         case ast_func_ceiling:
10345                         {
10346                                 double r = _left->eval_number(c, stack);
10347
                                      // r == r is false only for NaN, which is returned unchanged without calling ceil()
10348                                 return r == r ? ceil(r) : r;
10349                         }
10350
10351                         case ast_func_round:
10352                                 return round_nearest_nzero(_left->eval_number(c, stack));
10353
10354                         case ast_variable:
10355                         {
10356                                 assert(_rettype == _data.variable->type());
10357
10358                                 if (_rettype == xpath_type_number)
10359                                         return _data.variable->get_number();
10360
10361                                 // fallthrough to type conversion
10362                         }
10363
10364                         default:
10365                         {
                                      // no dedicated numeric form: evaluate in the node's own type and convert
10366                                 switch (_rettype)
10367                                 {
10368                                 case xpath_type_boolean:
10369                                         return eval_boolean(c, stack) ? 1 : 0;
10370
10371                                 case xpath_type_string:
10372                                 {
10373                                         xpath_allocator_capture cr(stack.result);
10374
10375                                         return convert_string_to_number(eval_string(c, stack).c_str());
10376                                 }
10377
                                      // node sets take the same route as strings: eval_string() yields the string that is then parsed
10378                                 case xpath_type_node_set:
10379                                 {
10380                                         xpath_allocator_capture cr(stack.result);
10381
10382                                         return convert_string_to_number(eval_string(c, stack).c_str());
10383                                 }
10384
10385                                 default:
10386                                         assert(false && "Wrong expression for return type number");
10387                                         return 0;
10388                                 }
10389
10390                         }
10391                         }
10392                 }
10393
10394                 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
10395                 {
10396                         assert(_type == ast_func_concat);
10397
10398                         xpath_allocator_capture ct(stack.temp);
10399
10400                         // count the string number
10401                         size_t count = 1;
10402                         for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
10403
10404                         // gather all strings
10405                         xpath_string static_buffer[4];
10406                         xpath_string* buffer = static_buffer;
10407
10408                         // allocate on-heap for large concats
10409                         if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
10410                         {
10411                                 buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
10412                                 assert(buffer);
10413                         }
10414
10415                         // evaluate all strings to temporary stack
10416                         xpath_stack swapped_stack = {stack.temp, stack.result};
10417
10418                         buffer[0] = _left->eval_string(c, swapped_stack);
10419
10420                         size_t pos = 1;
10421                         for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
10422                         assert(pos == count);
10423
10424                         // get total length
10425                         size_t length = 0;
10426                         for (size_t i = 0; i < count; ++i) length += buffer[i].length();
10427
10428                         // create final string
10429                         char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
10430                         assert(result);
10431
10432                         char_t* ri = result;
10433
10434                         for (size_t j = 0; j < count; ++j)
10435                                 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
10436                                         *ri++ = *bi;
10437
10438                         *ri = 0;
10439
10440                         return xpath_string::from_heap_preallocated(result, ri);
10441                 }
10442
10443                 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
10444                 {
                              // Evaluates this node as a string in context c. String functions are dispatched on _type;
                              // everything else is evaluated according to _rettype and converted in the default branch.
                              // Several branches build a "swapped" stack ({stack.temp, stack.result}) for their
                              // intermediate strings and create only the returned string with stack.result.
10445                         switch (_type)
10446                         {
10447                         case ast_string_constant:
10448                                 return xpath_string::from_const(_data.string);
10449
10450                         case ast_func_local_name_0:
10451                         {
10452                                 xpath_node na = c.n;
10453
10454                                 return xpath_string::from_const(local_name(na));
10455                         }
10456
10457                         case ast_func_local_name_1:
10458                         {
10459                                 xpath_allocator_capture cr(stack.result);
10460
                                      // only the first node of the argument set is needed, hence nodeset_eval_first
10461                                 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10462                                 xpath_node na = ns.first();
10463
10464                                 return xpath_string::from_const(local_name(na));
10465                         }
10466
10467                         case ast_func_name_0:
10468                         {
10469                                 xpath_node na = c.n;
10470
10471                                 return xpath_string::from_const(qualified_name(na));
10472                         }
10473
10474                         case ast_func_name_1:
10475                         {
10476                                 xpath_allocator_capture cr(stack.result);
10477
10478                                 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10479                                 xpath_node na = ns.first();
10480
10481                                 return xpath_string::from_const(qualified_name(na));
10482                         }
10483
10484                         case ast_func_namespace_uri_0:
10485                         {
10486                                 xpath_node na = c.n;
10487
10488                                 return xpath_string::from_const(namespace_uri(na));
10489                         }
10490
10491                         case ast_func_namespace_uri_1:
10492                         {
10493                                 xpath_allocator_capture cr(stack.result);
10494
10495                                 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10496                                 xpath_node na = ns.first();
10497
10498                                 return xpath_string::from_const(namespace_uri(na));
10499                         }
10500
                              // string() without arguments yields the string value of the context node
10501                         case ast_func_string_0:
10502                                 return string_value(c.n, stack.result);
10503
10504                         case ast_func_string_1:
10505                                 return _left->eval_string(c, stack);
10506
10507                         case ast_func_concat:
10508                                 return eval_string_concat(c, stack);
10509
10510                         case ast_func_substring_before:
10511                         {
10512                                 xpath_allocator_capture cr(stack.temp);
10513
10514                                 xpath_stack swapped_stack = {stack.temp, stack.result};
10515
10516                                 xpath_string s = _left->eval_string(c, swapped_stack);
10517                                 xpath_string p = _right->eval_string(c, swapped_stack);
10518
10519                                 const char_t* pos = find_substring(s.c_str(), p.c_str());
10520
                                      // the part of s before the match is created with stack.result; no match gives an empty string
10521                                 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
10522                         }
10523
10524                         case ast_func_substring_after:
10525                         {
10526                                 xpath_allocator_capture cr(stack.temp);
10527
10528                                 xpath_stack swapped_stack = {stack.temp, stack.result};
10529
10530                                 xpath_string s = _left->eval_string(c, swapped_stack);
10531                                 xpath_string p = _right->eval_string(c, swapped_stack);
10532
10533                                 const char_t* pos = find_substring(s.c_str(), p.c_str());
10534                                 if (!pos) return xpath_string();
10535
                                      // the result is everything after the matched pattern, up to the end of s
10536                                 const char_t* rbegin = pos + p.length();
10537                                 const char_t* rend = s.c_str() + s.length();
10538
                                      // a heap-backed s is copied with stack.result; otherwise the tail pointer is returned as-is
10539                                 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10540                         }
10541
10542                         case ast_func_substring_2:
10543                         {
10544                                 xpath_allocator_capture cr(stack.temp);
10545
10546                                 xpath_stack swapped_stack = {stack.temp, stack.result};
10547
10548                                 xpath_string s = _left->eval_string(c, swapped_stack);
10549                                 size_t s_length = s.length();
10550
10551                                 double first = round_nearest(_right->eval_number(c, stack));
10552
                                      // positions are 1-based: a start past the last character (or NaN) selects nothing
10553                                 if (is_nan(first)) return xpath_string(); // NaN
10554                                 else if (first >= s_length + 1) return xpath_string();
10555
                                      // starts before the first character are clamped to position 1
10556                                 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10557                                 assert(1 <= pos && pos <= s_length + 1);
10558
10559                                 const char_t* rbegin = s.c_str() + (pos - 1);
10560                                 const char_t* rend = s.c_str() + s.length();
10561
10562                                 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10563                         }
10564
10565                         case ast_func_substring_3:
10566                         {
10567                                 xpath_allocator_capture cr(stack.temp);
10568
10569                                 xpath_stack swapped_stack = {stack.temp, stack.result};
10570
10571                                 xpath_string s = _left->eval_string(c, swapped_stack);
10572                                 size_t s_length = s.length();
10573
                                      // last is the 1-based position one past the final selected character (start + length)
10574                                 double first = round_nearest(_right->eval_number(c, stack));
10575                                 double last = first + round_nearest(_right->_next->eval_number(c, stack));
10576
                                      // NaN bounds, a start past the end, an empty range or a range ending before position 1 select nothing
10577                                 if (is_nan(first) || is_nan(last)) return xpath_string();
10578                                 else if (first >= s_length + 1) return xpath_string();
10579                                 else if (first >= last) return xpath_string();
10580                                 else if (last < 1) return xpath_string();
10581
                                      // clamp both bounds into [1, s_length + 1]
10582                                 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10583                                 size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
10584
10585                                 assert(1 <= pos && pos <= end && end <= s_length + 1);
10586                                 const char_t* rbegin = s.c_str() + (pos - 1);
10587                                 const char_t* rend = s.c_str() + (end - 1);
10588
                                      // the uncopied form is used only when the range runs to the end of a string that is not heap-backed;
                                      // NOTE(review): presumably because from_const needs a terminated string - confirm
10589                                 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
10590                         }
10591
10592                         case ast_func_normalize_space_0:
10593                         {
10594                                 xpath_string s = string_value(c.n, stack.result);
10595
                                      // NOTE(review): s.data() presumably hands out a writable buffer (copying if needed) that
                                      // normalize_space() rewrites in place, returning the new end - confirm
10596                                 char_t* begin = s.data(stack.result);
10597                                 char_t* end = normalize_space(begin);
10598
10599                                 return xpath_string::from_heap_preallocated(begin, end);
10600                         }
10601
10602                         case ast_func_normalize_space_1:
10603                         {
10604                                 xpath_string s = _left->eval_string(c, stack);
10605
10606                                 char_t* begin = s.data(stack.result);
10607                                 char_t* end = normalize_space(begin);
10608
10609                                 return xpath_string::from_heap_preallocated(begin, end);
10610                         }
10611
10612                         case ast_func_translate:
10613                         {
10614                                 xpath_allocator_capture cr(stack.temp);
10615
10616                                 xpath_stack swapped_stack = {stack.temp, stack.result};
10617
                                      // the subject string is evaluated with the regular stack because it becomes the result;
                                      // the two mapping strings are only needed temporarily and use the swapped stack
10618                                 xpath_string s = _left->eval_string(c, stack);
10619                                 xpath_string from = _right->eval_string(c, swapped_stack);
10620                                 xpath_string to = _right->_next->eval_string(c, swapped_stack);
10621
10622                                 char_t* begin = s.data(stack.result);
10623                                 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
10624
10625                                 return xpath_string::from_heap_preallocated(begin, end);
10626                         }
10627
                              // translate() variant that uses the table stored in _data.table instead of evaluating the mapping arguments
10628                         case ast_opt_translate_table:
10629                         {
10630                                 xpath_string s = _left->eval_string(c, stack);
10631
10632                                 char_t* begin = s.data(stack.result);
10633                                 char_t* end = translate_table(begin, _data.table);
10634
10635                                 return xpath_string::from_heap_preallocated(begin, end);
10636                         }
10637
10638                         case ast_variable:
10639                         {
10640                                 assert(_rettype == _data.variable->type());
10641
10642                                 if (_rettype == xpath_type_string)
10643                                         return xpath_string::from_const(_data.variable->get_string());
10644
10645                                 // fallthrough to type conversion
10646                         }
10647
10648                         default:
10649                         {
                                      // no dedicated string form: evaluate in the node's own type and convert
10650                                 switch (_rettype)
10651                                 {
10652                                 case xpath_type_boolean:
10653                                         return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
10654
10655                                 case xpath_type_number:
10656                                         return convert_number_to_string(eval_number(c, stack), stack.result);
10657
10658                                 case xpath_type_node_set:
10659                                 {
10660                                         xpath_allocator_capture cr(stack.temp);
10661
10662                                         xpath_stack swapped_stack = {stack.temp, stack.result};
10663
                                              // the node set itself is temporary; only the string value of its first node is
                                              // produced with stack.result, and an empty set gives an empty string
10664                                         xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
10665                                         return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
10666                                 }
10667
10668                                 default:
10669                                         assert(false && "Wrong expression for return type string");
10670                                         return xpath_string();
10671                                 }
10672                         }
10673                         }
10674                 }
10675
10676                 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
10677                 {
10678                         switch (_type)
10679                         {
10680                         case ast_op_union:
10681                         {
10682                                 xpath_allocator_capture cr(stack.temp);
10683
10684                                 xpath_stack swapped_stack = {stack.temp, stack.result};
10685
10686                                 xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval);
10687                                 xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval);
10688
10689                                 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
10690                                 rs.set_type(xpath_node_set::type_unsorted);
10691
10692                                 rs.append(ls.begin(), ls.end(), stack.result);
10693                                 rs.remove_duplicates();
10694
10695                                 return rs;
10696                         }
10697
10698                         case ast_filter:
10699                         {
10700                                 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
10701
10702                                 // either expression is a number or it contains position() call; sort by document order
10703                                 if (_test != predicate_posinv) set.sort_do();
10704
10705                                 bool once = eval_once(set.type(), eval);
10706
10707                                 apply_predicate(set, 0, stack, once);
10708
10709                                 return set;
10710                         }
10711
10712                         case ast_func_id:
10713                                 return xpath_node_set_raw();
10714
10715                         case ast_step:
10716                         {
10717                                 switch (_axis)
10718                                 {
10719                                 case axis_ancestor:
10720                                         return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
10721
10722                                 case axis_ancestor_or_self:
10723                                         return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
10724
10725                                 case axis_attribute:
10726                                         return step_do(c, stack, eval, axis_to_type<axis_attribute>());
10727
10728                                 case axis_child:
10729                                         return step_do(c, stack, eval, axis_to_type<axis_child>());
10730
10731                                 case axis_descendant:
10732                                         return step_do(c, stack, eval, axis_to_type<axis_descendant>());
10733
10734                                 case axis_descendant_or_self:
10735                                         return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
10736
10737                                 case axis_following:
10738                                         return step_do(c, stack, eval, axis_to_type<axis_following>());
10739
10740                                 case axis_following_sibling:
10741                                         return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
10742
10743                                 case axis_namespace:
10744                                         // namespaced axis is not supported
10745                                         return xpath_node_set_raw();
10746
10747                                 case axis_parent:
10748                                         return step_do(c, stack, eval, axis_to_type<axis_parent>());
10749
10750                                 case axis_preceding:
10751                                         return step_do(c, stack, eval, axis_to_type<axis_preceding>());
10752
10753                                 case axis_preceding_sibling:
10754                                         return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
10755
10756                                 case axis_self:
10757                                         return step_do(c, stack, eval, axis_to_type<axis_self>());
10758
10759                                 default:
10760                                         assert(false && "Unknown axis");
10761                                         return xpath_node_set_raw();
10762                                 }
10763                         }
10764
10765                         case ast_step_root:
10766                         {
10767                                 assert(!_right); // root step can't have any predicates
10768
10769                                 xpath_node_set_raw ns;
10770
10771                                 ns.set_type(xpath_node_set::type_sorted);
10772
10773                                 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
10774                                 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
10775
10776                                 return ns;
10777                         }
10778
10779                         case ast_variable:
10780                         {
10781                                 assert(_rettype == _data.variable->type());
10782
10783                                 if (_rettype == xpath_type_node_set)
10784                                 {
10785                                         const xpath_node_set& s = _data.variable->get_node_set();
10786
10787                                         xpath_node_set_raw ns;
10788
10789                                         ns.set_type(s.type());
10790                                         ns.append(s.begin(), s.end(), stack.result);
10791
10792                                         return ns;
10793                                 }
10794
10795                                 // fallthrough to type conversion
10796                         }
10797
10798                         default:
10799                                 assert(false && "Wrong expression for return type node set");
10800                                 return xpath_node_set_raw();
10801                         }
10802                 }
10803
10804                 void optimize(xpath_allocator* alloc)
10805                 {
10806                         if (_left) _left->optimize(alloc);
10807                         if (_right) _right->optimize(alloc);
10808                         if (_next) _next->optimize(alloc);
10809
10810                         optimize_self(alloc);
10811                 }
10812
10813                 void optimize_self(xpath_allocator* alloc)
10814                 {
10815                         // Rewrite [position()=expr] with [expr]
10816                         // Note that this step has to go before classification to recognize [position()=1]
10817                         if ((_type == ast_filter || _type == ast_predicate) &&
10818                                 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
10819                         {
10820                                 _right = _right->_right;
10821                         }
10822
10823                         // Classify filter/predicate ops to perform various optimizations during evaluation
10824                         if (_type == ast_filter || _type == ast_predicate)
10825                         {
10826                                 assert(_test == predicate_default);
10827
10828                                 if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
10829                                         _test = predicate_constant_one;
10830                                 else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
10831                                         _test = predicate_constant;
10832                                 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
10833                                         _test = predicate_posinv;
10834                         }
10835
10836                         // Rewrite descendant-or-self::node()/child::foo with descendant::foo
10837                         // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
10838                         // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
10839                         // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
10840                         if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left &&
10841                                 _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
10842                                 is_posinv_step())
10843                         {
10844                                 if (_axis == axis_child || _axis == axis_descendant)
10845                                         _axis = axis_descendant;
10846                                 else
10847                                         _axis = axis_descendant_or_self;
10848
10849                                 _left = _left->_left;
10850                         }
10851
10852                         // Use optimized lookup table implementation for translate() with constant arguments
10853                         if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
10854                         {
10855                                 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
10856
10857                                 if (table)
10858                                 {
10859                                         _type = ast_opt_translate_table;
10860                                         _data.table = table;
10861                                 }
10862                         }
10863
10864                         // Use optimized path for @attr = 'value' or @attr = $value
10865                         if (_type == ast_op_equal &&
10866                                 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
10867                                 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
10868                         {
10869                                 _type = ast_opt_compare_attribute;
10870                         }
10871                 }
10872
10873                 bool is_posinv_expr() const
10874                 {
10875                         switch (_type)
10876                         {
10877                         case ast_func_position:
10878                         case ast_func_last:
10879                                 return false;
10880
10881                         case ast_string_constant:
10882                         case ast_number_constant:
10883                         case ast_variable:
10884                                 return true;
10885
10886                         case ast_step:
10887                         case ast_step_root:
10888                                 return true;
10889
10890                         case ast_predicate:
10891                         case ast_filter:
10892                                 return true;
10893
10894                         default:
10895                                 if (_left && !_left->is_posinv_expr()) return false;
10896
10897                                 for (xpath_ast_node* n = _right; n; n = n->_next)
10898                                         if (!n->is_posinv_expr()) return false;
10899
10900                                 return true;
10901                         }
10902                 }
10903
10904                 bool is_posinv_step() const
10905                 {
10906                         assert(_type == ast_step);
10907
10908                         for (xpath_ast_node* n = _right; n; n = n->_next)
10909                         {
10910                                 assert(n->_type == ast_predicate);
10911
10912                                 if (n->_test != predicate_posinv)
10913                                         return false;
10914                         }
10915
10916                         return true;
10917                 }
10918
10919                 xpath_value_type rettype() const
10920                 {
10921                         return static_cast<xpath_value_type>(_rettype);
10922                 }
10923         };
10924
10925         struct xpath_parser
10926         {
10927                 xpath_allocator* _alloc;
10928                 xpath_lexer _lexer;
10929
10930                 const char_t* _query;
10931                 xpath_variable_set* _variables;
10932
10933                 xpath_parse_result* _result;
10934
10935                 char_t _scratch[32];
10936
10937         #ifdef PUGIXML_NO_EXCEPTIONS
10938                 jmp_buf _error_handler;
10939         #endif
10940
10941                 void throw_error(const char* message)
10942                 {
10943                         _result->error = message;
10944                         _result->offset = _lexer.current_pos() - _query;
10945
10946                 #ifdef PUGIXML_NO_EXCEPTIONS
10947                         longjmp(_error_handler, 1);
10948                 #else
10949                         throw xpath_exception(*_result);
10950                 #endif
10951                 }
10952
// Reports an allocation failure: throws std::bad_alloc, or goes through
// throw_error with an "Out of memory" message when PUGIXML_NO_EXCEPTIONS is defined.
void throw_error_oom()
{
#ifndef PUGIXML_NO_EXCEPTIONS
    throw std::bad_alloc();
#else
    throw_error("Out of memory");
#endif
}
10961
10962                 void* alloc_node()
10963                 {
10964                         void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
10965
10966                         if (!result) throw_error_oom();
10967
10968                         return result;
10969                 }
10970
10971                 const char_t* alloc_string(const xpath_lexer_string& value)
10972                 {
10973                         if (value.begin)
10974                         {
10975                                 size_t length = static_cast<size_t>(value.end - value.begin);
10976
10977                                 char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t)));
10978                                 if (!c) throw_error_oom();
10979                                 assert(c); // workaround for clang static analysis
10980
10981                                 memcpy(c, value.begin, length * sizeof(char_t));
10982                                 c[length] = 0;
10983
10984                                 return c;
10985                         }
10986                         else return 0;
10987                 }
10988
10989                 xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
10990                 {
10991                         assert(argc <= 1);
10992
10993                         if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
10994
10995                         return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
10996                 }
10997
10998                 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
10999                 {
11000                         switch (name.begin[0])
11001                         {
11002                         case 'b':
11003                                 if (name == PUGIXML_TEXT("boolean") && argc == 1)
11004                                         return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
11005
11006                                 break;
11007
11008                         case 'c':
11009                                 if (name == PUGIXML_TEXT("count") && argc == 1)
11010                                 {
11011                                         if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
11012                                         return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
11013                                 }
11014                                 else if (name == PUGIXML_TEXT("contains") && argc == 2)
11015                                         return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
11016                                 else if (name == PUGIXML_TEXT("concat") && argc >= 2)
11017                                         return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
11018                                 else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
11019                                         return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
11020
11021                                 break;
11022
11023                         case 'f':
11024                                 if (name == PUGIXML_TEXT("false") && argc == 0)
11025                                         return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
11026                                 else if (name == PUGIXML_TEXT("floor") && argc == 1)
11027                                         return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
11028
11029                                 break;
11030
11031                         case 'i':
11032                                 if (name == PUGIXML_TEXT("id") && argc == 1)
11033                                         return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
11034
11035                                 break;
11036
11037                         case 'l':
11038                                 if (name == PUGIXML_TEXT("last") && argc == 0)
11039                                         return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
11040                                 else if (name == PUGIXML_TEXT("lang") && argc == 1)
11041                                         return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
11042                                 else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
11043                                         return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
11044
11045                                 break;
11046
11047                         case 'n':
11048                                 if (name == PUGIXML_TEXT("name") && argc <= 1)
11049                                         return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
11050                                 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
11051                                         return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
11052                                 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
11053                                         return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
11054                                 else if (name == PUGIXML_TEXT("not") && argc == 1)
11055                                         return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
11056                                 else if (name == PUGIXML_TEXT("number") && argc <= 1)
11057                                         return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
11058
11059                                 break;
11060
11061                         case 'p':
11062                                 if (name == PUGIXML_TEXT("position") && argc == 0)
11063                                         return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
11064
11065                                 break;
11066
11067                         case 'r':
11068                                 if (name == PUGIXML_TEXT("round") && argc == 1)
11069                                         return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
11070
11071                                 break;
11072
11073                         case 's':
11074                                 if (name == PUGIXML_TEXT("string") && argc <= 1)
11075                                         return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
11076                                 else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
11077                                         return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
11078                                 else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
11079                                         return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
11080                                 else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
11081                                         return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
11082                                 else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
11083                                         return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
11084                                 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
11085                                         return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
11086                                 else if (name == PUGIXML_TEXT("sum") && argc == 1)
11087                                 {
11088                                         if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
11089                                         return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
11090                                 }
11091
11092                                 break;
11093
11094                         case 't':
11095                                 if (name == PUGIXML_TEXT("translate") && argc == 3)
11096                                         return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
11097                                 else if (name == PUGIXML_TEXT("true") && argc == 0)
11098                                         return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
11099
11100                                 break;
11101
11102                         default:
11103                                 break;
11104                         }
11105
11106                         throw_error("Unrecognized function or wrong parameter count");
11107
11108                         return 0;
11109                 }
11110
11111                 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
11112                 {
11113                         specified = true;
11114
11115                         switch (name.begin[0])
11116                         {
11117                         case 'a':
11118                                 if (name == PUGIXML_TEXT("ancestor"))
11119                                         return axis_ancestor;
11120                                 else if (name == PUGIXML_TEXT("ancestor-or-self"))
11121                                         return axis_ancestor_or_self;
11122                                 else if (name == PUGIXML_TEXT("attribute"))
11123                                         return axis_attribute;
11124
11125                                 break;
11126
11127                         case 'c':
11128                                 if (name == PUGIXML_TEXT("child"))
11129                                         return axis_child;
11130
11131                                 break;
11132
11133                         case 'd':
11134                                 if (name == PUGIXML_TEXT("descendant"))
11135                                         return axis_descendant;
11136                                 else if (name == PUGIXML_TEXT("descendant-or-self"))
11137                                         return axis_descendant_or_self;
11138
11139                                 break;
11140
11141                         case 'f':
11142                                 if (name == PUGIXML_TEXT("following"))
11143                                         return axis_following;
11144                                 else if (name == PUGIXML_TEXT("following-sibling"))
11145                                         return axis_following_sibling;
11146
11147                                 break;
11148
11149                         case 'n':
11150                                 if (name == PUGIXML_TEXT("namespace"))
11151                                         return axis_namespace;
11152
11153                                 break;
11154
11155                         case 'p':
11156                                 if (name == PUGIXML_TEXT("parent"))
11157                                         return axis_parent;
11158                                 else if (name == PUGIXML_TEXT("preceding"))
11159                                         return axis_preceding;
11160                                 else if (name == PUGIXML_TEXT("preceding-sibling"))
11161                                         return axis_preceding_sibling;
11162
11163                                 break;
11164
11165                         case 's':
11166                                 if (name == PUGIXML_TEXT("self"))
11167                                         return axis_self;
11168
11169                                 break;
11170
11171                         default:
11172                                 break;
11173                         }
11174
11175                         specified = false;
11176                         return axis_child;
11177                 }
11178
11179                 nodetest_t parse_node_test_type(const xpath_lexer_string& name)
11180                 {
11181                         switch (name.begin[0])
11182                         {
11183                         case 'c':
11184                                 if (name == PUGIXML_TEXT("comment"))
11185                                         return nodetest_type_comment;
11186
11187                                 break;
11188
11189                         case 'n':
11190                                 if (name == PUGIXML_TEXT("node"))
11191                                         return nodetest_type_node;
11192
11193                                 break;
11194
11195                         case 'p':
11196                                 if (name == PUGIXML_TEXT("processing-instruction"))
11197                                         return nodetest_type_pi;
11198
11199                                 break;
11200
11201                         case 't':
11202                                 if (name == PUGIXML_TEXT("text"))
11203                                         return nodetest_type_text;
11204
11205                                 break;
11206
11207                         default:
11208                                 break;
11209                         }
11210
11211                         return nodetest_none;
11212                 }
11213
11214                 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
11215                 xpath_ast_node* parse_primary_expression()
11216                 {
11217                         switch (_lexer.current())
11218                         {
11219                         case lex_var_ref:
11220                         {
11221                                 xpath_lexer_string name = _lexer.contents();
11222
11223                                 if (!_variables)
11224                                         throw_error("Unknown variable: variable set is not provided");
11225
11226                                 xpath_variable* var = 0;
11227                                 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
11228                                         throw_error_oom();
11229
11230                                 if (!var)
11231                                         throw_error("Unknown variable: variable set does not contain the given name");
11232
11233                                 _lexer.next();
11234
11235                                 return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
11236                         }
11237
11238                         case lex_open_brace:
11239                         {
11240                                 _lexer.next();
11241
11242                                 xpath_ast_node* n = parse_expression();
11243
11244                                 if (_lexer.current() != lex_close_brace)
11245                                         throw_error("Unmatched braces");
11246
11247                                 _lexer.next();
11248
11249                                 return n;
11250                         }
11251
11252                         case lex_quoted_string:
11253                         {
11254                                 const char_t* value = alloc_string(_lexer.contents());
11255
11256                                 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
11257                                 _lexer.next();
11258
11259                                 return n;
11260                         }
11261
11262                         case lex_number:
11263                         {
11264                                 double value = 0;
11265
11266                                 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
11267                                         throw_error_oom();
11268
11269                                 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
11270                                 _lexer.next();
11271
11272                                 return n;
11273                         }
11274
11275                         case lex_string:
11276                         {
11277                                 xpath_ast_node* args[2] = {0};
11278                                 size_t argc = 0;
11279
11280                                 xpath_lexer_string function = _lexer.contents();
11281                                 _lexer.next();
11282
11283                                 xpath_ast_node* last_arg = 0;
11284
11285                                 if (_lexer.current() != lex_open_brace)
11286                                         throw_error("Unrecognized function call");
11287                                 _lexer.next();
11288
11289                                 if (_lexer.current() != lex_close_brace)
11290                                         args[argc++] = parse_expression();
11291
11292                                 while (_lexer.current() != lex_close_brace)
11293                                 {
11294                                         if (_lexer.current() != lex_comma)
11295                                                 throw_error("No comma between function arguments");
11296                                         _lexer.next();
11297
11298                                         xpath_ast_node* n = parse_expression();
11299
11300                                         if (argc < 2) args[argc] = n;
11301                                         else last_arg->set_next(n);
11302
11303                                         argc++;
11304                                         last_arg = n;
11305                                 }
11306
11307                                 _lexer.next();
11308
11309                                 return parse_function(function, argc, args);
11310                         }
11311
11312                         default:
11313                                 throw_error("Unrecognizable primary expression");
11314
11315                                 return 0;
11316                         }
11317                 }
11318
11319                 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
11320                 // Predicate ::= '[' PredicateExpr ']'
11321                 // PredicateExpr ::= Expr
11322                 xpath_ast_node* parse_filter_expression()
11323                 {
11324                         xpath_ast_node* n = parse_primary_expression();
11325
11326                         while (_lexer.current() == lex_open_square_brace)
11327                         {
11328                                 _lexer.next();
11329
11330                                 xpath_ast_node* expr = parse_expression();
11331
11332                                 if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
11333
11334                                 n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default);
11335
11336                                 if (_lexer.current() != lex_close_square_brace)
11337                                         throw_error("Unmatched square brace");
11338
11339                                 _lexer.next();
11340                         }
11341
11342                         return n;
11343                 }
11344
11345                 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
11346                 // AxisSpecifier ::= AxisName '::' | '@'?
11347                 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
11348                 // NameTest ::= '*' | NCName ':' '*' | QName
11349                 // AbbreviatedStep ::= '.' | '..'
11350                 xpath_ast_node* parse_step(xpath_ast_node* set)
11351                 {
11352                         if (set && set->rettype() != xpath_type_node_set)
11353                                 throw_error("Step has to be applied to node set");
11354
11355                         bool axis_specified = false;
11356                         axis_t axis = axis_child; // implied child axis
11357
11358                         if (_lexer.current() == lex_axis_attribute)
11359                         {
11360                                 axis = axis_attribute;
11361                                 axis_specified = true;
11362
11363                                 _lexer.next();
11364                         }
11365                         else if (_lexer.current() == lex_dot)
11366                         {
11367                                 _lexer.next();
11368
11369                                 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
11370                         }
11371                         else if (_lexer.current() == lex_double_dot)
11372                         {
11373                                 _lexer.next();
11374
11375                                 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
11376                         }
11377
11378                         nodetest_t nt_type = nodetest_none;
11379                         xpath_lexer_string nt_name;
11380
11381                         if (_lexer.current() == lex_string)
11382                         {
11383                                 // node name test
11384                                 nt_name = _lexer.contents();
11385                                 _lexer.next();
11386
11387                                 // was it an axis name?
11388                                 if (_lexer.current() == lex_double_colon)
11389                                 {
11390                                         // parse axis name
11391                                         if (axis_specified) throw_error("Two axis specifiers in one step");
11392
11393                                         axis = parse_axis_name(nt_name, axis_specified);
11394
11395                                         if (!axis_specified) throw_error("Unknown axis");
11396
11397                                         // read actual node test
11398                                         _lexer.next();
11399
11400                                         if (_lexer.current() == lex_multiply)
11401                                         {
11402                                                 nt_type = nodetest_all;
11403                                                 nt_name = xpath_lexer_string();
11404                                                 _lexer.next();
11405                                         }
11406                                         else if (_lexer.current() == lex_string)
11407                                         {
11408                                                 nt_name = _lexer.contents();
11409                                                 _lexer.next();
11410                                         }
11411                                         else throw_error("Unrecognized node test");
11412                                 }
11413
11414                                 if (nt_type == nodetest_none)
11415                                 {
11416                                         // node type test or processing-instruction
11417                                         if (_lexer.current() == lex_open_brace)
11418                                         {
11419                                                 _lexer.next();
11420
11421                                                 if (_lexer.current() == lex_close_brace)
11422                                                 {
11423                                                         _lexer.next();
11424
11425                                                         nt_type = parse_node_test_type(nt_name);
11426
11427                                                         if (nt_type == nodetest_none) throw_error("Unrecognized node type");
11428
11429                                                         nt_name = xpath_lexer_string();
11430                                                 }
11431                                                 else if (nt_name == PUGIXML_TEXT("processing-instruction"))
11432                                                 {
11433                                                         if (_lexer.current() != lex_quoted_string)
11434                                                                 throw_error("Only literals are allowed as arguments to processing-instruction()");
11435
11436                                                         nt_type = nodetest_pi;
11437                                                         nt_name = _lexer.contents();
11438                                                         _lexer.next();
11439
11440                                                         if (_lexer.current() != lex_close_brace)
11441                                                                 throw_error("Unmatched brace near processing-instruction()");
11442                                                         _lexer.next();
11443                                                 }
11444                                                 else
11445                                                         throw_error("Unmatched brace near node type test");
11446
11447                                         }
11448                                         // QName or NCName:*
11449                                         else
11450                                         {
11451                                                 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
11452                                                 {
11453                                                         nt_name.end--; // erase *
11454
11455                                                         nt_type = nodetest_all_in_namespace;
11456                                                 }
11457                                                 else nt_type = nodetest_name;
11458                                         }
11459                                 }
11460                         }
11461                         else if (_lexer.current() == lex_multiply)
11462                         {
11463                                 nt_type = nodetest_all;
11464                                 _lexer.next();
11465                         }
11466                         else throw_error("Unrecognized node test");
11467
11468                         xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
11469
11470                         xpath_ast_node* last = 0;
11471
11472                         while (_lexer.current() == lex_open_square_brace)
11473                         {
11474                                 _lexer.next();
11475
11476                                 xpath_ast_node* expr = parse_expression();
11477
11478                                 xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default);
11479
11480                                 if (_lexer.current() != lex_close_square_brace)
11481                                         throw_error("Unmatched square brace");
11482                                 _lexer.next();
11483
11484                                 if (last) last->set_next(pred);
11485                                 else n->set_right(pred);
11486
11487                                 last = pred;
11488                         }
11489
11490                         return n;
11491                 }
11492
11493                 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
11494                 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
11495                 {
11496                         xpath_ast_node* n = parse_step(set);
11497
11498                         while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11499                         {
11500                                 lexeme_t l = _lexer.current();
11501                                 _lexer.next();
11502
11503                                 if (l == lex_double_slash)
11504                                         n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11505
11506                                 n = parse_step(n);
11507                         }
11508
11509                         return n;
11510                 }
11511
11512                 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
11513                 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
11514                 xpath_ast_node* parse_location_path()
11515                 {
11516                         if (_lexer.current() == lex_slash)
11517                         {
11518                                 _lexer.next();
11519
11520                                 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
11521
11522                                 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
11523                                 lexeme_t l = _lexer.current();
11524
11525                                 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
11526                                         return parse_relative_location_path(n);
11527                                 else
11528                                         return n;
11529                         }
11530                         else if (_lexer.current() == lex_double_slash)
11531                         {
11532                                 _lexer.next();
11533
11534                                 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
11535                                 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11536
11537                                 return parse_relative_location_path(n);
11538                         }
11539
11540                         // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
11541                         return parse_relative_location_path(0);
11542                 }
11543
11544                 // PathExpr ::= LocationPath
11545                 //                              | FilterExpr
11546                 //                              | FilterExpr '/' RelativeLocationPath
11547                 //                              | FilterExpr '//' RelativeLocationPath
11548                 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
11549                 // UnaryExpr ::= UnionExpr | '-' UnaryExpr
11550                 xpath_ast_node* parse_path_or_unary_expression()
11551                 {
11552                         // Clarification.
11553                         // PathExpr begins with either LocationPath or FilterExpr.
11554                         // FilterExpr begins with PrimaryExpr
11555                         // PrimaryExpr begins with '$' in case of it being a variable reference,
11556                         // '(' in case of it being an expression, string literal, number constant or
11557                         // function call.
11558
11559                         if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
11560                                 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
11561                                 _lexer.current() == lex_string)
11562                         {
11563                                 if (_lexer.current() == lex_string)
11564                                 {
11565                                         // This is either a function call, or not - if not, we shall proceed with location path
11566                                         const char_t* state = _lexer.state();
11567
11568                                         while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
11569
11570                                         if (*state != '(') return parse_location_path();
11571
11572                                         // This looks like a function call; however this still can be a node-test. Check it.
11573                                         if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
11574                                 }
11575
11576                                 xpath_ast_node* n = parse_filter_expression();
11577
11578                                 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11579                                 {
11580                                         lexeme_t l = _lexer.current();
11581                                         _lexer.next();
11582
11583                                         if (l == lex_double_slash)
11584                                         {
11585                                                 if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
11586
11587                                                 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11588                                         }
11589
11590                                         // select from location path
11591                                         return parse_relative_location_path(n);
11592                                 }
11593
11594                                 return n;
11595                         }
11596                         else if (_lexer.current() == lex_minus)
11597                         {
11598                                 _lexer.next();
11599
11600                                 // precedence 7+ - only parses union expressions
11601                                 xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7);
11602
11603                                 return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
11604                         }
11605                         else
11606                                 return parse_location_path();
11607                 }
11608
11609                 struct binary_op_t
11610                 {
11611                         ast_type_t asttype;
11612                         xpath_value_type rettype;
11613                         int precedence;
11614
11615                         binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
11616                         {
11617                         }
11618
11619                         binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
11620                         {
11621                         }
11622
11623                         static binary_op_t parse(xpath_lexer& lexer)
11624                         {
11625                                 switch (lexer.current())
11626                                 {
11627                                 case lex_string:
11628                                         if (lexer.contents() == PUGIXML_TEXT("or"))
11629                                                 return binary_op_t(ast_op_or, xpath_type_boolean, 1);
11630                                         else if (lexer.contents() == PUGIXML_TEXT("and"))
11631                                                 return binary_op_t(ast_op_and, xpath_type_boolean, 2);
11632                                         else if (lexer.contents() == PUGIXML_TEXT("div"))
11633                                                 return binary_op_t(ast_op_divide, xpath_type_number, 6);
11634                                         else if (lexer.contents() == PUGIXML_TEXT("mod"))
11635                                                 return binary_op_t(ast_op_mod, xpath_type_number, 6);
11636                                         else
11637                                                 return binary_op_t();
11638
11639                                 case lex_equal:
11640                                         return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
11641
11642                                 case lex_not_equal:
11643                                         return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
11644
11645                                 case lex_less:
11646                                         return binary_op_t(ast_op_less, xpath_type_boolean, 4);
11647
11648                                 case lex_greater:
11649                                         return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
11650
11651                                 case lex_less_or_equal:
11652                                         return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
11653
11654                                 case lex_greater_or_equal:
11655                                         return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
11656
11657                                 case lex_plus:
11658                                         return binary_op_t(ast_op_add, xpath_type_number, 5);
11659
11660                                 case lex_minus:
11661                                         return binary_op_t(ast_op_subtract, xpath_type_number, 5);
11662
11663                                 case lex_multiply:
11664                                         return binary_op_t(ast_op_multiply, xpath_type_number, 6);
11665
11666                                 case lex_union:
11667                                         return binary_op_t(ast_op_union, xpath_type_node_set, 7);
11668
11669                                 default:
11670                                         return binary_op_t();
11671                                 }
11672                         }
11673                 };
11674
11675                 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
11676                 {
11677                         binary_op_t op = binary_op_t::parse(_lexer);
11678
11679                         while (op.asttype != ast_unknown && op.precedence >= limit)
11680                         {
11681                                 _lexer.next();
11682
11683                                 xpath_ast_node* rhs = parse_path_or_unary_expression();
11684
11685                                 binary_op_t nextop = binary_op_t::parse(_lexer);
11686
11687                                 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
11688                                 {
11689                                         rhs = parse_expression_rec(rhs, nextop.precedence);
11690
11691                                         nextop = binary_op_t::parse(_lexer);
11692                                 }
11693
11694                                 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
11695                                         throw_error("Union operator has to be applied to node sets");
11696
11697                                 lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs);
11698
11699                                 op = binary_op_t::parse(_lexer);
11700                         }
11701
11702                         return lhs;
11703                 }
11704
11705                 // Expr ::= OrExpr
11706                 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
11707                 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
11708                 // EqualityExpr ::= RelationalExpr
11709                 //                                      | EqualityExpr '=' RelationalExpr
11710                 //                                      | EqualityExpr '!=' RelationalExpr
11711                 // RelationalExpr ::= AdditiveExpr
11712                 //                                        | RelationalExpr '<' AdditiveExpr
11713                 //                                        | RelationalExpr '>' AdditiveExpr
11714                 //                                        | RelationalExpr '<=' AdditiveExpr
11715                 //                                        | RelationalExpr '>=' AdditiveExpr
11716                 // AdditiveExpr ::= MultiplicativeExpr
11717                 //                                      | AdditiveExpr '+' MultiplicativeExpr
11718                 //                                      | AdditiveExpr '-' MultiplicativeExpr
11719                 // MultiplicativeExpr ::= UnaryExpr
11720                 //                                                | MultiplicativeExpr '*' UnaryExpr
11721                 //                                                | MultiplicativeExpr 'div' UnaryExpr
11722                 //                                                | MultiplicativeExpr 'mod' UnaryExpr
11723                 xpath_ast_node* parse_expression()
11724                 {
11725                         return parse_expression_rec(parse_path_or_unary_expression(), 0);
11726                 }
11727
11728                 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
11729                 {
11730                 }
11731
11732                 xpath_ast_node* parse()
11733                 {
11734                         xpath_ast_node* result = parse_expression();
11735
11736                         if (_lexer.current() != lex_eof)
11737                         {
11738                                 // there are still unparsed tokens left, error
11739                                 throw_error("Incorrect query");
11740                         }
11741
11742                         return result;
11743                 }
11744
11745                 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
11746                 {
11747                         xpath_parser parser(query, variables, alloc, result);
11748
11749                 #ifdef PUGIXML_NO_EXCEPTIONS
11750                         int error = setjmp(parser._error_handler);
11751
11752                         return (error == 0) ? parser.parse() : 0;
11753                 #else
11754                         return parser.parse();
11755                 #endif
11756                 }
11757         };
11758
11759         struct xpath_query_impl
11760         {
11761                 static xpath_query_impl* create()
11762                 {
11763                         void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
11764                         if (!memory) return 0;
11765
11766                         return new (memory) xpath_query_impl();
11767                 }
11768
11769                 static void destroy(xpath_query_impl* impl)
11770                 {
11771                         // free all allocated pages
11772                         impl->alloc.release();
11773
11774                         // free allocator memory (with the first page)
11775                         xml_memory::deallocate(impl);
11776                 }
11777
11778                 xpath_query_impl(): root(0), alloc(&block)
11779                 {
11780                         block.next = 0;
11781                         block.capacity = sizeof(block.data);
11782                 }
11783
11784                 xpath_ast_node* root;
11785                 xpath_allocator alloc;
11786                 xpath_memory_block block;
11787         };
11788
11789         PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
11790         {
11791                 if (!impl) return xpath_string();
11792
11793         #ifdef PUGIXML_NO_EXCEPTIONS
11794                 if (setjmp(sd.error_handler)) return xpath_string();
11795         #endif
11796
11797                 xpath_context c(n, 1, 1);
11798
11799                 return impl->root->eval_string(c, sd.stack);
11800         }
11801
11802         PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
11803         {
11804                 if (!impl) return 0;
11805
11806                 if (impl->root->rettype() != xpath_type_node_set)
11807                 {
11808                 #ifdef PUGIXML_NO_EXCEPTIONS
11809                         return 0;
11810                 #else
11811                         xpath_parse_result res;
11812                         res.error = "Expression does not evaluate to node set";
11813
11814                         throw xpath_exception(res);
11815                 #endif
11816                 }
11817
11818                 return impl->root;
11819         }
11820 PUGI__NS_END
11821
11822 namespace pugi
11823 {
11824 #ifndef PUGIXML_NO_EXCEPTIONS
11825         PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
11826         {
11827                 assert(_result.error);
11828         }
11829
11830         PUGI__FN const char* xpath_exception::what() const throw()
11831         {
11832                 return _result.error;
11833         }
11834
11835         PUGI__FN const xpath_parse_result& xpath_exception::result() const
11836         {
11837                 return _result;
11838         }
11839 #endif
11840
11841         PUGI__FN xpath_node::xpath_node()
11842         {
11843         }
11844
11845         PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
11846         {
11847         }
11848
11849         PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
11850         {
11851         }
11852
11853         PUGI__FN xml_node xpath_node::node() const
11854         {
11855                 return _attribute ? xml_node() : _node;
11856         }
11857
11858         PUGI__FN xml_attribute xpath_node::attribute() const
11859         {
11860                 return _attribute;
11861         }
11862
11863         PUGI__FN xml_node xpath_node::parent() const
11864         {
11865                 return _attribute ? _node : _node.parent();
11866         }
11867
11868         PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
11869         {
11870         }
11871
11872         PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
11873         {
11874                 return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
11875         }
11876
11877         PUGI__FN bool xpath_node::operator!() const
11878         {
11879                 return !(_node || _attribute);
11880         }
11881
11882         PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
11883         {
11884                 return _node == n._node && _attribute == n._attribute;
11885         }
11886
11887         PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
11888         {
11889                 return _node != n._node || _attribute != n._attribute;
11890         }
11891
11892 #ifdef __BORLANDC__
11893         PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
11894         {
11895                 return (bool)lhs && rhs;
11896         }
11897
11898         PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
11899         {
11900                 return (bool)lhs || rhs;
11901         }
11902 #endif
11903
11904         PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
11905         {
11906                 assert(begin_ <= end_);
11907
11908                 size_t size_ = static_cast<size_t>(end_ - begin_);
11909
11910                 if (size_ <= 1)
11911                 {
11912                         // deallocate old buffer
11913                         if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
11914
11915                         // use internal buffer
11916                         if (begin_ != end_) _storage = *begin_;
11917
11918                         _begin = &_storage;
11919                         _end = &_storage + size_;
11920                         _type = type_;
11921                 }
11922                 else
11923                 {
11924                         // make heap copy
11925                         xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
11926
11927                         if (!storage)
11928                         {
11929                         #ifdef PUGIXML_NO_EXCEPTIONS
11930                                 return;
11931                         #else
11932                                 throw std::bad_alloc();
11933                         #endif
11934                         }
11935
11936                         memcpy(storage, begin_, size_ * sizeof(xpath_node));
11937
11938                         // deallocate old buffer
11939                         if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
11940
11941                         // finalize
11942                         _begin = storage;
11943                         _end = storage + size_;
11944                         _type = type_;
11945                 }
11946         }
11947
11948 #if __cplusplus >= 201103
11949         PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs)
11950         {
11951                 _type = rhs._type;
11952                 _storage = rhs._storage;
11953                 _begin = (rhs._begin == &rhs._storage) ? &_storage : rhs._begin;
11954                 _end = _begin + (rhs._end - rhs._begin);
11955
11956                 rhs._type = type_unsorted;
11957                 rhs._begin = &rhs._storage;
11958                 rhs._end = rhs._begin;
11959         }
11960 #endif
11961
11962         PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
11963         {
11964         }
11965
11966         PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage)
11967         {
11968                 _assign(begin_, end_, type_);
11969         }
11970
11971         PUGI__FN xpath_node_set::~xpath_node_set()
11972         {
11973                 if (_begin != &_storage)
11974                         impl::xml_memory::deallocate(_begin);
11975         }
11976
11977         PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage)
11978         {
11979                 _assign(ns._begin, ns._end, ns._type);
11980         }
11981
11982         PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
11983         {
11984                 if (this == &ns) return *this;
11985
11986                 _assign(ns._begin, ns._end, ns._type);
11987
11988                 return *this;
11989         }
11990
11991 #if __cplusplus >= 201103
11992         PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs): _type(type_unsorted), _begin(&_storage), _end(&_storage)
11993         {
11994                 _move(rhs);
11995         }
11996
11997         PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs)
11998         {
11999                 if (this == &rhs) return *this;
12000
12001                 if (_begin != &_storage)
12002                         impl::xml_memory::deallocate(_begin);
12003
12004                 _move(rhs);
12005
12006                 return *this;
12007         }
12008 #endif
12009
12010         PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
12011         {
12012                 return _type;
12013         }
12014
12015         PUGI__FN size_t xpath_node_set::size() const
12016         {
12017                 return _end - _begin;
12018         }
12019
12020         PUGI__FN bool xpath_node_set::empty() const
12021         {
12022                 return _begin == _end;
12023         }
12024
12025         PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
12026         {
12027                 assert(index < size());
12028                 return _begin[index];
12029         }
12030
12031         PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
12032         {
12033                 return _begin;
12034         }
12035
12036         PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
12037         {
12038                 return _end;
12039         }
12040
12041         PUGI__FN void xpath_node_set::sort(bool reverse)
12042         {
12043                 _type = impl::xpath_sort(_begin, _end, _type, reverse);
12044         }
12045
12046         PUGI__FN xpath_node xpath_node_set::first() const
12047         {
12048                 return impl::xpath_first(_begin, _end, _type);
12049         }
12050
12051         PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
12052         {
12053         }
12054
12055         PUGI__FN xpath_parse_result::operator bool() const
12056         {
12057                 return error == 0;
12058         }
12059
12060         PUGI__FN const char* xpath_parse_result::description() const
12061         {
12062                 return error ? error : "No error";
12063         }
12064
12065         PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
12066         {
12067         }
12068
12069         PUGI__FN const char_t* xpath_variable::name() const
12070         {
12071                 switch (_type)
12072                 {
12073                 case xpath_type_node_set:
12074                         return static_cast<const impl::xpath_variable_node_set*>(this)->name;
12075
12076                 case xpath_type_number:
12077                         return static_cast<const impl::xpath_variable_number*>(this)->name;
12078
12079                 case xpath_type_string:
12080                         return static_cast<const impl::xpath_variable_string*>(this)->name;
12081
12082                 case xpath_type_boolean:
12083                         return static_cast<const impl::xpath_variable_boolean*>(this)->name;
12084
12085                 default:
12086                         assert(false && "Invalid variable type");
12087                         return 0;
12088                 }
12089         }
12090
12091         PUGI__FN xpath_value_type xpath_variable::type() const
12092         {
12093                 return _type;
12094         }
12095
12096         PUGI__FN bool xpath_variable::get_boolean() const
12097         {
12098                 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
12099         }
12100
12101         PUGI__FN double xpath_variable::get_number() const
12102         {
12103                 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
12104         }
12105
12106         PUGI__FN const char_t* xpath_variable::get_string() const
12107         {
12108                 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
12109                 return value ? value : PUGIXML_TEXT("");
12110         }
12111
12112         PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
12113         {
12114                 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
12115         }
12116
12117         PUGI__FN bool xpath_variable::set(bool value)
12118         {
12119                 if (_type != xpath_type_boolean) return false;
12120
12121                 static_cast<impl::xpath_variable_boolean*>(this)->value = value;
12122                 return true;
12123         }
12124
12125         PUGI__FN bool xpath_variable::set(double value)
12126         {
12127                 if (_type != xpath_type_number) return false;
12128
12129                 static_cast<impl::xpath_variable_number*>(this)->value = value;
12130                 return true;
12131         }
12132
12133         PUGI__FN bool xpath_variable::set(const char_t* value)
12134         {
12135                 if (_type != xpath_type_string) return false;
12136
12137                 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
12138
12139                 // duplicate string
12140                 size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
12141
12142                 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
12143                 if (!copy) return false;
12144
12145                 memcpy(copy, value, size);
12146
12147                 // replace old string
12148                 if (var->value) impl::xml_memory::deallocate(var->value);
12149                 var->value = copy;
12150
12151                 return true;
12152         }
12153
12154         PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
12155         {
12156                 if (_type != xpath_type_node_set) return false;
12157
12158                 static_cast<impl::xpath_variable_node_set*>(this)->value = value;
12159                 return true;
12160         }
12161
12162         PUGI__FN xpath_variable_set::xpath_variable_set()
12163         {
12164                 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12165                         _data[i] = 0;
12166         }
12167
12168         PUGI__FN xpath_variable_set::~xpath_variable_set()
12169         {
12170                 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12171                         _destroy(_data[i]);
12172         }
12173
12174         PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
12175         {
12176                 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12177                         _data[i] = 0;
12178
12179                 _assign(rhs);
12180         }
12181
12182         PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
12183         {
12184                 if (this == &rhs) return *this;
12185
12186                 _assign(rhs);
12187
12188                 return *this;
12189         }
12190
12191 #if __cplusplus >= 201103
12192         PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs)
12193         {
12194                 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12195                 {
12196                         _data[i] = rhs._data[i];
12197                         rhs._data[i] = 0;
12198                 }
12199         }
12200
12201         PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs)
12202         {
12203                 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12204                 {
12205                         _destroy(_data[i]);
12206
12207                         _data[i] = rhs._data[i];
12208                         rhs._data[i] = 0;
12209                 }
12210
12211                 return *this;
12212         }
12213 #endif
12214
12215         PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
12216         {
12217                 xpath_variable_set temp;
12218
12219                 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12220                         if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
12221                                 return;
12222
12223                 _swap(temp);
12224         }
12225
12226         PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
12227         {
12228                 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12229                 {
12230                         xpath_variable* chain = _data[i];
12231
12232                         _data[i] = rhs._data[i];
12233                         rhs._data[i] = chain;
12234                 }
12235         }
12236
12237         PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
12238         {
12239                 if (!name) {
12240                         return 0;
12241                 }
12242                 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12243                 size_t hash = impl::hash_string(name) % hash_size;
12244
12245                 // look for existing variable
12246                 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12247                         if (var->name() && impl::strequal(var->name(), name))
12248                                 return var;
12249
12250                 return 0;
12251         }
12252
12253         PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
12254         {
12255                 xpath_variable* last = 0;
12256
12257                 while (var)
12258                 {
12259                         // allocate storage for new variable
12260                         xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
12261                         if (!nvar) return false;
12262
12263                         // link the variable to the result immediately to handle failures gracefully
12264                         if (last)
12265                                 last->_next = nvar;
12266                         else
12267                                 *out_result = nvar;
12268
12269                         last = nvar;
12270
12271                         // copy the value; this can fail due to out-of-memory conditions
12272                         if (!impl::copy_xpath_variable(nvar, var)) return false;
12273
12274                         var = var->_next;
12275                 }
12276
12277                 return true;
12278         }
12279
12280         PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
12281         {
12282                 while (var)
12283                 {
12284                         xpath_variable* next = var->_next;
12285
12286                         impl::delete_xpath_variable(var->_type, var);
12287
12288                         var = next;
12289                 }
12290         }
12291
12292         PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
12293         {
12294                 if (!name) {
12295                         return nullptr;
12296                 }
12297                 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12298                 size_t hash = impl::hash_string(name) % hash_size;
12299
12300                 // look for existing variable
12301                 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12302                         if (var->name() && impl::strequal(var->name(), name))
12303                                 return var->type() == type ? var : 0;
12304
12305                 // add new variable
12306                 xpath_variable* result = impl::new_xpath_variable(type, name);
12307
12308                 if (result)
12309                 {
12310                         result->_next = _data[hash];
12311
12312                         _data[hash] = result;
12313                 }
12314
12315                 return result;
12316         }
12317
12318         PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
12319         {
12320                 xpath_variable* var = add(name, xpath_type_boolean);
12321                 return var ? var->set(value) : false;
12322         }
12323
12324         PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
12325         {
12326                 xpath_variable* var = add(name, xpath_type_number);
12327                 return var ? var->set(value) : false;
12328         }
12329
12330         PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
12331         {
12332                 xpath_variable* var = add(name, xpath_type_string);
12333                 return var ? var->set(value) : false;
12334         }
12335
12336         PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
12337         {
12338                 xpath_variable* var = add(name, xpath_type_node_set);
12339                 return var ? var->set(value) : false;
12340         }
12341
12342         PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
12343         {
12344                 return _find(name);
12345         }
12346
12347         PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
12348         {
12349                 return _find(name);
12350         }
12351
12352         PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
12353         {
12354                 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
12355
12356                 if (!qimpl)
12357                 {
12358                 #ifdef PUGIXML_NO_EXCEPTIONS
12359                         _result.error = "Out of memory";
12360                 #else
12361                         throw std::bad_alloc();
12362                 #endif
12363                 }
12364                 else
12365                 {
12366                         using impl::auto_deleter; // MSVC7 workaround
12367                         auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
12368
12369                         qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
12370
12371                         if (qimpl->root)
12372                         {
12373                                 qimpl->root->optimize(&qimpl->alloc);
12374
12375                                 _impl = impl.release();
12376                                 _result.error = 0;
12377                         }
12378                 }
12379         }
12380
12381         PUGI__FN xpath_query::xpath_query(): _impl(0)
12382         {
12383         }
12384
12385         PUGI__FN xpath_query::~xpath_query()
12386         {
12387                 if (_impl)
12388                         impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12389         }
12390
12391 #if __cplusplus >= 201103
12392         PUGI__FN xpath_query::xpath_query(xpath_query&& rhs)
12393         {
12394                 _impl = rhs._impl;
12395                 _result = rhs._result;
12396                 rhs._impl = 0;
12397                 rhs._result = xpath_parse_result();
12398         }
12399
12400         PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs)
12401         {
12402                 if (this == &rhs) return *this;
12403
12404                 if (_impl)
12405                         impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12406
12407                 _impl = rhs._impl;
12408                 _result = rhs._result;
12409                 rhs._impl = 0;
12410                 rhs._result = xpath_parse_result();
12411
12412                 return *this;
12413         }
12414 #endif
12415
12416         PUGI__FN xpath_value_type xpath_query::return_type() const
12417         {
12418                 if (!_impl) return xpath_type_none;
12419
12420                 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
12421         }
12422
12423         PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
12424         {
12425                 if (!_impl) return false;
12426
12427                 impl::xpath_context c(n, 1, 1);
12428                 impl::xpath_stack_data sd;
12429
12430         #ifdef PUGIXML_NO_EXCEPTIONS
12431                 if (setjmp(sd.error_handler)) return false;
12432         #endif
12433
12434                 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
12435         }
12436
12437         PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
12438         {
12439                 if (!_impl) return impl::gen_nan();
12440
12441                 impl::xpath_context c(n, 1, 1);
12442                 impl::xpath_stack_data sd;
12443
12444         #ifdef PUGIXML_NO_EXCEPTIONS
12445                 if (setjmp(sd.error_handler)) return impl::gen_nan();
12446         #endif
12447
12448                 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
12449         }
12450
12451 #ifndef PUGIXML_NO_STL
12452         PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
12453         {
12454                 impl::xpath_stack_data sd;
12455
12456                 impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
12457
12458                 return string_t(r.c_str(), r.length());
12459         }
12460 #endif
12461
12462         PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
12463         {
12464                 impl::xpath_stack_data sd;
12465
12466                 impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
12467
12468                 size_t full_size = r.length() + 1;
12469
12470                 if (capacity > 0)
12471                 {
12472                         size_t size = (full_size < capacity) ? full_size : capacity;
12473                         assert(size > 0);
12474
12475                         memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
12476                         buffer[size - 1] = 0;
12477                 }
12478
12479                 return full_size;
12480         }
12481
12482         PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
12483         {
12484                 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12485                 if (!root) return xpath_node_set();
12486
12487                 impl::xpath_context c(n, 1, 1);
12488                 impl::xpath_stack_data sd;
12489
12490         #ifdef PUGIXML_NO_EXCEPTIONS
12491                 if (setjmp(sd.error_handler)) return xpath_node_set();
12492         #endif
12493
12494                 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
12495
12496                 return xpath_node_set(r.begin(), r.end(), r.type());
12497         }
12498
12499         PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
12500         {
12501                 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12502                 if (!root) return xpath_node();
12503
12504                 impl::xpath_context c(n, 1, 1);
12505                 impl::xpath_stack_data sd;
12506
12507         #ifdef PUGIXML_NO_EXCEPTIONS
12508                 if (setjmp(sd.error_handler)) return xpath_node();
12509         #endif
12510
12511                 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
12512
12513                 return r.first();
12514         }
12515
12516         PUGI__FN const xpath_parse_result& xpath_query::result() const
12517         {
12518                 return _result;
12519         }
12520
12521         PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
12522         {
12523         }
12524
12525         PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
12526         {
12527                 return _impl ? unspecified_bool_xpath_query : 0;
12528         }
12529
12530         PUGI__FN bool xpath_query::operator!() const
12531         {
12532                 return !_impl;
12533         }
12534
12535         PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
12536         {
12537                 xpath_query q(query, variables);
12538                 return select_node(q);
12539         }
12540
12541         PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
12542         {
12543                 return query.evaluate_node(*this);
12544         }
12545
12546         PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
12547         {
12548                 xpath_query q(query, variables);
12549                 return select_nodes(q);
12550         }
12551
12552         PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
12553         {
12554                 return query.evaluate_node_set(*this);
12555         }
12556
12557         PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
12558         {
12559                 xpath_query q(query, variables);
12560                 return select_single_node(q);
12561         }
12562
12563         PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
12564         {
12565                 return query.evaluate_node(*this);
12566         }
12567 }
12568
12569 #endif
12570
12571 #ifdef __BORLANDC__
12572 #       pragma option pop
12573 #endif
12574
12575 // Intel C++ does not properly keep warning state for function templates,
12576 // so popping warning state at the end of translation unit leads to warnings in the middle.
12577 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
12578 #       pragma warning(pop)
12579 #endif
12580
12581 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
12582 #undef PUGI__NO_INLINE
12583 #undef PUGI__UNLIKELY
12584 #undef PUGI__STATIC_ASSERT
12585 #undef PUGI__DMC_VOLATILE
12586 #undef PUGI__MSVC_CRT_VERSION
12587 #undef PUGI__NS_BEGIN
12588 #undef PUGI__NS_END
12589 #undef PUGI__FN
12590 #undef PUGI__FN_NO_INLINE
12591 #undef PUGI__GETHEADER_IMPL
12592 #undef PUGI__GETPAGE_IMPL
12593 #undef PUGI__GETPAGE
12594 #undef PUGI__NODETYPE
12595 #undef PUGI__IS_CHARTYPE_IMPL
12596 #undef PUGI__IS_CHARTYPE
12597 #undef PUGI__IS_CHARTYPEX
12598 #undef PUGI__ENDSWITH
12599 #undef PUGI__SKIPWS
12600 #undef PUGI__OPTSET
12601 #undef PUGI__PUSHNODE
12602 #undef PUGI__POPNODE
12603 #undef PUGI__SCANFOR
12604 #undef PUGI__SCANWHILE
12605 #undef PUGI__SCANWHILE_UNROLL
12606 #undef PUGI__ENDSEG
12607 #undef PUGI__THROW_ERROR
12608 #undef PUGI__CHECK_ERROR
12609
12610 #endif
12611
12612 /**
12613  * Copyright (c) 2006-2016 Arseny Kapoulkine
12614  *
12615  * Permission is hereby granted, free of charge, to any person
12616  * obtaining a copy of this software and associated documentation
12617  * files (the "Software"), to deal in the Software without
12618  * restriction, including without limitation the rights to use,
12619  * copy, modify, merge, publish, distribute, sublicense, and/or sell
12620  * copies of the Software, and to permit persons to whom the
12621  * Software is furnished to do so, subject to the following
12622  * conditions:
12623  *
12624  * The above copyright notice and this permission notice shall be
12625  * included in all copies or substantial portions of the Software.
12626  *
12627  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
12628  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
12629  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12630  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
12631  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
12632  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
12633  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
12634  * OTHER DEALINGS IN THE SOFTWARE.
12635  */