1 /*=============================================================================
2 Copyright (c) 2011, 2013 Daniel James
4 Use, modification and distribution is subject to the Boost Software
5 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
6 http://www.boost.org/LICENSE_1_0.txt)
7 =============================================================================*/
10 #include "document_state_impl.hpp"
11 #include <boost/make_shared.hpp>
12 #include <boost/unordered_map.hpp>
13 #include <boost/lexical_cast.hpp>
14 #include <boost/foreach.hpp>
15 #include <boost/range/algorithm.hpp>
17 // TODO: This should possibly try to always generate valid XML ids:
18 // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
22 // The maximum size of a generated part of an id.
24 // Not a strict maximum, sometimes broken because the user
25 // explicitly uses a longer id, or for backwards compatibility.
// Used as the default limit by the one-argument normalize_id and as the
// bound on base id plus numeric postfix in generate_id.
27 static const std::size_t max_size = 32;
// A list of pointers to placeholders; index_placeholders fills it with
// addresses of elements of state.placeholders.
29 typedef std::vector<id_placeholder const*> placeholder_index;
// Forward declaration: builds the sorted placeholder index for a document
// state and its xml text. Defined further down in this file.
30 placeholder_index index_placeholders(document_state_impl const&, boost::string_ref);
// Forward declaration: generates ids for the placeholders in the given
// iterator range, storing them in generated_ids. Defined further down in
// this file.
32 void generate_id_block(
33 placeholder_index::iterator, placeholder_index::iterator,
34 std::vector<std::string>& generated_ids);
// Generates the id strings for the placeholders referenced from `xml`.
//
// state: document state whose `placeholders` are being resolved.
// xml:   xml text, passed to index_placeholders to decide which
//        placeholders are processed and in what order.
//
// The `generated_ids` vector has one slot per entry of state.placeholders.
// Slots are filled by generate_id_block, one group of placeholders at a time;
// placeholders that index_placeholders leaves out keep an empty string.
//
// NOTE(review): this excerpt omits braces and some statements of the
// function (for example the body of the inner while loop and the return
// statement are not visible). The comments describe only the code shown.
36 std::vector<std::string> generate_ids(document_state_impl const& state, boost::string_ref xml)
// Start with an empty string for every placeholder.
38 std::vector<std::string> generated_ids(state.placeholders.size());
40 // Get a list of the placeholders in the order that we wish to
// process them (the continuation of the line above is missing from this
// excerpt; wording reconstructed, TODO confirm).
42 placeholder_index placeholders = index_placeholders(state, xml);
44 typedef std::vector<id_placeholder const*>::iterator iterator;
45 iterator it = placeholders.begin(), end = placeholders.end();
48 // We process all the ids that have the same number of dots
49 // together. Note that ids with different parents can clash, e.g.
50 // because of old fashioned id generation or anchors containing
// (NOTE(review): the end of the sentence above is missing from this excerpt.)
53 // So find the group of placeholders with the same number of dots.
// The index is sorted by num_dots first (see placeholder_compare), so a
// group is a contiguous run starting at `it`.
54 iterator group_begin = it, group_end = it;
55 while (group_end != end && (*group_end)->num_dots == (*it)->num_dots)
// (The loop body that advances group_end is not visible in this excerpt.)
// Hand the range from group_begin up to, but not including, group_end to
// the block generator, which writes into generated_ids.
58 generate_id_block(group_begin, group_end, generated_ids);
68 // Create a sorted index of the placeholders, which makes numbering
69 // duplicates easy. The sort order is a total order.
// Ordering predicate used by index_placeholders to sort the placeholder
// index.
//
// Placeholders are compared by, in priority order:
//   1. num_dots, fewest first;
//   2. whether category.c >= id_category::explicit_id, with placeholders
//      that satisfy this sorting before those that do not;
//   3. the value recorded for the placeholder in `order`, lowest first.
//
// NOTE(review): braces and the start of the return statement are not
// visible in this excerpt.
72 struct placeholder_compare
// Sequence numbers indexed by id_placeholder::index. Held by reference, so
// the vector has to outlive the comparator.
74 std::vector<unsigned>& order;
76 placeholder_compare(std::vector<unsigned>& order) : order(order) {}
// Compares two placeholders; the conditional chain below evaluates to true
// when x orders before y.
78 bool operator()(id_placeholder const* x, id_placeholder const* y) const
80 bool x_explicit = x->category.c >= id_category::explicit_id;
81 bool y_explicit = y->category.c >= id_category::explicit_id;
// Level 1: number of dots.
84 x->num_dots < y->num_dots ? true :
85 x->num_dots > y->num_dots ? false :
// Level 2: explicit ids (true) come before non-explicit ones (false).
86 x_explicit > y_explicit ? true :
87 x_explicit < y_explicit ? false :
// Level 3: recorded sequence number.
88 order[x->index] < order[y->index];
// xml_processor callback that assigns a sequence number to each placeholder
// it is told about.
//
// For each id value that maps to a placeholder, the placeholder receives the
// next sequence number in `order`, after any of its ancestors that have not
// been numbered yet. An entry of 0 in `order` is treated as not numbered.
//
// NOTE(review): the declaration and initialisation of `count`, the
// constructor initialiser list and the braces are not visible in this
// excerpt. Presumably id_value is invoked by xml_processor::parse for each
// id value in the xml; confirm against xml_processor.
92 struct get_placeholder_order_callback : xml_processor::callback
// Document state used to look placeholders up by value.
94 document_state_impl const& state;
// Sequence numbers indexed by id_placeholder::index; written by this
// callback and owned by the caller.
95 std::vector<unsigned>& order;
98 get_placeholder_order_callback(document_state_impl const& state,
99 std::vector<unsigned>& order)
// Looks up the placeholder for `value` and numbers it. A null lookup result
// is tolerated by set_placeholder_order.
105 void id_value(boost::string_ref value)
107 set_placeholder_order(state.get_placeholder(value));
// Numbers `p` if it is non-null and has no number yet. The parent chain is
// numbered first, so an ancestor always receives a smaller number than its
// descendants.
110 void set_placeholder_order(id_placeholder const* p)
112 if (p && !order[p->index]) {
113 set_placeholder_order(p->parent);
// Pre-increment: the stored number is the incremented value of count.
114 order[p->index] = ++count;
// Builds the sorted index of placeholders to process.
//
// state: document state providing the placeholders.
// xml:   xml text that is parsed to number the placeholders.
//
// Returns pointers to those elements of state.placeholders that received a
// non-zero sequence number during parsing, sorted with placeholder_compare.
// The pointers refer into state.placeholders, so they are only valid while
// that container is.
//
// NOTE(review): braces are not visible in this excerpt.
119 placeholder_index index_placeholders(
120 document_state_impl const& state,
121 boost::string_ref xml)
123 // The order in which the placeholders appear in the xml source.
// One entry per placeholder, initially 0 (meaning not numbered).
124 std::vector<unsigned> order(state.placeholders.size());
// Parse the xml; the callback fills in `order`.
126 xml_processor processor;
127 get_placeholder_order_callback callback(state, order);
128 processor.parse(xml, callback);
// Collect only the placeholders that were numbered, then sort them.
130 placeholder_index sorted_placeholders;
131 sorted_placeholders.reserve(state.placeholders.size());
132 BOOST_FOREACH(id_placeholder const& p, state.placeholders)
133 if (order[p.index]) sorted_placeholders.push_back(&p);
134 boost::sort(sorted_placeholders, placeholder_compare(order));
136 return sorted_placeholders;
139 // Resolve and generate ids.
// Helper that generates the ids for one group of placeholders.
//
// It keeps the set of ids already taken within the group (chosen_ids) and
// writes results into the caller-owned generated_ids vector.
//
// NOTE(review): braces and the line naming the typedef (chosen_id_map, as
// used on the following member) are not visible in this excerpt.
141 struct generate_id_block_type
143 // The ids which won't require duplicate handling.
// Maps an id string to the placeholder that currently holds it.
144 typedef boost::unordered_map<std::string, id_placeholder const*>
146 chosen_id_map chosen_ids;
// Output vector indexed by id_placeholder::index. Held by reference.
147 std::vector<std::string>& generated_ids;
149 generate_id_block_type(std::vector<std::string>& generated_ids) :
150 generated_ids(generated_ids) {}
// Generates ids for every placeholder in the range from begin up to, but
// not including, end.
152 void generate(placeholder_index::iterator begin,
153 placeholder_index::iterator end);
// Computes the id a placeholder would like to have and, for categories above
// id_category::numbered, tries to reserve it in chosen_ids.
155 std::string resolve_id(id_placeholder const*);
// Produces the final id for a placeholder from its resolved id.
156 std::string generate_id(id_placeholder const*, std::string const&);
// Generates ids for the placeholders in the range from begin up to, but not
// including, end, storing each result in generated_ids.
//
// A fresh generate_id_block_type is created per call, so the set of reserved
// ids (chosen_ids) starts empty for every block.
//
// NOTE(review): braces are not visible in this excerpt.
159 void generate_id_block(placeholder_index::iterator begin,
160 placeholder_index::iterator end,
161 std::vector<std::string>& generated_ids)
163 generate_id_block_type impl(generated_ids);
164 impl.generate(begin, end);
// Generates ids for the placeholders in the range from begin up to, but not
// including, end.
//
// Works in two passes so that every reservation made by resolve_id is in
// chosen_ids before generate_id consults it for any placeholder of the group.
//
// NOTE(review): braces and the declaration of `index` are not visible in
// this excerpt.
167 void generate_id_block_type::generate(placeholder_index::iterator begin,
168 placeholder_index::iterator end)
// resolved_ids[k] holds the resolved id of the k-th placeholder of the range.
170 std::vector<std::string> resolved_ids;
// Pass 1: resolve every id (this also reserves ids, see resolve_id).
172 for (placeholder_index::iterator i = begin; i != end; ++i)
173 resolved_ids.push_back(resolve_id(*i));
// Pass 2: turn each resolved id into the final id. `index` advances in step
// with `i` to select the matching entry of resolved_ids; the result is
// stored at the position given by the placeholder's own index.
176 for (placeholder_index::iterator i = begin; i != end; ++i, ++index)
178 generated_ids[(**i).index] =
179 generate_id(*i, resolved_ids[index]);
// Computes the id that placeholder `p` would like to have.
//
// When p has a parent, the id is the already generated id of the parent,
// a dot, and p->id. For placeholders in a category above
// id_category::numbered the id is also entered into chosen_ids unless it is
// already present.
//
// NOTE(review): braces, the branch used when p has no parent, the statement
// guarded by the final condition and the return statement are not visible in
// this excerpt.
183 std::string generate_id_block_type::resolve_id(id_placeholder const* p)
// Reads generated_ids for the parent, so the parent id must have been
// generated before this call.
185 std::string id = p->parent ?
186 generated_ids[p->parent->index] + "." + p->id :
189 if (p->category.c > id_category::numbered) {
190 // Reserve the id if it isn't already reserved.
// emplace leaves an existing entry untouched; `pos` refers to whichever
// entry is in the map afterwards.
191 chosen_id_map::iterator pos = chosen_ids.emplace(id, p).first;
193 // If it was reserved by a placeholder with a lower category,
194 // then overwrite it.
195 if (p->category.c > pos->second->category.c)
// Produces the final id for placeholder `p` from the id computed for it by
// resolve_id.
//
// p:           the placeholder being processed.
// resolved_id: the id returned by resolve_id for p.
//
// The first condition recognises a placeholder that holds the reservation
// for resolved_id in chosen_ids. Otherwise the visible code derives an id by
// appending a decimal counter to a normalized, possibly shortened, base id
// and trying to reserve the result in chosen_ids.
//
// Throws std::runtime_error with the message Too many ids when the base id
// has been shortened to nothing and the id is still too long.
//
// NOTE(review): many lines of this function are missing from this excerpt
// (braces, else branches, the declaration of `count`, the statements that
// change `length`, the loop header and the return statements). The comments
// describe only the code shown.
//
// NOTE(review): std::isdigit is called with a plain char. Its argument must
// be representable as unsigned char or equal EOF, so negative char values
// are undefined behaviour; consider casting to unsigned char.
202 std::string generate_id_block_type::generate_id(id_placeholder const* p,
203 std::string const& resolved_id)
// The category test comes first so that chosen_ids.at, which throws for a
// missing key, is only evaluated for placeholders whose id resolve_id has
// entered into chosen_ids.
205 if (p->category.c > id_category::numbered &&
206 chosen_ids.at(resolved_id) == p)
211 // Split the id into its parent part and child part.
213 // Note: can't just use the placeholder's parent, as the
214 // placeholder id might contain dots.
215 std::size_t child_start = resolved_id.rfind('.');
216 std::string parent_id, base_id;
// No dot: the whole id is the base, normalized to at most max_size - 1
// characters; parent_id stays empty.
218 if (child_start == std::string::npos) {
219 base_id = normalize_id(resolved_id, max_size - 1);
// Otherwise parent_id keeps everything up to and including the last dot and
// the base is the normalized remainder.
222 parent_id = resolved_id.substr(0, child_start + 1);
223 base_id = normalize_id(resolved_id.substr(child_start + 1),
227 // Since we're adding digits, don't want an id that ends in
// a digit (NOTE(review): end of the sentence above is missing from this
// excerpt; wording reconstructed, TODO confirm).
// NOTE(review): size() is stored in an unsigned int here.
230 unsigned int length = base_id.size();
232 if (length > 0 && std::isdigit(base_id[length - 1])) {
// Two cases depending on whether there is still room below max_size - 1;
// the statements of the first case are not visible.
233 if (length < max_size - 1) {
// Scan back over trailing digits, then cut the base id at `length`.
238 while (length > 0 && std::isdigit(base_id[length -1]))
240 base_id.erase(length);
// Candidate postfix: the current counter value as decimal text; the counter
// is incremented for the next attempt.
248 std::string postfix =
249 boost::lexical_cast<std::string>(count++);
251 if ((base_id.size() + postfix.size()) > max_size) {
252 // The id is now too long, so reduce the length and
// try again (NOTE(review): continuation of the line above is missing from
// this excerpt).
255 // Would need a lot of ids to get this far....
256 if (length == 0) throw std::runtime_error("Too many ids");
261 // Trim any trailing digits.
262 while (length > 0 && std::isdigit(base_id[length -1]))
265 base_id.erase(length);
269 // Try to reserve this id.
270 std::string generated_id = parent_id + base_id + postfix;
// emplace reports through `second` whether the id was free and is now
// reserved for p.
272 if (chosen_ids.emplace(generated_id, p).second) {
282 // Return a copy of the xml with all the placeholders replaced by their
// generated ids, or by their unresolved ids when no generated ids are given.
// xml_processor callback that builds a copy of the xml in `result`, with
// each id value that names a placeholder replaced by an id string.
//
// The replacement is the entry of `ids` at the placeholder index when `ids`
// is non-null, and the unresolved_id of the placeholder otherwise.
//
// NOTE(review): the declaration of `result`, the constructor initialiser
// list and the braces are not visible in this excerpt. Presumably start,
// id_value and finish are invoked by xml_processor::parse; confirm against
// xml_processor.
286 struct replace_ids_callback : xml_processor::callback
// Document state used to look placeholders up by value.
288 document_state_impl const& state;
// Generated ids indexed by id_placeholder::index; may be null.
289 std::vector<std::string> const* ids;
// Position in the source xml up to which text has been copied to `result`.
290 boost::string_ref::const_iterator source_pos;
293 replace_ids_callback(document_state_impl const& state,
294 std::vector<std::string> const* ids)
// Begin copying from the start of the xml.
301 void start(boost::string_ref xml)
303 source_pos = xml.begin();
// Values that do not map to a placeholder are left alone; they are copied
// later as part of the surrounding text.
306 void id_value(boost::string_ref value)
308 if (id_placeholder const* p = state.get_placeholder(value))
310 boost::string_ref id = ids ?
311 (*ids)[p->index] : p->unresolved_id;
// Copy the source text preceding the value, then the replacement id, and
// continue after the value. This treats source_pos and value.begin() as one
// range, so `value` has to be a sub-range of the xml given to start.
313 result.append(source_pos, value.begin());
314 result.append(id.begin(), id.end());
315 source_pos = value.end();
// Copy whatever source text remains after the last replacement.
319 void finish(boost::string_ref xml)
321 result.append(source_pos, xml.end());
322 source_pos = xml.end();
// Returns a copy of `xml` with placeholder id values replaced.
//
// state: document state used to recognise placeholders.
// xml:   the xml text to process.
// ids:   generated ids indexed by placeholder index, or null to substitute
//        the unresolved_id of each placeholder instead.
//
// NOTE(review): braces are not visible in this excerpt.
326 std::string replace_ids(document_state_impl const& state, boost::string_ref xml,
327 std::vector<std::string> const* ids)
329 xml_processor processor;
330 replace_ids_callback callback(state, ids);
// The callback accumulates the rewritten xml in callback.result.
331 processor.parse(xml, callback);
332 return callback.result;
338 // Normalizes generated ids.
// Normalizes src_id using the default size limit max_size, by delegating to
// the two-argument overload.
//
// NOTE(review): braces are not visible in this excerpt.
341 std::string normalize_id(boost::string_ref src_id)
343 return normalize_id(src_id, max_size);
346 std::string normalize_id(boost::string_ref src_id, std::size_t size)
348 std::string id(src_id.begin(), src_id.end());
353 while (src < id.length() && id[src] == '_') {
357 if (src == id.length()) {
361 while (src < id.length() && dst < size) {
362 if (id[src] == '_') {
365 } while(src < id.length() && id[src] == '_');
367 if (src < id.length()) id[dst++] = '_';
370 id[dst++] = id[src++];