Imported Upstream version 1.57.0
[platform/upstream/boost.git] / tools / quickbook / src / id_generation.cpp
1 /*=============================================================================
2     Copyright (c) 2011, 2013 Daniel James
3
4     Use, modification and distribution is subject to the Boost Software
5     License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
6     http://www.boost.org/LICENSE_1_0.txt)
7 =============================================================================*/
8
9 #include <cctype>
10 #include "document_state_impl.hpp"
11 #include <boost/make_shared.hpp>
12 #include <boost/unordered_map.hpp>
13 #include <boost/lexical_cast.hpp>
14 #include <boost/foreach.hpp>
15 #include <boost/range/algorithm.hpp>
16
17 // TODO: This should possibly try to always generate valid XML ids:
18 // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
19
20 namespace quickbook {
21     //
22     // The maximum size of a generated part of an id.
23     //
24     // Not a strict maximum, sometimes broken because the user
25     // explicitly uses a longer id, or for backwards compatibility.
26
27     static const std::size_t max_size = 32;
28
29     typedef std::vector<id_placeholder const*> placeholder_index;
30     placeholder_index index_placeholders(document_state_impl const&, boost::string_ref);
31
32     void generate_id_block(
33             placeholder_index::iterator, placeholder_index::iterator,
34             std::vector<std::string>& generated_ids);
35
36     std::vector<std::string> generate_ids(document_state_impl const& state, boost::string_ref xml)
37     {
38         std::vector<std::string> generated_ids(state.placeholders.size());
39
40         // Get a list of the placeholders in the order that we wish to
41         // process them.
42         placeholder_index placeholders = index_placeholders(state, xml);
43
44         typedef std::vector<id_placeholder const*>::iterator iterator;
45         iterator it = placeholders.begin(), end = placeholders.end();
46
47         while (it != end) {
48             // We process all the ids that have the same number of dots
49             // together. Note that ids with different parents can clash, e.g.
50             // because of old fashioned id generation or anchors containing
51             // multiple dots.
52             //
53             // So find the group of placeholders with the same number of dots.
54             iterator group_begin = it, group_end = it;
55             while (group_end != end && (*group_end)->num_dots == (*it)->num_dots)
56                 ++group_end;
57
58             generate_id_block(group_begin, group_end, generated_ids);
59             it = group_end;
60         }
61
62         return generated_ids;
63     }
64
65     //
66     // index_placeholders
67     //
68     // Create a sorted index of the placeholders, in order
69     // to make numbering duplicates easy. A total order.
70     //
71
72     struct placeholder_compare
73     {
74         std::vector<unsigned>& order;
75
76         placeholder_compare(std::vector<unsigned>& order) : order(order) {}
77
78         bool operator()(id_placeholder const* x, id_placeholder const* y) const
79         {
80             bool x_explicit = x->category.c >= id_category::explicit_id;
81             bool y_explicit = y->category.c >= id_category::explicit_id;
82
83             return
84                 x->num_dots < y->num_dots ? true :
85                 x->num_dots > y->num_dots ? false :
86                 x_explicit > y_explicit ? true :
87                 x_explicit < y_explicit ? false :
88                 order[x->index] < order[y->index];
89         }
90     };
91
92     struct get_placeholder_order_callback : xml_processor::callback
93     {
94         document_state_impl const& state;
95         std::vector<unsigned>& order;
96         unsigned count;
97
98         get_placeholder_order_callback(document_state_impl const& state,
99                 std::vector<unsigned>& order)
100           : state(state),
101             order(order),
102             count(0)
103         {}
104
105         void id_value(boost::string_ref value)
106         {
107             set_placeholder_order(state.get_placeholder(value));
108         }
109
110         void set_placeholder_order(id_placeholder const* p)
111         {
112             if (p && !order[p->index]) {
113                 set_placeholder_order(p->parent);
114                 order[p->index] = ++count;
115             }
116         }
117     };
118
119     placeholder_index index_placeholders(
120             document_state_impl const& state,
121             boost::string_ref xml)
122     {
123         // The order that the placeholder appear in the xml source.
124         std::vector<unsigned> order(state.placeholders.size());
125
126         xml_processor processor;
127         get_placeholder_order_callback callback(state, order);
128         processor.parse(xml, callback);
129
130         placeholder_index sorted_placeholders;
131         sorted_placeholders.reserve(state.placeholders.size());
132         BOOST_FOREACH(id_placeholder const& p, state.placeholders)
133             if (order[p.index]) sorted_placeholders.push_back(&p);
134         boost::sort(sorted_placeholders, placeholder_compare(order));
135
136         return sorted_placeholders;
137     }
138
139     // Resolve and generate ids.
140
141     struct generate_id_block_type
142     {
143         // The ids which won't require duplicate handling.
144         typedef boost::unordered_map<std::string, id_placeholder const*>
145             chosen_id_map;
146         chosen_id_map chosen_ids;
147         std::vector<std::string>& generated_ids;
148
149         generate_id_block_type(std::vector<std::string>& generated_ids) :
150             generated_ids(generated_ids) {}
151
152         void generate(placeholder_index::iterator begin,
153             placeholder_index::iterator end);
154
155         std::string resolve_id(id_placeholder const*);
156         std::string generate_id(id_placeholder const*, std::string const&);
157     };
158
159     void generate_id_block(placeholder_index::iterator begin,
160             placeholder_index::iterator end,
161             std::vector<std::string>& generated_ids)
162     {
163         generate_id_block_type impl(generated_ids);
164         impl.generate(begin, end);
165     }
166
167     void generate_id_block_type::generate(placeholder_index::iterator begin,
168             placeholder_index::iterator end)
169     {
170         std::vector<std::string> resolved_ids;
171
172         for (placeholder_index::iterator i = begin; i != end; ++i)
173             resolved_ids.push_back(resolve_id(*i));
174
175         unsigned index = 0;
176         for (placeholder_index::iterator i = begin; i != end; ++i, ++index)
177         {
178             generated_ids[(**i).index] =
179                 generate_id(*i, resolved_ids[index]);
180         }
181     }
182
183     std::string generate_id_block_type::resolve_id(id_placeholder const* p)
184     {
185         std::string id = p->parent ?
186             generated_ids[p->parent->index] + "." + p->id :
187             p->id;
188
189         if (p->category.c > id_category::numbered) {
190             // Reserve the id if it isn't already reserved.
191             chosen_id_map::iterator pos = chosen_ids.emplace(id, p).first;
192
193             // If it was reserved by a placeholder with a lower category,
194             // then overwrite it.
195             if (p->category.c > pos->second->category.c)
196                 pos->second = p;
197         }
198
199         return id;
200     }
201
202     std::string generate_id_block_type::generate_id(id_placeholder const* p,
203             std::string const& resolved_id)
204     {
205         if (p->category.c > id_category::numbered &&
206                 chosen_ids.at(resolved_id) == p)
207         {
208             return resolved_id;
209         }
210
211         // Split the id into its parent part and child part.
212         //
213         // Note: can't just use the placeholder's parent, as the
214         // placeholder id might contain dots.
215         std::size_t child_start = resolved_id.rfind('.');
216         std::string parent_id, base_id;
217
218         if (child_start == std::string::npos) {
219             base_id = normalize_id(resolved_id, max_size - 1);
220         }
221         else {
222             parent_id = resolved_id.substr(0, child_start + 1);
223             base_id = normalize_id(resolved_id.substr(child_start + 1),
224                     max_size - 1);
225         }
226
227         // Since we're adding digits, don't want an id that ends in
228         // a digit.
229
230         unsigned int length = base_id.size();
231
232         if (length > 0 && std::isdigit(base_id[length - 1])) {
233             if (length < max_size - 1) {
234                 base_id += '_';
235                 ++length;
236             }
237             else {
238                 while (length > 0 && std::isdigit(base_id[length -1]))
239                     --length;
240                 base_id.erase(length);
241             }
242         }
243
244         unsigned count = 0;
245
246         while (true)
247         {
248             std::string postfix =
249                 boost::lexical_cast<std::string>(count++);
250
251             if ((base_id.size() + postfix.size()) > max_size) {
252                 // The id is now too long, so reduce the length and
253                 // start again.
254
255                 // Would need a lot of ids to get this far....
256                 if (length == 0) throw std::runtime_error("Too many ids");
257
258                 // Trim a character.
259                 --length;
260
261                 // Trim any trailing digits.
262                 while (length > 0 && std::isdigit(base_id[length -1]))
263                     --length;
264
265                 base_id.erase(length);
266                 count = 0;
267             }
268             else {
269                 // Try to reserve this id.
270                 std::string generated_id = parent_id + base_id + postfix;
271
272                 if (chosen_ids.emplace(generated_id, p).second) {
273                     return generated_id;
274                 }
275             }
276         }
277     }
278
279     //
280     // replace_ids
281     //
282     // Return a copy of the xml with all the placeholders replaced by
283     // generated_ids.
284     //
285
286     struct replace_ids_callback : xml_processor::callback
287     {
288         document_state_impl const& state;
289         std::vector<std::string> const* ids;
290         boost::string_ref::const_iterator source_pos;
291         std::string result;
292
293         replace_ids_callback(document_state_impl const& state,
294                 std::vector<std::string> const* ids)
295           : state(state),
296             ids(ids),
297             source_pos(),
298             result()
299         {}
300
301         void start(boost::string_ref xml)
302         {
303             source_pos = xml.begin();
304         }
305
306         void id_value(boost::string_ref value)
307         {
308             if (id_placeholder const* p = state.get_placeholder(value))
309             {
310                 boost::string_ref id = ids ?
311                     (*ids)[p->index] : p->unresolved_id;
312
313                 result.append(source_pos, value.begin());
314                 result.append(id.begin(), id.end());
315                 source_pos = value.end();
316             }
317         }
318
319         void finish(boost::string_ref xml)
320         {
321             result.append(source_pos, xml.end());
322             source_pos = xml.end();
323         }
324     };
325
326     std::string replace_ids(document_state_impl const& state, boost::string_ref xml,
327             std::vector<std::string> const* ids)
328     {
329         xml_processor processor;
330         replace_ids_callback callback(state, ids);
331         processor.parse(xml, callback);
332         return callback.result;
333     }
334
335     //
336     // normalize_id
337     //
338     // Normalizes generated ids.
339     //
340
341     std::string normalize_id(boost::string_ref src_id)
342     {
343         return normalize_id(src_id, max_size);
344     }
345
346     std::string normalize_id(boost::string_ref src_id, std::size_t size)
347     {
348         std::string id(src_id.begin(), src_id.end());
349
350         std::size_t src = 0;
351         std::size_t dst = 0;
352
353         while (src < id.length() && id[src] == '_') {
354             ++src;
355         }
356
357         if (src == id.length()) {
358             id = "_";
359         }
360         else {
361             while (src < id.length() && dst < size) {
362                 if (id[src] == '_') {
363                     do {
364                         ++src;
365                     } while(src < id.length() && id[src] == '_');
366
367                     if (src < id.length()) id[dst++] = '_';
368                 }
369                 else {
370                     id[dst++] = id[src++];
371                 }
372             }
373
374             id.erase(dst);
375         }
376
377         return id;
378     }
379 }