Imported Upstream version 1.57.0
[platform/upstream/boost.git] / tools / quickbook / src / syntax_highlight.cpp
1 /*=============================================================================
2     Copyright (c) 2002 2004 2006 Joel de Guzman
3     Copyright (c) 2004 Eric Niebler
4     http://spirit.sourceforge.net/
5
6     Use, modification and distribution is subject to the Boost Software
7     License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
8     http://www.boost.org/LICENSE_1_0.txt)
9 =============================================================================*/
10 #include <boost/spirit/include/classic_core.hpp>
11 #include <boost/spirit/include/classic_confix.hpp>
12 #include <boost/spirit/include/classic_chset.hpp>
13 #include <boost/spirit/include/classic_symbols.hpp>
14 #include <boost/spirit/include/classic_loops.hpp>
15 #include "grammar.hpp"
16 #include "state.hpp"
17 #include "actions.hpp"
18 #include "syntax_highlight.hpp"
19 #include "utils.hpp"
20 #include "files.hpp"
21 #include "native_text.hpp"
22 #include "phrase_tags.hpp"
23
24 namespace quickbook
25 {    
26     namespace cl = boost::spirit::classic;
27
28     // Syntax Highlight Actions
29
30     struct syntax_highlight_actions
31     {
32         quickbook::state& state;
33         do_macro_action do_macro_impl;
34
35         // State
36         bool support_callouts;
37         boost::string_ref marked_text;
38
39         syntax_highlight_actions(quickbook::state& state, bool is_block) :
40             state(state),
41             do_macro_impl(state),
42             support_callouts(is_block && (qbk_version_n >= 107u ||
43                 state.current_file->is_code_snippets)),
44             marked_text()
45         {}
46
47         void span(parse_iterator, parse_iterator, char const*);
48         void span_start(parse_iterator, parse_iterator, char const*);
49         void span_end(parse_iterator, parse_iterator);
50         void unexpected_char(parse_iterator, parse_iterator);
51         void plain_char(parse_iterator, parse_iterator);
52         void pre_escape_back(parse_iterator, parse_iterator);
53         void post_escape_back(parse_iterator, parse_iterator);
54         void do_macro(std::string const&);
55
56         void mark_text(parse_iterator, parse_iterator);
57         void callout(parse_iterator, parse_iterator);
58     };
59
60     void syntax_highlight_actions::span(parse_iterator first,
61             parse_iterator last, char const* name)
62     {
63         state.phrase << "<phrase role=\"" << name << "\">";
64         while (first != last)
65             detail::print_char(*first++, state.phrase.get());
66         state.phrase << "</phrase>";
67     }
68
69     void syntax_highlight_actions::span_start(parse_iterator first,
70             parse_iterator last, char const* name)
71     {
72         state.phrase << "<phrase role=\"" << name << "\">";
73         while (first != last)
74             detail::print_char(*first++, state.phrase.get());
75     }
76
77     void syntax_highlight_actions::span_end(parse_iterator first,
78             parse_iterator last)
79     {
80         while (first != last)
81             detail::print_char(*first++, state.phrase.get());
82         state.phrase << "</phrase>";
83     }
84
85     void syntax_highlight_actions::unexpected_char(parse_iterator first,
86             parse_iterator last)
87     {
88         file_position const pos = state.current_file->position_of(first.base());
89
90         detail::outwarn(state.current_file->path, pos.line)
91             << "in column:" << pos.column
92             << ", unexpected character: " << std::string(first.base(), last.base())
93             << "\n";
94
95         // print out an unexpected character
96         state.phrase << "<phrase role=\"error\">";
97         while (first != last)
98             detail::print_char(*first++, state.phrase.get());
99         state.phrase << "</phrase>";
100     }
101
102     void syntax_highlight_actions::plain_char(parse_iterator first,
103             parse_iterator last)
104     {
105         while (first != last)
106             detail::print_char(*first++, state.phrase.get());
107     }
108
109     void syntax_highlight_actions::pre_escape_back(parse_iterator,
110             parse_iterator)
111     {
112         state.push_output(); // save the stream
113     }
114
115     void syntax_highlight_actions::post_escape_back(parse_iterator,
116             parse_iterator)
117     {
118         std::string tmp;
119         state.phrase.swap(tmp);
120         state.pop_output(); // restore the stream
121         state.phrase << tmp;
122     }
123
124     void syntax_highlight_actions::do_macro(std::string const& v)
125     {
126         do_macro_impl(v);
127     }
128
129     void syntax_highlight_actions::mark_text(parse_iterator first,
130             parse_iterator last)
131     {
132         marked_text = boost::string_ref(first.base(), last.base() - first.base());
133     }
134
135     void syntax_highlight_actions::callout(parse_iterator, parse_iterator)
136     {
137         state.phrase << state.add_callout(qbk_value(state.current_file,
138             marked_text.begin(), marked_text.end()));
139         marked_text.clear();
140     }
141
142     // Syntax
143
144     struct keywords_holder
145     {
146         cl::symbols<> cpp, python;
147
148         keywords_holder()
149         {
150             cpp
151                     =   "alignas", "alignof", "and_eq", "and", "asm", "auto",
152                         "bitand", "bitor", "bool", "break", "case", "catch",
153                         "char", "char16_t", "char32_t", "class", "compl",
154                         "const", "const_cast", "constexpr", "continue",
155                         "decltype", "default", "delete", "do", "double",
156                         "dynamic_cast",  "else", "enum", "explicit", "export",
157                         "extern", "false", "float", "for", "friend", "goto",
158                         "if", "inline", "int", "long", "mutable", "namespace",
159                         "new", "noexcept", "not_eq", "not", "nullptr",
160                         "operator", "or_eq", "or", "private", "protected",
161                         "public", "register", "reinterpret_cast", "return",
162                         "short", "signed", "sizeof", "static", "static_assert",
163                         "static_cast", "struct", "switch", "template", "this",
164                         "thread_local", "throw", "true", "try", "typedef",
165                         "typeid", "typename", "union", "unsigned", "using",
166                         "virtual", "void", "volatile", "wchar_t", "while",
167                         "xor_eq", "xor"
168                     ;
169
170             python
171                     =
172                     "and",       "del",       "for",       "is",        "raise",
173                     "assert",    "elif",      "from",      "lambda",    "return",
174                     "break",     "else",      "global",    "not",       "try",
175                     "class",     "except",    "if",        "or",        "while",
176                     "continue",  "exec",      "import",    "pass",      "yield",
177                     "def",       "finally",   "in",        "print",
178
179                     // Technically "as" and "None" are not yet keywords (at Python
180                     // 2.4). They are destined to become keywords, and we treat them
181                     // as such for syntax highlighting purposes.
182
183                     "as", "None"
184                     ;
185         }
186     };
187
188     namespace {
189         keywords_holder keywords;
190     }
191
192     // Grammar for C++ highlighting
193     struct cpp_highlight : public cl::grammar<cpp_highlight>
194     {
195         cpp_highlight(syntax_highlight_actions& actions)
196             : actions(actions) {}
197
198         template <typename Scanner>
199         struct definition
200         {
201             definition(cpp_highlight const& self)
202                 : g(self.actions.state.grammar())
203             {
204                 member_action1<syntax_highlight_actions, char const*>
205                     span(self.actions, &syntax_highlight_actions::span),
206                     span_start(self.actions, &syntax_highlight_actions::span_start);
207                 member_action<syntax_highlight_actions>
208                     span_end(self.actions, &syntax_highlight_actions::span_end),
209                     unexpected_char(self.actions, &syntax_highlight_actions::unexpected_char),
210                     plain_char(self.actions, &syntax_highlight_actions::plain_char),
211                     pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back),
212                     post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back),
213                     mark_text(self.actions, &syntax_highlight_actions::mark_text),
214                     callout(self.actions, &syntax_highlight_actions::callout);
215                 member_action_value<syntax_highlight_actions, std::string const&>
216                     do_macro(self.actions, &syntax_highlight_actions::do_macro);
217                 error_action error(self.actions.state);
218
219                 program =
220                     *(  (*cl::space_p)                  [plain_char]
221                     >>  (line_start | rest_of_line)
222                     >>  *rest_of_line
223                     )
224                     ;
225
226                 line_start =
227                         preprocessor                    [span("preprocessor")]
228                     ;
229                 
230                 rest_of_line = 
231                         (+cl::blank_p)                  [plain_char]
232                     |   macro
233                     |   escape
234                     |   cl::eps_p(ph::var(self.actions.support_callouts))
235                     >>  (   line_callout                [callout]
236                         |   inline_callout              [callout]
237                         )
238                     |   comment
239                     |   keyword                         [span("keyword")]
240                     |   identifier                      [span("identifier")]
241                     |   special                         [span("special")]
242                     |   string_                         [span("string")]
243                     |   char_                           [span("char")]
244                     |   number                          [span("number")]
245                     |   ~cl::eps_p(cl::eol_p)
246                     >>  u8_codepoint_p                  [unexpected_char]
247                     ;
248
249                 macro =
250                     // must not be followed by alpha or underscore
251                     cl::eps_p(self.actions.state.macro
252                         >> (cl::eps_p - (cl::alpha_p | '_')))
253                     >> self.actions.state.macro
254                                                         [do_macro]
255                     ;
256
257                 escape =
258                     cl::str_p("``")                     [pre_escape_back]
259                     >>
260                     (
261                         (
262                             (
263                                 (+(cl::anychar_p - "``") >> cl::eps_p("``"))
264                                 & g.phrase_start
265                             )
266                             >>  cl::str_p("``")
267                         )
268                         |
269                         (
270                             cl::eps_p                   [error]
271                             >> *cl::anychar_p
272                         )
273                     )                                   [post_escape_back]
274                     ;
275
276                 preprocessor
277                     =   '#' >> *cl::space_p >> ((cl::alpha_p | '_') >> *(cl::alnum_p | '_'))
278                     ;
279
280                 inline_callout
281                     =   cl::confix_p(
282                             "/*<" >> *cl::space_p,
283                             (*cl::anychar_p)            [mark_text],
284                             ">*/"
285                         )
286                         ;
287
288                 line_callout
289                     =   cl::confix_p(
290                             "/*<<" >> *cl::space_p,
291                             (*cl::anychar_p)            [mark_text],
292                             ">>*/"
293                         )
294                     >>  *cl::space_p
295                     ;
296
297                 comment
298                     =   cl::str_p("//")                 [span_start("comment")]
299                     >>  *(  escape
300                         |   (+(cl::anychar_p - (cl::eol_p | "``")))
301                                                         [plain_char]
302                         )
303                     >>  cl::eps_p                       [span_end]
304                     |   cl::str_p("/*")                 [span_start("comment")]
305                     >>  *(  escape
306                         |   (+(cl::anychar_p - (cl::str_p("*/") | "``")))
307                                                         [plain_char]
308                         )
309                     >>  (!cl::str_p("*/"))              [span_end]
310                     ;
311
312                 keyword
313                     =   keywords.cpp >> (cl::eps_p - (cl::alnum_p | '_'))
314                     ;   // make sure we recognize whole words only
315
316                 special
317                     =   +cl::chset_p("~!%^&*()+={[}]:;,<.>?/|\\#-")
318                     ;
319
320                 string_char = ('\\' >> u8_codepoint_p) | (cl::anychar_p - '\\');
321
322                 string_
323                     =   !cl::as_lower_d['l'] >> cl::confix_p('"', *string_char, '"')
324                     ;
325
326                 char_
327                     =   !cl::as_lower_d['l'] >> cl::confix_p('\'', *string_char, '\'')
328                     ;
329
330                 number
331                     =   (
332                             cl::as_lower_d["0x"] >> cl::hex_p
333                         |   '0' >> cl::oct_p
334                         |   cl::real_p
335                         )
336                         >>  *cl::as_lower_d[cl::chset_p("ldfu")]
337                     ;
338
339                 identifier
340                     =   (cl::alpha_p | '_') >> *(cl::alnum_p | '_')
341                     ;
342             }
343
344             cl::rule<Scanner>
345                             program, line_start, rest_of_line, macro, preprocessor,
346                             inline_callout, line_callout, comment,
347                             special, string_, 
348                             char_, number, identifier, keyword, escape,
349                             string_char;
350
351             quickbook_grammar& g;
352
353             cl::rule<Scanner> const&
354             start() const { return program; }
355         };
356
357         syntax_highlight_actions& actions;
358     };
359
360     // Grammar for Python highlighting
361     // See also: The Python Reference Manual
362     // http://docs.python.org/ref/ref.html
363     struct python_highlight : public cl::grammar<python_highlight>
364     {
365         python_highlight(syntax_highlight_actions& actions)
366             : actions(actions) {}
367
368         template <typename Scanner>
369         struct definition
370         {
371             definition(python_highlight const& self)
372                 : g(self.actions.state.grammar())
373             {
374                 member_action1<syntax_highlight_actions, char const*>
375                     span(self.actions, &syntax_highlight_actions::span),
376                     span_start(self.actions, &syntax_highlight_actions::span_start);
377                 member_action<syntax_highlight_actions>
378                     span_end(self.actions, &syntax_highlight_actions::span_end),
379                     unexpected_char(self.actions, &syntax_highlight_actions::unexpected_char),
380                     plain_char(self.actions, &syntax_highlight_actions::plain_char),
381                     pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back),
382                     post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back),
383                     mark_text(self.actions, &syntax_highlight_actions::mark_text),
384                     callout(self.actions, &syntax_highlight_actions::callout);
385                 member_action_value<syntax_highlight_actions, std::string const&>
386                     do_macro(self.actions, &syntax_highlight_actions::do_macro);
387                 error_action error(self.actions.state);
388
389                 program
390                     =
391                     *(  (+cl::space_p)                  [plain_char]
392                     |   macro
393                     |   escape          
394                     |   comment
395                     |   keyword                         [span("keyword")]
396                     |   identifier                      [span("identifier")]
397                     |   special                         [span("special")]
398                     |   string_                         [span("string")]
399                     |   number                          [span("number")]
400                     |   u8_codepoint_p                  [unexpected_char]
401                     )
402                     ;
403
404                 macro = 
405                     // must not be followed by alpha or underscore
406                     cl::eps_p(self.actions.state.macro
407                         >> (cl::eps_p - (cl::alpha_p | '_')))
408                     >> self.actions.state.macro
409                                                         [do_macro]
410                     ;
411
412                 escape =
413                     cl::str_p("``")                     [pre_escape_back]
414                     >>
415                     (
416                         (
417                             (
418                                 (+(cl::anychar_p - "``") >> cl::eps_p("``"))
419                                 & g.phrase_start
420                             )
421                             >>  cl::str_p("``")
422                         )
423                         |
424                         (
425                             cl::eps_p                   [error]
426                             >> *cl::anychar_p
427                         )
428                     )                                   [post_escape_back]
429                     ;
430
431                 comment
432                     =   cl::str_p("#")                  [span_start("comment")]
433                     >>  *(  escape
434                         |   (+(cl::anychar_p - (cl::eol_p | "``")))
435                                                         [plain_char]
436                         )
437                     >>  cl::eps_p                       [span_end]
438                     ;
439
440                 keyword
441                     =   keywords.python >> (cl::eps_p - (cl::alnum_p | '_'))
442                     ;   // make sure we recognize whole words only
443
444                 special
445                     =   +cl::chset_p("~!%^&*()+={[}]:;,<.>/|\\-")
446                     ;
447
448                 string_prefix
449                     =    cl::as_lower_d[cl::str_p("u") >> ! cl::str_p("r")]
450                     ;
451                 
452                 string_
453                     =   ! string_prefix >> (long_string | short_string)
454                     ;
455
456                 string_char = ('\\' >> u8_codepoint_p) | (cl::anychar_p - '\\');
457             
458                 short_string
459                     =   cl::confix_p('\'', * string_char, '\'') |
460                         cl::confix_p('"', * string_char, '"')
461                     ;
462             
463                 long_string
464                     // Note: the "cl::str_p" on the next two lines work around
465                     // an INTERNAL COMPILER ERROR when using VC7.1
466                     =   cl::confix_p(cl::str_p("'''"), * string_char, "'''") |
467                         cl::confix_p(cl::str_p("\"\"\""), * string_char, "\"\"\"")
468                     ;
469                 
470                 number
471                     =   (
472                             cl::as_lower_d["0x"] >> cl::hex_p
473                         |   '0' >> cl::oct_p
474                         |   cl::real_p
475                         )
476                         >>  *cl::as_lower_d[cl::chset_p("lj")]
477                     ;
478
479                 identifier
480                     =   (cl::alpha_p | '_') >> *(cl::alnum_p | '_')
481                     ;
482             }
483
484             cl::rule<Scanner>
485                             program, macro, comment, special, string_, string_prefix, 
486                             short_string, long_string, number, identifier, keyword, 
487                             escape, string_char;
488
489             quickbook_grammar& g;
490
491             cl::rule<Scanner> const&
492             start() const { return program; }
493         };
494
495         syntax_highlight_actions& actions;
496     };
497
498     // Grammar for plain text (no actual highlighting)
499     struct teletype_highlight : public cl::grammar<teletype_highlight>
500     {
501         teletype_highlight(syntax_highlight_actions& actions)
502             : actions(actions) {}
503
504         template <typename Scanner>
505         struct definition
506         {
507             definition(teletype_highlight const& self)
508                 : g(self.actions.state.grammar())
509             {
510                 member_action<syntax_highlight_actions>
511                     plain_char(self.actions, &syntax_highlight_actions::plain_char),
512                     pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back),
513                     post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back);
514                 member_action_value<syntax_highlight_actions, std::string const&>
515                     do_macro(self.actions, &syntax_highlight_actions::do_macro);
516                 error_action error(self.actions.state);
517
518                 program
519                     =
520                     *(  macro
521                     |   escape          
522                     |   u8_codepoint_p                  [plain_char]
523                     )
524                     ;
525
526                 macro =
527                     // must not be followed by alpha or underscore
528                     cl::eps_p(self.actions.state.macro
529                         >> (cl::eps_p - (cl::alpha_p | '_')))
530                     >> self.actions.state.macro
531                                                         [do_macro]
532                     ;
533
534                 escape =
535                     cl::str_p("``")                     [pre_escape_back]
536                     >>
537                     (
538                         (
539                             (
540                                 (+(cl::anychar_p - "``") >> cl::eps_p("``"))
541                                 & g.phrase_start
542                             )
543                             >>  cl::str_p("``")
544                         )
545                         |
546                         (
547                             cl::eps_p                   [error]
548                             >> *cl::anychar_p
549                         )
550                     )                                   [post_escape_back]
551                     ;
552             }
553
554             cl::rule<Scanner> program, macro, escape;
555
556             quickbook_grammar& g;
557
558             cl::rule<Scanner> const&
559             start() const { return program; }
560         };
561
562         syntax_highlight_actions& actions;
563     };
564
565     void syntax_highlight(
566         parse_iterator first,
567         parse_iterator last,
568         quickbook::state& state,
569         source_mode_type source_mode,
570         bool is_block)
571     {
572         syntax_highlight_actions syn_actions(state, is_block);
573
574         // print the code with syntax coloring
575         switch(source_mode)
576         {
577             case source_mode_tags::cpp: {
578                 cpp_highlight cpp_p(syn_actions);
579                 boost::spirit::classic::parse(first, last, cpp_p);
580                 break;
581             }
582             case source_mode_tags::python: {
583                 python_highlight python_p(syn_actions);
584                 boost::spirit::classic::parse(first, last, python_p);
585                 break;
586             }
587             case source_mode_tags::teletype: {
588                 teletype_highlight teletype_p(syn_actions);
589                 boost::spirit::classic::parse(first, last, teletype_p);
590                 break;
591             }
592             default:
593                 BOOST_ASSERT(0);
594         }
595     }
596 }