e618c07dd603add5bdd72a051df46a30bb1c2d7d
[platform/upstream/boost.git] / tools / quickbook / src / syntax_highlight.cpp
1 /*=============================================================================
2     Copyright (c) 2002 2004 2006 Joel de Guzman
3     Copyright (c) 2004 Eric Niebler
4     http://spirit.sourceforge.net/
5
6     Use, modification and distribution is subject to the Boost Software
7     License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
8     http://www.boost.org/LICENSE_1_0.txt)
9 =============================================================================*/
10 #include <boost/spirit/include/classic_core.hpp>
11 #include <boost/spirit/include/classic_confix.hpp>
12 #include <boost/spirit/include/classic_chset.hpp>
13 #include <boost/spirit/include/classic_symbols.hpp>
14 #include <boost/spirit/include/classic_loops.hpp>
15 #include "grammar.hpp"
16 #include "grammar_impl.hpp" // Just for context stuff. Should move?
17 #include "state.hpp"
18 #include "actions.hpp"
19 #include "utils.hpp"
20 #include "files.hpp"
21 #include "input_path.hpp"
22
23 namespace quickbook
24 {    
25     namespace cl = boost::spirit::classic;
26
27     template <typename T, typename Value>
28     struct member_action_value
29     {
30         typedef void(T::*member_function)(Value);
31
32         T& l;
33         member_function mf;
34
35         member_action_value(T& l, member_function mf) : l(l), mf(mf) {}
36
37         void operator()(Value v) const {
38             (l.*mf)(v);
39         }
40     };
41
42     template <typename T>
43     struct member_action
44     {
45         typedef void(T::*member_function)(parse_iterator, parse_iterator);
46
47         T& l;
48         member_function mf;
49
50         member_action(T& l, member_function mf) : l(l), mf(mf) {}
51
52         void operator()(parse_iterator first, parse_iterator last) const {
53             (l.*mf)(first, last);
54         }
55     };
56
57     template <typename T, typename Arg1>
58     struct member_action1
59     {
60         typedef void(T::*member_function)(parse_iterator, parse_iterator, Arg1);
61
62         T& l;
63         member_function mf;
64
65         member_action1(T& l, member_function mf) : l(l), mf(mf) {}
66
67         struct impl
68         {
69             member_action1 a;
70             Arg1 value;
71
72             impl(member_action1& a, Arg1 value) :
73                 a(a), value(value)
74             {}
75
76             void operator()(parse_iterator first, parse_iterator last) const {
77                 (a.l.*a.mf)(first, last, value);
78             }
79         };
80
81         impl operator()(Arg1 a1) {
82             return impl(*this, a1);
83         }
84     };
85
86     // Syntax Highlight Actions
87
88     struct syntax_highlight_actions
89     {
90         quickbook::collector out;
91         quickbook::state& state;
92         do_macro_action do_macro_impl;
93
94         // State
95         bool support_callouts;
96         string_ref marked_text;
97
98         syntax_highlight_actions(quickbook::state& state, bool is_block) :
99             out(), state(state),
100             do_macro_impl(out, state),
101             support_callouts(is_block && (qbk_version_n >= 107u ||
102                 state.current_file->is_code_snippets)),
103             marked_text()
104         {}
105
106         void span(parse_iterator, parse_iterator, char const*);
107         void span_start(parse_iterator, parse_iterator, char const*);
108         void span_end(parse_iterator, parse_iterator);
109         void unexpected_char(parse_iterator, parse_iterator);
110         void plain_char(parse_iterator, parse_iterator);
111         void pre_escape_back(parse_iterator, parse_iterator);
112         void post_escape_back(parse_iterator, parse_iterator);
113         void do_macro(std::string const&);
114
115         void mark_text(parse_iterator, parse_iterator);
116         void callout(parse_iterator, parse_iterator);
117     };
118
119     void syntax_highlight_actions::span(parse_iterator first,
120             parse_iterator last, char const* name)
121     {
122         out << "<phrase role=\"" << name << "\">";
123         while (first != last)
124             detail::print_char(*first++, out.get());
125         out << "</phrase>";
126     }
127
128     void syntax_highlight_actions::span_start(parse_iterator first,
129             parse_iterator last, char const* name)
130     {
131         out << "<phrase role=\"" << name << "\">";
132         while (first != last)
133             detail::print_char(*first++, out.get());
134     }
135
136     void syntax_highlight_actions::span_end(parse_iterator first,
137             parse_iterator last)
138     {
139         while (first != last)
140             detail::print_char(*first++, out.get());
141         out << "</phrase>";
142     }
143
144     void syntax_highlight_actions::unexpected_char(parse_iterator first,
145             parse_iterator last)
146     {
147         file_position const pos = state.current_file->position_of(first.base());
148
149         detail::outwarn(state.current_file->path, pos.line)
150             << "in column:" << pos.column
151             << ", unexpected character: " << std::string(first.base(), last.base())
152             << "\n";
153
154         // print out an unexpected character
155         out << "<phrase role=\"error\">";
156         while (first != last)
157             detail::print_char(*first++, out.get());
158         out << "</phrase>";
159     }
160
161     void syntax_highlight_actions::plain_char(parse_iterator first,
162             parse_iterator last)
163     {
164         while (first != last)
165             detail::print_char(*first++, out.get());
166     }
167
168     void syntax_highlight_actions::pre_escape_back(parse_iterator,
169             parse_iterator)
170     {
171         state.phrase.push(); // save the stream
172     }
173
174     void syntax_highlight_actions::post_escape_back(parse_iterator,
175             parse_iterator)
176     {
177         out << state.phrase.str();
178         state.phrase.pop(); // restore the stream
179     }
180
181     void syntax_highlight_actions::do_macro(std::string const& v)
182     {
183         do_macro_impl(v);
184     }
185
186     void syntax_highlight_actions::mark_text(parse_iterator first,
187             parse_iterator last)
188     {
189         marked_text = string_ref(first.base(), last.base());
190     }
191
192     void syntax_highlight_actions::callout(parse_iterator, parse_iterator)
193     {
194         out << state.add_callout(qbk_value(state.current_file,
195             marked_text.begin(), marked_text.end()));
196         marked_text.clear();
197     }
198
199     // Syntax
200
    // Keyword tables for the highlighting grammars: one cl::symbols<>
    // parser for C++ and one for Python, both filled in the constructor.
201     struct keywords_holder
202     {
203         cl::symbols<> cpp, python;
204
205         keywords_holder()
206         {
            // C++ keywords, including the alternative operator spellings
            // (and, bitor, not_eq, ...).
207             cpp
208                     =   "and_eq", "and", "asm", "auto", "bitand", "bitor",
209                         "bool", "break", "case", "catch", "char", "class",
210                         "compl", "const_cast", "const", "continue", "default",
211                         "delete", "do", "double", "dynamic_cast",  "else",
212                         "enum", "explicit", "export", "extern", "false",
213                         "float", "for", "friend", "goto", "if", "inline",
214                         "int", "long", "mutable", "namespace", "new", "not_eq",
215                         "not", "operator", "or_eq", "or", "private",
216                         "protected", "public", "register", "reinterpret_cast",
217                         "return", "short", "signed", "sizeof", "static",
218                         "static_cast", "struct", "switch", "template", "this",
219                         "throw", "true", "try", "typedef", "typeid",
220                         "typename", "union", "unsigned", "using", "virtual",
221                         "void", "volatile", "wchar_t", "while", "xor_eq", "xor"
222                     ;
223
            // Python keywords.
224             python
225                     =
226                     "and",       "del",       "for",       "is",        "raise",
227                     "assert",    "elif",      "from",      "lambda",    "return",
228                     "break",     "else",      "global",    "not",       "try",
229                     "class",     "except",    "if",        "or",        "while",
230                     "continue",  "exec",      "import",    "pass",      "yield",
231                     "def",       "finally",   "in",        "print",
232
233                     // Technically "as" and "None" are not yet keywords (at Python
234                     // 2.4). They are destined to become keywords, and we treat them
235                     // as such for syntax highlighting purposes.
236
237                     "as", "None"
238                     ;
239         }
240     };
241
    // File-local instance of the keyword tables; the `keyword` rules of the
    // C++ and Python grammars refer to keywords.cpp and keywords.python.
242     namespace {
243         keywords_holder keywords;
244     }
245
246     // Grammar for C++ highlighting
    // Each alternative of `program` reports what it matched through a member
    // function of the shared syntax_highlight_actions object.
247     struct cpp_highlight : public cl::grammar<cpp_highlight>
248     {
249         cpp_highlight(syntax_highlight_actions& actions)
250             : actions(actions) {}
251
252         template <typename Scanner>
253         struct definition
254         {
255             definition(cpp_highlight const& self)
256                 : g(self.actions.state.grammar())
257             {
                // Bind the member functions of the shared actions object so
                // that they can be attached to parsers as semantic actions.
258                 member_action1<syntax_highlight_actions, char const*>
259                     span(self.actions, &syntax_highlight_actions::span),
260                     span_start(self.actions, &syntax_highlight_actions::span_start);
261                 member_action<syntax_highlight_actions>
262                     span_end(self.actions, &syntax_highlight_actions::span_end),
263                     unexpected_char(self.actions, &syntax_highlight_actions::unexpected_char),
264                     plain_char(self.actions, &syntax_highlight_actions::plain_char),
265                     pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back),
266                     post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back),
267                     mark_text(self.actions, &syntax_highlight_actions::mark_text),
268                     callout(self.actions, &syntax_highlight_actions::callout);
269                 member_action_value<syntax_highlight_actions, std::string const&>
270                     do_macro(self.actions, &syntax_highlight_actions::do_macro);
271                 error_action error(self.actions.state);
272
                // The alternatives are tried in the order listed. The
                // callout rules are only attempted when support_callouts is
                // set. u8_codepoint_p is the final catch-all: whatever no
                // other alternative recognised is reported as unexpected.
273                 program
274                     =
275                     *(  (+cl::space_p)                  [plain_char]
276                     |   macro
277                     |   escape
278                     |   preprocessor                    [span("preprocessor")]
279                     |   cl::eps_p(ph::var(self.actions.support_callouts))
280                     >>  (   line_callout                [callout]
281                         |   inline_callout              [callout]
282                         )
283                     |   comment
284                     |   keyword                         [span("keyword")]
285                     |   identifier                      [span("identifier")]
286                     |   special                         [span("special")]
287                     |   string_                         [span("string")]
288                     |   char_                           [span("char")]
289                     |   number                          [span("number")]
290                     |   u8_codepoint_p                  [unexpected_char]
291                     )
292                     ;
293
294                 macro =
295                     // must not be followed by alpha or underscore
296                     cl::eps_p(self.actions.state.macro
297                         >> (cl::eps_p - (cl::alpha_p | '_')))
298                     >> self.actions.state.macro
299                                                         [do_macro]
300                     ;
301
                // Escape back to quickbook: the text between a pair of ``
                // markers must also match the quickbook phrase rule
                // (g.phrase). Otherwise the error action is invoked and the
                // rest of the input is consumed.
302                 escape =
303                     cl::str_p("``")                     [pre_escape_back]
304                     >>
305                     (
306                         (
307                             (
308                                 (+(cl::anychar_p - "``") >> cl::eps_p("``"))
309                                 & g.phrase
310                             )
311                             >>  cl::str_p("``")
312                         )
313                         |
314                         (
315                             cl::eps_p                   [error]
316                             >> *cl::anychar_p
317                         )
318                     )                                   [post_escape_back]
319                     ;
320
                // '#', optional whitespace, then an identifier-like name.
321                 preprocessor
322                     =   '#' >> *cl::space_p >> ((cl::alpha_p | '_') >> *(cl::alnum_p | '_'))
323                     ;
324
                // Callout written as /*< text >*/; the text between the
                // delimiters is remembered through mark_text.
325                 inline_callout
326                     =   cl::confix_p(
327                             "/*<" >> *cl::space_p,
328                             (*cl::anychar_p)            [mark_text],
329                             ">*/"
330                         )
331                         ;
332
                // Callout written as /*<< text >>*/; trailing whitespace is
                // consumed as part of the match. Its opener begins with the
                // opener of inline_callout, and `program` tries this rule
                // first.
333                 line_callout
334                     =   cl::confix_p(
335                             "/*<<" >> *cl::space_p,
336                             (*cl::anychar_p)            [mark_text],
337                             ">>*/"
338                         )
339                     >>  *cl::space_p
340                     ;
341
                // "//" and "/* */" comments. Escapes inside a comment are
                // still processed; all other comment text is written
                // through plain_char between span_start and span_end.
342                 comment
343                     =   cl::str_p("//")                 [span_start("comment")]
344                     >>  *(  escape
345                         |   (+(cl::anychar_p - (cl::eol_p | "``")))
346                                                         [plain_char]
347                         )
348                     >>  cl::eps_p                       [span_end]
349                     |   cl::str_p("/*")                 [span_start("comment")]
350                     >>  *(  escape
351                         |   (+(cl::anychar_p - (cl::str_p("*/") | "``")))
352                                                         [plain_char]
353                         )
354                     >>  (!cl::str_p("*/"))              [span_end]
355                     ;
356
357                 keyword
358                     =   keywords.cpp >> (cl::eps_p - (cl::alnum_p | '_'))
359                     ;   // make sure we recognize whole words only
360
                // One or more operator / punctuation characters.
361                 special
362                     =   +cl::chset_p("~!%^&*()+={[}]:;,<.>?/|\\-")
363                     ;
364
                // A backslash followed by one code point, or any character
                // other than a backslash.
365                 string_char = ('\\' >> u8_codepoint_p) | (cl::anychar_p - '\\');
366
                // String and character literals, with an optional 'l' / 'L'
                // prefix.
367                 string_
368                     =   !cl::as_lower_d['l'] >> cl::confix_p('"', *string_char, '"')
369                     ;
370
371                 char_
372                     =   !cl::as_lower_d['l'] >> cl::confix_p('\'', *string_char, '\'')
373                     ;
374
                // Hexadecimal, octal or real literal, followed by any number
                // of l / d / f / u suffix characters (case-insensitive).
375                 number
376                     =   (
377                             cl::as_lower_d["0x"] >> cl::hex_p
378                         |   '0' >> cl::oct_p
379                         |   cl::real_p
380                         )
381                         >>  *cl::as_lower_d[cl::chset_p("ldfu")]
382                     ;
383
384                 identifier
385                     =   (cl::alpha_p | '_') >> *(cl::alnum_p | '_')
386                     ;
387             }
388
389             cl::rule<Scanner>
390                             program, macro, preprocessor,
391                             inline_callout, line_callout, comment,
392                             special, string_, 
393                             char_, number, identifier, keyword, escape,
394                             string_char;
395
            // Grammar obtained from state.grammar(); provides the `phrase`
            // rule used by `escape`.
396             quickbook_grammar& g;
397
398             cl::rule<Scanner> const&
399             start() const { return program; }
400         };
401
402         syntax_highlight_actions& actions;
403     };
404
405     // Grammar for Python highlighting
406     // See also: The Python Reference Manual
407     // http://docs.python.org/ref/ref.html
408     struct python_highlight : public cl::grammar<python_highlight>
409     {
410         python_highlight(syntax_highlight_actions& actions)
411             : actions(actions) {}
412
413         template <typename Scanner>
414         struct definition
415         {
416             definition(python_highlight const& self)
417                 : g(self.actions.state.grammar())
418             {
419                 member_action1<syntax_highlight_actions, char const*>
420                     span(self.actions, &syntax_highlight_actions::span),
421                     span_start(self.actions, &syntax_highlight_actions::span_start);
422                 member_action<syntax_highlight_actions>
423                     span_end(self.actions, &syntax_highlight_actions::span_end),
424                     unexpected_char(self.actions, &syntax_highlight_actions::unexpected_char),
425                     plain_char(self.actions, &syntax_highlight_actions::plain_char),
426                     pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back),
427                     post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back),
428                     mark_text(self.actions, &syntax_highlight_actions::mark_text),
429                     callout(self.actions, &syntax_highlight_actions::callout);
430                 member_action_value<syntax_highlight_actions, std::string const&>
431                     do_macro(self.actions, &syntax_highlight_actions::do_macro);
432                 error_action error(self.actions.state);
433
434                 program
435                     =
436                     *(  (+cl::space_p)                  [plain_char]
437                     |   macro
438                     |   escape          
439                     |   comment
440                     |   keyword                         [span("keyword")]
441                     |   identifier                      [span("identifier")]
442                     |   special                         [span("special")]
443                     |   string_                         [span("string")]
444                     |   number                          [span("number")]
445                     |   u8_codepoint_p                  [unexpected_char]
446                     )
447                     ;
448
449                 macro = 
450                     // must not be followed by alpha or underscore
451                     cl::eps_p(self.actions.state.macro
452                         >> (cl::eps_p - (cl::alpha_p | '_')))
453                     >> self.actions.state.macro
454                                                         [do_macro]
455                     ;
456
457                 escape =
458                     cl::str_p("``")                     [pre_escape_back]
459                     >>
460                     (
461                         (
462                             (
463                                 (+(cl::anychar_p - "``") >> cl::eps_p("``"))
464                                 & g.phrase
465                             )
466                             >>  cl::str_p("``")
467                         )
468                         |
469                         (
470                             cl::eps_p                   [error]
471                             >> *cl::anychar_p
472                         )
473                     )                                   [post_escape_back]
474                     ;
475
476                 comment
477                     =   cl::str_p("#")                  [span_start("comment")]
478                     >>  *(  escape
479                         |   (+(cl::anychar_p - (cl::eol_p | "``")))
480                                                         [plain_char]
481                         )
482                     >>  cl::eps_p                       [span_end]
483                     ;
484
485                 keyword
486                     =   keywords.python >> (cl::eps_p - (cl::alnum_p | '_'))
487                     ;   // make sure we recognize whole words only
488
489                 special
490                     =   +cl::chset_p("~!%^&*()+={[}]:;,<.>/|\\-")
491                     ;
492
493                 string_prefix
494                     =    cl::as_lower_d[cl::str_p("u") >> ! cl::str_p("r")]
495                     ;
496                 
497                 string_
498                     =   ! string_prefix >> (long_string | short_string)
499                     ;
500
501                 string_char = ('\\' >> u8_codepoint_p) | (cl::anychar_p - '\\');
502             
503                 short_string
504                     =   cl::confix_p('\'', * string_char, '\'') |
505                         cl::confix_p('"', * string_char, '"')
506                     ;
507             
508                 long_string
509                     // Note: the "cl::str_p" on the next two lines work around
510                     // an INTERNAL COMPILER ERROR when using VC7.1
511                     =   cl::confix_p(cl::str_p("'''"), * string_char, "'''") |
512                         cl::confix_p(cl::str_p("\"\"\""), * string_char, "\"\"\"")
513                     ;
514                 
515                 number
516                     =   (
517                             cl::as_lower_d["0x"] >> cl::hex_p
518                         |   '0' >> cl::oct_p
519                         |   cl::real_p
520                         )
521                         >>  *cl::as_lower_d[cl::chset_p("lj")]
522                     ;
523
524                 identifier
525                     =   (cl::alpha_p | '_') >> *(cl::alnum_p | '_')
526                     ;
527             }
528
529             cl::rule<Scanner>
530                             program, macro, comment, special, string_, string_prefix, 
531                             short_string, long_string, number, identifier, keyword, 
532                             escape, string_char;
533
534             quickbook_grammar& g;
535
536             cl::rule<Scanner> const&
537             start() const { return program; }
538         };
539
540         syntax_highlight_actions& actions;
541     };
542
543     // Grammar for plain text (no actual highlighting)
    // Macros and escapes back to quickbook are still processed; every other
    // code point is written through plain_char.
544     struct teletype_highlight : public cl::grammar<teletype_highlight>
545     {
546         teletype_highlight(syntax_highlight_actions& actions)
547             : actions(actions) {}
548
549         template <typename Scanner>
550         struct definition
551         {
552             definition(teletype_highlight const& self)
553                 : g(self.actions.state.grammar())
554             {
                // Bind the member functions of the shared actions object so
                // that they can be attached to parsers as semantic actions.
555                 member_action<syntax_highlight_actions>
556                     plain_char(self.actions, &syntax_highlight_actions::plain_char),
557                     pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back),
558                     post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back);
559                 member_action_value<syntax_highlight_actions, std::string const&>
560                     do_macro(self.actions, &syntax_highlight_actions::do_macro);
561                 error_action error(self.actions.state);
562
                // The alternatives are tried in the order listed.
563                 program
564                     =
565                     *(  macro
566                     |   escape          
567                     |   u8_codepoint_p                  [plain_char]
568                     )
569                     ;
570
571                 macro =
572                     // must not be followed by alpha or underscore
573                     cl::eps_p(self.actions.state.macro
574                         >> (cl::eps_p - (cl::alpha_p | '_')))
575                     >> self.actions.state.macro
576                                                         [do_macro]
577                     ;
578
                // Escape back to quickbook: the text between a pair of ``
                // markers must also match the quickbook phrase rule
                // (g.phrase). Otherwise the error action is invoked and the
                // rest of the input is consumed.
579                 escape =
580                     cl::str_p("``")                     [pre_escape_back]
581                     >>
582                     (
583                         (
584                             (
585                                 (+(cl::anychar_p - "``") >> cl::eps_p("``"))
586                                 & g.phrase
587                             )
588                             >>  cl::str_p("``")
589                         )
590                         |
591                         (
592                             cl::eps_p                   [error]
593                             >> *cl::anychar_p
594                         )
595                     )                                   [post_escape_back]
596                     ;
597             }
598
599             cl::rule<Scanner> program, macro, escape;
600
            // Grammar obtained from state.grammar(); provides the `phrase`
            // rule used by `escape`.
601             quickbook_grammar& g;
602
603             cl::rule<Scanner> const&
604             start() const { return program; }
605         };
606
607         syntax_highlight_actions& actions;
608     };
609
610     std::string syntax_highlight(
611         parse_iterator first,
612         parse_iterator last,
613         quickbook::state& state,
614         std::string const& source_mode,
615         bool is_block)
616     {
617         syntax_highlight_actions syn_actions(state, is_block);
618
619         // print the code with syntax coloring
620         if (source_mode == "c++")
621         {
622             cpp_highlight cpp_p(syn_actions);
623             boost::spirit::classic::parse(first, last, cpp_p);
624         }
625         else if (source_mode == "python")
626         {
627             python_highlight python_p(syn_actions);
628             boost::spirit::classic::parse(first, last, python_p);
629         }
630         else if (source_mode == "teletype")
631         {
632             teletype_highlight teletype_p(syn_actions);
633             boost::spirit::classic::parse(first, last, teletype_p);
634         }
635         else
636         {
637             BOOST_ASSERT(0);
638         }
639
640         std::string str;
641         syn_actions.out.swap(str);
642         
643         return str;
644     }
645 }