6e0c2076fde00e6ecdf3b0e31eaf430e38a07bbb
[platform/upstream/evolution-data-server.git] / camel / camel-mime-filter-tohtml.c
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  *  Authors: Jeffrey Stedfast <fejj@ximian.com>
4  *
5  *  Copyright (C) 1999-2008 Novell, Inc. (www.novell.com)
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU Lesser General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU Lesser General Public License for more details.
16  *
17  *  You should have received a copy of the GNU Lesser General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20  *
21  */
22
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif
26
27 #include <stdio.h>
28 #include <string.h>
29
30 #include "camel-mime-filter-tohtml.h"
31 #include "camel-url-scanner.h"
32 #include "camel-utf8.h"
33
34 /**
35  * TODO: convert common text/plain 'markup' to html. eg.:
36  *
37  * _word_ -> <u>_word_</u>
38  * *word* -> <b>*word*</b>
39  * /word/ -> <i>/word/</i>
40  **/
41
42 #define d(x)
43
44 #define FOOLISHLY_UNMUNGE_FROM 0
45
46 #define CONVERT_WEB_URLS  CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS
47 #define CONVERT_ADDRSPEC  CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES
48
49 static struct {
50         guint mask;
51         urlpattern_t pattern;
52 } patterns[] = {
53         { CONVERT_WEB_URLS, { "file://",   "",        camel_url_file_start,     camel_url_file_end     } },
54         { CONVERT_WEB_URLS, { "ftp://",    "",        camel_url_web_start,      camel_url_web_end      } },
55         { CONVERT_WEB_URLS, { "sftp://",   "",        camel_url_web_start,      camel_url_web_end      } },
56         { CONVERT_WEB_URLS, { "http://",   "",        camel_url_web_start,      camel_url_web_end      } },
57         { CONVERT_WEB_URLS, { "https://",  "",        camel_url_web_start,      camel_url_web_end      } },
58         { CONVERT_WEB_URLS, { "news://",   "",        camel_url_web_start,      camel_url_web_end      } },
59         { CONVERT_WEB_URLS, { "nntp://",   "",        camel_url_web_start,      camel_url_web_end      } },
60         { CONVERT_WEB_URLS, { "telnet://", "",        camel_url_web_start,      camel_url_web_end      } },
61         { CONVERT_WEB_URLS, { "webcal://", "",        camel_url_web_start,      camel_url_web_end      } },
62         { CONVERT_WEB_URLS, { "mailto:",   "",        camel_url_web_start,      camel_url_web_end      } },
63         { CONVERT_WEB_URLS, { "callto:",   "",        camel_url_web_start,      camel_url_web_end      } },
64         { CONVERT_WEB_URLS, { "h323:",     "",        camel_url_web_start,      camel_url_web_end      } },
65         { CONVERT_WEB_URLS, { "sip:",      "",        camel_url_web_start,      camel_url_web_end      } },
66         { CONVERT_WEB_URLS, { "www.",      "http://", camel_url_web_start,      camel_url_web_end      } },
67         { CONVERT_WEB_URLS, { "ftp.",      "ftp://",  camel_url_web_start,      camel_url_web_end      } },
68         { CONVERT_ADDRSPEC, { "@",         "mailto:", camel_url_addrspec_start, camel_url_addrspec_end } },
69 };
70
71 static void camel_mime_filter_tohtml_class_init (CamelMimeFilterToHTMLClass *klass);
72 static void camel_mime_filter_tohtml_init       (CamelMimeFilterToHTML *filter);
73 static void camel_mime_filter_tohtml_finalize   (CamelObject *obj);
74
75 static CamelMimeFilterClass *camel_mime_filter_tohtml_parent;
76
77 CamelType
78 camel_mime_filter_tohtml_get_type (void)
79 {
80         static CamelType type = CAMEL_INVALID_TYPE;
81
82         if (type == CAMEL_INVALID_TYPE) {
83                 type = camel_type_register (camel_mime_filter_get_type (),
84                                             "CamelMimeFilterToHTML",
85                                             sizeof (CamelMimeFilterToHTML),
86                                             sizeof (CamelMimeFilterToHTMLClass),
87                                             (CamelObjectClassInitFunc) camel_mime_filter_tohtml_class_init,
88                                             NULL,
89                                             (CamelObjectInitFunc) camel_mime_filter_tohtml_init,
90                                             (CamelObjectFinalizeFunc) camel_mime_filter_tohtml_finalize);
91         }
92
93         return type;
94 }
95
96 static void
97 camel_mime_filter_tohtml_finalize (CamelObject *obj)
98 {
99         CamelMimeFilterToHTML *filter = (CamelMimeFilterToHTML *) obj;
100
101         camel_url_scanner_free (filter->scanner);
102 }
103
104 static void
105 camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter)
106 {
107         filter->scanner = camel_url_scanner_new ();
108
109         filter->flags = 0;
110         filter->color = 0;
111         filter->column = 0;
112         filter->pre_open = FALSE;
113 }
114
115 static gchar *
116 check_size (CamelMimeFilter *filter, gchar *outptr, gchar **outend, gsize len)
117 {
118         gsize offset;
119
120         if (*outend - outptr >= len)
121                 return outptr;
122
123         offset = outptr - filter->outbuf;
124
125         camel_mime_filter_set_size (filter, filter->outsize + len, TRUE);
126
127         *outend = filter->outbuf + filter->outsize;
128
129         return filter->outbuf + offset;
130 }
131
132 static gchar *
133 append_string_verbatim (CamelMimeFilter *filter, const gchar *str, gchar *outptr, gchar **outend)
134 {
135         gsize len = strlen (str);
136
137         outptr = check_size (filter, outptr, outend, len);
138         memcpy(outptr, str, len);
139         outptr += len;
140
141         return outptr;
142 }
143
144 static gint
145 citation_depth (const gchar *in)
146 {
147         register const gchar *inptr = in;
148         gint depth = 1;
149
150         if (*inptr++ != '>')
151                 return 0;
152
153 #if FOOLISHLY_UNMUNGE_FROM
154         /* check that it isn't an escaped From line */
155         if (!strncmp (inptr, "From", 4))
156                 return 0;
157 #endif
158
159         while (*inptr != '\n') {
160                 if (*inptr == ' ')
161                         inptr++;
162
163                 if (*inptr++ != '>')
164                         break;
165
166                 depth++;
167         }
168
169         return depth;
170 }
171
172 static gchar *
173 writeln (CamelMimeFilter *filter, const guchar *in, const guchar *inend, gchar *outptr, gchar **outend)
174 {
175         CamelMimeFilterToHTML *html = (CamelMimeFilterToHTML *) filter;
176         const guchar *inptr = in;
177
178         while (inptr < inend) {
179                 guint32 u;
180
181                 outptr = check_size (filter, outptr, outend, 16);
182
183                 u = camel_utf8_getc_limit (&inptr, inend);
184                 switch (u) {
185                 case 0xffff:
186                         g_warning("Truncated utf8 buffer");
187                         return outptr;
188                 case '<':
189                         outptr = g_stpcpy (outptr, "&lt;");
190                         html->column++;
191                         break;
192                 case '>':
193                         outptr = g_stpcpy (outptr, "&gt;");
194                         html->column++;
195                         break;
196                 case '&':
197                         outptr = g_stpcpy (outptr, "&amp;");
198                         html->column++;
199                         break;
200                 case '"':
201                         outptr = g_stpcpy (outptr, "&quot;");
202                         html->column++;
203                         break;
204                 case '\t':
205                         if (html->flags & (CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES)) {
206                                 do {
207                                         outptr = check_size (filter, outptr, outend, 7);
208                                         outptr = g_stpcpy (outptr, "&nbsp;");
209                                         html->column++;
210                                 } while (html->column % 8);
211                                 break;
212                         }
213                         /* otherwise, FALL THROUGH */
214                 case ' ':
215                         if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES
216                             && ((inptr == (in + 1) || (inptr < inend && (*inptr == ' ' || *inptr == '\t'))))) {
217                                 outptr = g_stpcpy (outptr, "&nbsp;");
218                                 html->column++;
219                                 break;
220                         }
221                         /* otherwise, FALL THROUGH */
222                 default:
223                         if (u >= 20 && u <0x80)
224                                 *outptr++ = u;
225                         else {
226                                 if (html->flags & CAMEL_MIME_FILTER_TOHTML_ESCAPE_8BIT)
227                                         *outptr++ = '?';
228                                 else
229                                         outptr += sprintf(outptr, "&#%u;", u);
230                         }
231                         html->column++;
232                         break;
233                 }
234         }
235
236         return outptr;
237 }
238
239 static void
240 html_convert (CamelMimeFilter *filter, const gchar *in, gsize inlen, gsize prespace,
241               gchar **out, gsize *outlen, gsize *outprespace, gboolean flush)
242 {
243         CamelMimeFilterToHTML *html = (CamelMimeFilterToHTML *) filter;
244         const gchar *inptr;
245         gchar *outptr, *outend;
246         const gchar *start;
247         const gchar *inend;
248         gint depth;
249
250         if (inlen == 0) {
251                 if (html->pre_open) {
252                         /* close the pre-tag */
253                         outend = filter->outbuf + filter->outsize;
254                         outptr = check_size (filter, filter->outbuf, &outend, 10);
255                         outptr = g_stpcpy (outptr, "</pre>");
256                         html->pre_open = FALSE;
257
258                         *out = filter->outbuf;
259                         *outlen = outptr - filter->outbuf;
260                         *outprespace = filter->outpre;
261                 } else {
262                         *out = (gchar *) in;
263                         *outlen = 0;
264                         *outprespace = 0;
265                 }
266
267                 return;
268         }
269
270         camel_mime_filter_set_size (filter, inlen * 2 + 6, FALSE);
271
272         inptr = in;
273         inend = in + inlen;
274         outptr = filter->outbuf;
275         outend = filter->outbuf + filter->outsize;
276
277         if (html->flags & CAMEL_MIME_FILTER_TOHTML_PRE && !html->pre_open) {
278                 outptr = g_stpcpy (outptr, "<pre>");
279                 html->pre_open = TRUE;
280         }
281
282         start = inptr;
283         do {
284                 while (inptr < inend && *inptr != '\n')
285                         inptr++;
286
287                 if (inptr >= inend && !flush)
288                         break;
289
290                 html->column = 0;
291                 depth = 0;
292
293                 if (html->flags & CAMEL_MIME_FILTER_TOHTML_MARK_CITATION) {
294                         if ((depth = citation_depth (start)) > 0) {
295                                 /* FIXME: we could easily support multiple color depths here */
296
297                                 outptr = check_size (filter, outptr, &outend, 25);
298                                 outptr += sprintf(outptr, "<font color=\"#%06x\">", (html->color & 0xffffff));
299                         }
300 #if FOOLISHLY_UNMUNGE_FROM
301                         else if (*start == '>') {
302                                 /* >From line */
303                                 start++;
304                         }
305 #endif
306                 } else if (html->flags & CAMEL_MIME_FILTER_TOHTML_CITE) {
307                         outptr = check_size (filter, outptr, &outend, 6);
308                         outptr = g_stpcpy (outptr, "&gt; ");
309                         html->column += 2;
310                 }
311
312 #define CONVERT_URLS (CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS | CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES)
313                 if (html->flags & CONVERT_URLS) {
314                         gsize matchlen, len;
315                         urlmatch_t match;
316
317                         len = inptr - start;
318
319                         do {
320                                 if (camel_url_scanner_scan (html->scanner, start, len, &match)) {
321                                         /* write out anything before the first regex match */
322                                         outptr = writeln (filter, (const guchar *)start, (const guchar *)start + match.um_so,
323                                                           outptr, &outend);
324
325                                         start += match.um_so;
326                                         len -= match.um_so;
327
328                                         matchlen = match.um_eo - match.um_so;
329
330                                         /* write out the href tag */
331                                         outptr = append_string_verbatim (filter, "<a href=\"", outptr, &outend);
332                                         /* prefix shouldn't need escaping, but let's be safe */
333                                         outptr = writeln (filter,
334                                                         (const guchar *)match.prefix,
335                                                         (const guchar *)match.prefix + strlen (match.prefix),
336                                                         outptr, &outend);
337                                         outptr = writeln (filter,
338                                                         (const guchar *)start,
339                                                         (const guchar *)start + matchlen,
340                                                         outptr, &outend);
341                                         outptr = append_string_verbatim (filter, "\">", outptr, &outend);
342
343                                         /* now write the matched string */
344                                         outptr = writeln (filter,
345                                                         (const guchar *)start,
346                                                         (const guchar *)start + matchlen,
347                                                         outptr, &outend);
348                                         html->column += matchlen;
349                                         start += matchlen;
350                                         len -= matchlen;
351
352                                         /* close the href tag */
353                                         outptr = append_string_verbatim (filter, "</a>", outptr, &outend);
354                                 } else {
355                                         /* nothing matched so write out the remainder of this line buffer */
356                                         outptr = writeln (filter, (const guchar *)start, (const guchar *)start + len, outptr, &outend);
357                                         break;
358                                 }
359                         } while (len > 0);
360                 } else {
361                         outptr = writeln (filter, (const guchar *)start, (const guchar *)inptr, outptr, &outend);
362                 }
363
364                 if ((html->flags & CAMEL_MIME_FILTER_TOHTML_MARK_CITATION) && depth > 0) {
365                         outptr = check_size (filter, outptr, &outend, 8);
366                         outptr = g_stpcpy (outptr, "</font>");
367                 }
368
369                 if (inptr < inend) {
370                         if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_NL) {
371                                 outptr = check_size (filter, outptr, &outend, 5);
372                                 outptr = g_stpcpy (outptr, "<br>");
373                         }
374
375                         *outptr++ = '\n';
376                 }
377
378                 start = ++inptr;
379         } while (inptr < inend);
380
381         if (flush) {
382                 /* flush the rest of our input buffer */
383                 if (start < inend)
384                         outptr = writeln (filter, (const guchar *)start, (const guchar *)inend, outptr, &outend);
385
386                 if (html->pre_open) {
387                         /* close the pre-tag */
388                         outptr = check_size (filter, outptr, &outend, 10);
389                         outptr = g_stpcpy (outptr, "</pre>");
390                 }
391         } else if (start < inend) {
392                 /* backup */
393                 camel_mime_filter_backup (filter, start, (unsigned) (inend - start));
394         }
395
396         *out = filter->outbuf;
397         *outlen = outptr - filter->outbuf;
398         *outprespace = filter->outpre;
399 }
400
401 static void
402 filter_filter (CamelMimeFilter *filter, const gchar *in, gsize len, gsize prespace,
403                gchar **out, gsize *outlen, gsize *outprespace)
404 {
405         html_convert (filter, in, len, prespace, out, outlen, outprespace, FALSE);
406 }
407
408 static void
409 filter_complete (CamelMimeFilter *filter, const gchar *in, gsize len, gsize prespace,
410                  gchar **out, gsize *outlen, gsize *outprespace)
411 {
412         html_convert (filter, in, len, prespace, out, outlen, outprespace, TRUE);
413 }
414
415 static void
416 filter_reset (CamelMimeFilter *filter)
417 {
418         CamelMimeFilterToHTML *html = (CamelMimeFilterToHTML *) filter;
419
420         html->column = 0;
421         html->pre_open = FALSE;
422 }
423
424 static void
425 camel_mime_filter_tohtml_class_init (CamelMimeFilterToHTMLClass *klass)
426 {
427         CamelMimeFilterClass *filter_class = (CamelMimeFilterClass *) klass;
428
429         camel_mime_filter_tohtml_parent = CAMEL_MIME_FILTER_CLASS (camel_type_get_global_classfuncs (camel_mime_filter_get_type ()));
430
431         filter_class->reset = filter_reset;
432         filter_class->filter = filter_filter;
433         filter_class->complete = filter_complete;
434 }
435
436 /**
437  * camel_mime_filter_tohtml_new:
438  * @flags: bitwise flags defining the behaviour
439  * @color: color to use when highlighting quoted text
440  *
441  * Create a new #CamelMimeFilterToHTML object to convert plain text
442  * into HTML.
443  *
444  * Returns: a new #CamelMimeFilterToHTML object
445  **/
446 CamelMimeFilter *
447 camel_mime_filter_tohtml_new (guint32 flags, guint32 color)
448 {
449         CamelMimeFilterToHTML *new;
450         gint i;
451
452         new = CAMEL_MIME_FILTER_TOHTML (camel_object_new (camel_mime_filter_tohtml_get_type ()));
453
454         new->flags = flags;
455         new->color = color;
456
457         for (i = 0; i < G_N_ELEMENTS (patterns); i++) {
458                 if (patterns[i].mask & flags)
459                         camel_url_scanner_add (new->scanner, &patterns[i].pattern);
460         }
461
462         return CAMEL_MIME_FILTER (new);
463 }
464
465 /**
466  * camel_text_to_html:
467  * @in: input text
468  * @flags: bitwise flags defining the html conversion behaviour
469  * @color: color to use when syntax highlighting
470  *
471  * Convert @in from plain text into HTML.
472  *
473  * Returns: a newly allocated string containing the HTMLified version
474  * of @in
475  **/
476 gchar *
477 camel_text_to_html (const gchar *in, guint32 flags, guint32 color)
478 {
479         CamelMimeFilter *filter;
480         gsize outlen, outpre;
481         gchar *outbuf;
482
483         g_return_val_if_fail (in != NULL, NULL);
484
485         filter = camel_mime_filter_tohtml_new (flags, color);
486
487         camel_mime_filter_complete (filter, (gchar *) in, strlen (in), 0,
488                                     &outbuf, &outlen, &outpre);
489
490         outbuf = g_strndup (outbuf, outlen);
491
492         camel_object_unref (filter);
493
494         return outbuf;
495 }