Initialize the gmime for upstream
[platform/upstream/gmime.git] / gmime / gmime-filter-enriched.c
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*  GMime
3  *  Copyright (C) 2000-2012 Jeffrey Stedfast
4  *
5  *  This library is free software; you can redistribute it and/or
6  *  modify it under the terms of the GNU Lesser General Public License
7  *  as published by the Free Software Foundation; either version 2.1
8  *  of the License, or (at your option) any later version.
9  *
10  *  This library is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  *  Lesser General Public License for more details.
14  *
15  *  You should have received a copy of the GNU Lesser General Public
16  *  License along with this library; if not, write to the Free
17  *  Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
18  *  02110-1301, USA.
19  */
20
21
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "gmime-common.h"
31 #include "gmime-filter-enriched.h"
32
33 /* text/enriched is rfc1896 */
34
35
/**
 * SECTION: gmime-filter-enriched
 * @title: GMimeFilterEnriched
 * @short_description: Convert text/enriched or text/richtext to HTML
 * @see_also: #GMimeFilter
 *
 * A #GMimeFilter used for converting text/enriched or text/richtext to HTML.
 **/
44
45
/* Signature shared by the per-tag <param> parsers.  @inptr points at the raw
 * parameter text and @inlen is its length in bytes; enriched_to_html() passes
 * a pointer into its input buffer, so the text is not NUL-terminated at
 * @inlen.  Each parser returns a newly allocated string (built with
 * g_strdup()/g_strndup()/g_strdup_printf()) which the caller releases with
 * g_free(). */
typedef char * (*EnrichedParamParser) (const char *inptr, size_t inlen);

/* Parsers referenced by the enriched_tags table below. */
static char *param_parse_colour (const char *inptr, size_t inlen);
static char *param_parse_font (const char *inptr, size_t inlen);
static char *param_parse_lang (const char *inptr, size_t inlen);
51
/* Translation table from enriched/richtext command names to HTML.
 *
 *   enriched    - command name as it appears between '<' and '>'
 *   html        - HTML emitted for the command; a "%s" in it is replaced by
 *                 the parsed <param> value
 *   needs_param - TRUE for commands that take a <param> argument
 *   parse_param - parser/validator for that argument (NULL when unused)
 *
 * The enriched -> html pairs are also loaded into enriched_hash by
 * g_mime_filter_enriched_class_init(). */
static struct {
        char *enriched;
        char *html;
        gboolean needs_param;
        EnrichedParamParser parse_param; /* parses *and* validates the input */
} enriched_tags[] = {
        { "bold",        "<b>",                 FALSE, NULL               },
        { "/bold",       "</b>",                FALSE, NULL               },
        { "italic",      "<i>",                 FALSE, NULL               },
        { "/italic",     "</i>",                FALSE, NULL               },
        { "fixed",       "<tt>",                FALSE, NULL               },
        { "/fixed",      "</tt>",               FALSE, NULL               },
        { "smaller",     "<font size=-1>",      FALSE, NULL               },
        { "/smaller",    "</font>",             FALSE, NULL               },
        { "bigger",      "<font size=+1>",      FALSE, NULL               },
        { "/bigger",     "</font>",             FALSE, NULL               },
        { "underline",   "<u>",                 FALSE, NULL               },
        { "/underline",  "</u>",                FALSE, NULL               },
        { "center",      "<p align=center>",    FALSE, NULL               },
        { "/center",     "</p>",                FALSE, NULL               },
        { "flushleft",   "<p align=left>",      FALSE, NULL               },
        { "/flushleft",  "</p>",                FALSE, NULL               },
        { "flushright",  "<p align=right>",     FALSE, NULL               },
        { "/flushright", "</p>",                FALSE, NULL               },
        { "excerpt",     "<blockquote>",        FALSE, NULL               },
        { "/excerpt",    "</blockquote>",       FALSE, NULL               },
        { "paragraph",   "<p>",                 FALSE, NULL               },
        { "signature",   "<address>",           FALSE, NULL               },
        { "/signature",  "</address>",          FALSE, NULL               },
        { "comment",     "<!-- ",               FALSE, NULL               },
        { "/comment",    " -->",                FALSE, NULL               },
        { "np",          "<hr>",                FALSE, NULL               },
        { "fontfamily",  "<font face=\"%s\">",  TRUE,  param_parse_font   },
        { "/fontfamily", "</font>",             FALSE, NULL               },
        { "color",       "<font color=\"%s\">", TRUE,  param_parse_colour },
        { "/color",      "</font>",             FALSE, NULL               },
        { "lang",        "<span lang=\"%s\">",  TRUE,  param_parse_lang   },
        { "/lang",       "</span>",             FALSE, NULL               },
        
        /* don't handle this tag yet... */
        /* (its content, including any <param>, ends up inside an HTML comment) */
        { "paraindent",  "<!-- ",               /* TRUE */ FALSE, NULL    },
        { "/paraindent", " -->",                FALSE, NULL               },
        
        /* as soon as we support all the tags that can have a param
         * tag argument, these should be unnecessary, but we'll keep
         * them anyway just in case? */
        { "param",       "<!-- ",               FALSE, NULL               },
        { "/param",      " -->",                FALSE, NULL               },
};
101
/* Number of entries in the enriched_tags table. */
#define NUM_ENRICHED_TAGS (sizeof (enriched_tags) / sizeof (enriched_tags[0]))

/* Case-insensitive map from enriched command name to its HTML string; filled
 * once from enriched_tags in g_mime_filter_enriched_class_init(). */
static GHashTable *enriched_hash = NULL;


/* GObject type plumbing. */
static void g_mime_filter_enriched_class_init (GMimeFilterEnrichedClass *klass);
static void g_mime_filter_enriched_init       (GMimeFilterEnriched *filter, GMimeFilterEnrichedClass *klass);
static void g_mime_filter_enriched_finalize   (GObject *object);

/* GMimeFilter virtual method implementations. */
static GMimeFilter *filter_copy (GMimeFilter *filter);
static void filter_filter (GMimeFilter *filter, char *in, size_t len, size_t prespace,
                           char **out, size_t *outlen, size_t *outprespace);
static void filter_complete (GMimeFilter *filter, char *in, size_t len, size_t prespace,
                             char **out, size_t *outlen, size_t *outprespace);
static void filter_reset (GMimeFilter *filter);


/* Parent (GMimeFilter) class, set in class_init and used to chain finalize. */
static GMimeFilterClass *parent_class = NULL;
120
121
122 GType
123 g_mime_filter_enriched_get_type (void)
124 {
125         static GType type = 0;
126         
127         if (!type) {
128                 static const GTypeInfo info = {
129                         sizeof (GMimeFilterEnrichedClass),
130                         NULL, /* base_class_init */
131                         NULL, /* base_class_finalize */
132                         (GClassInitFunc) g_mime_filter_enriched_class_init,
133                         NULL, /* class_finalize */
134                         NULL, /* class_data */
135                         sizeof (GMimeFilterEnriched),
136                         0,    /* n_preallocs */
137                         (GInstanceInitFunc) g_mime_filter_enriched_init,
138                 };
139                 
140                 type = g_type_register_static (GMIME_TYPE_FILTER, "GMimeFilterEnriched", &info, 0);
141         }
142         
143         return type;
144 }
145
146 static void
147 g_mime_filter_enriched_class_init (GMimeFilterEnrichedClass *klass)
148 {
149         GObjectClass *object_class = G_OBJECT_CLASS (klass);
150         GMimeFilterClass *filter_class = GMIME_FILTER_CLASS (klass);
151         guint i;
152         
153         parent_class = g_type_class_ref (GMIME_TYPE_FILTER);
154         
155         object_class->finalize = g_mime_filter_enriched_finalize;
156         
157         filter_class->copy = filter_copy;
158         filter_class->reset = filter_reset;
159         filter_class->filter = filter_filter;
160         filter_class->complete = filter_complete;
161         
162         if (!enriched_hash) {
163                 enriched_hash = g_hash_table_new (g_mime_strcase_hash, g_mime_strcase_equal);
164                 for (i = 0; i < NUM_ENRICHED_TAGS; i++)
165                         g_hash_table_insert (enriched_hash, enriched_tags[i].enriched,
166                                              enriched_tags[i].html);
167         }
168 }
169
170 static void
171 g_mime_filter_enriched_init (GMimeFilterEnriched *filter, GMimeFilterEnrichedClass *klass)
172 {
173         filter->flags = 0;
174         filter->nofill = 0;
175 }
176
177 static void
178 g_mime_filter_enriched_finalize (GObject *object)
179 {
180         G_OBJECT_CLASS (parent_class)->finalize (object);
181 }
182
183
184 static GMimeFilter *
185 filter_copy (GMimeFilter *filter)
186 {
187         GMimeFilterEnriched *enriched = (GMimeFilterEnriched *) filter;
188         
189         return g_mime_filter_enriched_new (enriched->flags);
190 }
191
/* Compiled out (#if 0): linear, case-insensitive search of enriched_tags that
 * reports the needs_param flag of the matching entry, or FALSE when the tag
 * is unknown.  enriched_to_html() calls html_tag_needs_param() instead. */
#if 0
static gboolean
enriched_tag_needs_param (const char *tag)
{
        int i;
        
        for (i = 0; i < NUM_ENRICHED_TAGS; i++)
                if (!g_ascii_strcasecmp (tag, enriched_tags[i].enriched))
                        return enriched_tags[i].needs_param;
        
        return FALSE;
}
#endif
205
206 static gboolean
207 html_tag_needs_param (const char *tag)
208 {
209         return strstr (tag, "%s") != NULL;
210 }
211
/* Colour names that param_parse_colour() recognises and returns in this
 * canonical lower-case spelling. */
static const char *valid_colours[] = {
        "red", "green", "blue", "yellow", "cyan", "magenta", "black", "white"
};

/* Number of entries in valid_colours. */
#define NUM_VALID_COLOURS  (sizeof (valid_colours) / sizeof (valid_colours[0]))
217
218 static char *
219 param_parse_colour (const char *inptr, size_t inlen)
220 {
221         const char *inend, *end;
222         guint32 rgb = 0;
223         guint v, i;
224         
225         for (i = 0; i < NUM_VALID_COLOURS; i++) {
226                 if (!g_ascii_strncasecmp (inptr, valid_colours[i], inlen))
227                         return g_strdup (valid_colours[i]);
228         }
229         
230         /* check for numeric r/g/b in the format: ####,####,#### */
231         if (inptr[4] != ',' || inptr[9] != ',') {
232                 /* okay, mailer must have used a string name that
233                  * rfc1896 did not specify? do some simple scanning
234                  * action, a colour name MUST be [a-zA-Z] */
235                 end = inptr;
236                 inend = inptr + inlen;
237                 while (end < inend && ((*end >= 'a' && *end <= 'z') || (*end >= 'A' && *end <= 'Z')))
238                         end++;
239                 
240                 return g_strndup (inptr, (size_t) (end - inptr));
241         }
242         
243         for (i = 0; i < 3; i++) {
244                 v = strtoul (inptr, (char **) &end, 16);
245                 if (end != inptr + 4)
246                         goto invalid_format;
247                 
248                 v >>= 8;
249                 rgb = (rgb << 8) | (v & 0xff);
250                 
251                 inptr += 5;
252         }
253         
254         return g_strdup_printf ("#%.6X", rgb);
255         
256  invalid_format:
257         
258         /* default colour? */
259         return g_strdup ("black");
260 }
261
262 static char *
263 param_parse_font (const char *fontfamily, size_t inlen)
264 {
265         register const char *inptr = fontfamily;
266         const char *inend = inptr + inlen;
267         
268         /* don't allow any of '"', '<', nor '>' */
269         while (inptr < inend && *inptr != '"' && *inptr != '<' && *inptr != '>')
270                 inptr++;
271         
272         return g_strndup (fontfamily, (size_t) (inptr - fontfamily));
273 }
274
275 static char *
276 param_parse_lang (const char *lang, size_t inlen)
277 {
278         register const char *inptr = lang;
279         const char *inend = inptr + inlen;
280         
281         /* don't allow any of '"', '<', nor '>' */
282         while (inptr < inend && *inptr != '"' && *inptr != '<' && *inptr != '>')
283                 inptr++;
284         
285         return g_strndup (lang, (size_t) (inptr - lang));
286 }
287
288 static char *
289 param_parse (const char *enriched, const char *inptr, size_t inlen)
290 {
291         guint i;
292         
293         for (i = 0; i < NUM_ENRICHED_TAGS; i++) {
294                 if (!g_ascii_strcasecmp (enriched, enriched_tags[i].enriched))
295                         return enriched_tags[i].parse_param (inptr, inlen);
296         }
297         
298         g_assert_not_reached ();
299         
300         return NULL;
301 }
302
303 #define IS_RICHTEXT GMIME_FILTER_ENRICHED_IS_RICHTEXT
304
305 static void
306 enriched_to_html (GMimeFilter *filter, char *in, size_t inlen, size_t prespace,
307                   char **out, size_t *outlen, size_t *outprespace, gboolean flush)
308 {
309         GMimeFilterEnriched *enriched = (GMimeFilterEnriched *) filter;
310         const char *tag, *inend, *outend;
311         register const char *inptr;
312         register char *outptr;
313         
314         g_mime_filter_set_size (filter, inlen * 2 + 6, FALSE);
315         
316         inptr = in;
317         inend = in + inlen;
318         outptr = filter->outbuf;
319         outend = filter->outbuf + filter->outsize;
320         
321  retry:
322         do {
323                 while (inptr < inend && outptr < outend && !strchr (" <>&\n", *inptr))
324                         *outptr++ = *inptr++;
325                 
326                 if (outptr == outend)
327                         goto backup;
328                 
329                 if ((inptr + 1) >= inend)
330                         break;
331                 
332                 switch (*inptr++) {
333                 case ' ':
334                         while (inptr < inend && (outptr + 7) < outend && *inptr == ' ') {
335                                 memcpy (outptr, "&nbsp;", 6);
336                                 outptr += 6;
337                                 inptr++;
338                         }
339                         
340                         if (outptr < outend)
341                                 *outptr++ = ' ';
342                         
343                         break;
344                 case '\n':
345                         if (!(enriched->flags & IS_RICHTEXT)) {
346                                 /* text/enriched */
347                                 if (enriched->nofill > 0) {
348                                         if ((outptr + 4) < outend) {
349                                                 memcpy (outptr, "<br>", 4);
350                                                 outptr += 4;
351                                         } else {
352                                                 inptr--;
353                                                 goto backup;
354                                         }
355                                 } else if (*inptr == '\n') {
356                                         if ((outptr + 4) >= outend) {
357                                                 inptr--;
358                                                 goto backup;
359                                         }
360                                         
361                                         while (inptr < inend && (outptr + 4) < outend && *inptr == '\n') {
362                                                 memcpy (outptr, "<br>", 4);
363                                                 outptr += 4;
364                                                 inptr++;
365                                         }
366                                 } else {
367                                         *outptr++ = ' ';
368                                 }
369                         } else {
370                                 /* text/richtext */
371                                 *outptr++ = ' ';
372                         }
373                         break;
374                 case '>':
375                         if ((outptr + 4) < outend) {
376                                 memcpy (outptr, "&gt;", 4);
377                                 outptr += 4;
378                         } else {
379                                 inptr--;
380                                 goto backup;
381                         }
382                         break;
383                 case '&':
384                         if ((outptr + 5) < outend) {
385                                 memcpy (outptr, "&amp;", 5);
386                                 outptr += 5;
387                         } else {
388                                 inptr--;
389                                 goto backup;
390                         }
391                         break;
392                 case '<':
393                         if (!(enriched->flags & IS_RICHTEXT)) {
394                                 /* text/enriched */
395                                 if (*inptr == '<') {
396                                         if ((outptr + 4) < outend) {
397                                                 memcpy (outptr, "&lt;", 4);
398                                                 outptr += 4;
399                                                 inptr++;
400                                                 break;
401                                         } else {
402                                                 inptr--;
403                                                 goto backup;
404                                         }
405                                 }
406                         } else {
407                                 /* text/richtext */
408                                 if ((inend - inptr) >= 3 && (outptr + 4) < outend) {
409                                         if (strncmp (inptr, "lt>", 3) == 0) {
410                                                 memcpy (outptr, "&lt;", 4);
411                                                 outptr += 4;
412                                                 inptr += 3;
413                                                 break;
414                                         } else if (strncmp (inptr, "nl>", 3) == 0) {
415                                                 memcpy (outptr, "<br>", 4);
416                                                 outptr += 4;
417                                                 inptr += 3;
418                                                 break;
419                                         }
420                                 } else {
421                                         inptr--;
422                                         goto backup;
423                                 }
424                         }
425                         
426                         tag = inptr;
427                         while (inptr < inend && *inptr != '>')
428                                 inptr++;
429                         
430                         if (inptr == inend) {
431                                 inptr = tag - 1;
432                                 goto need_input;
433                         }
434                         
435                         if (!g_ascii_strncasecmp (tag, "nofill>", 7)) {
436                                 if ((outptr + 5) < outend) {
437                                         enriched->nofill++;
438                                 } else {
439                                         inptr = tag - 1;
440                                         goto backup;
441                                 }
442                         } else if (!g_ascii_strncasecmp (tag, "/nofill>", 8)) {
443                                 if ((outptr + 6) < outend) {
444                                         enriched->nofill--;
445                                 } else {
446                                         inptr = tag - 1;
447                                         goto backup;
448                                 }
449                         } else {
450                                 const char *html_tag;
451                                 char *enriched_tag;
452                                 size_t len;
453                                 
454                                 len = inptr - tag;
455                                 enriched_tag = g_alloca (len + 1);
456                                 memcpy (enriched_tag, tag, len);
457                                 enriched_tag[len] = '\0';
458                                 
459                                 html_tag = g_hash_table_lookup (enriched_hash, enriched_tag);
460                                 
461                                 if (html_tag) {
462                                         if (html_tag_needs_param (html_tag)) {
463                                                 const char *start;
464                                                 char *param;
465                                                 
466                                                 while (inptr < inend && *inptr != '<')
467                                                         inptr++;
468                                                 
469 #define PARAM_TAG_MIN_LEN  (sizeof ("<param>") + sizeof ("</param>") - 1)
470                                                 if (inptr == inend || (size_t) (inend - inptr) <= PARAM_TAG_MIN_LEN) {
471                                                         inptr = tag - 1;
472                                                         goto need_input;
473                                                 }
474                                                 
475                                                 if (g_ascii_strncasecmp (inptr, "<param>", 7) != 0) {
476                                                         /* ignore the enriched command tag... */
477                                                         inptr -= 1;
478                                                         goto loop;
479                                                 }
480                                                 
481                                                 inptr += 7;
482                                                 start = inptr;
483                                                 
484                                                 while (inptr < inend && *inptr != '<')
485                                                         inptr++;
486                                                 
487                                                 if (inptr == inend || (inend - inptr) <= 8) {
488                                                         inptr = tag - 1;
489                                                         goto need_input;
490                                                 }
491                                                 
492                                                 if (g_ascii_strncasecmp (inptr, "</param>", 8) != 0) {
493                                                         /* ignore the enriched command tag... */
494                                                         inptr += 7;
495                                                         goto loop;
496                                                 }
497                                                 
498                                                 len = inptr - start;
499                                                 param = param_parse (enriched_tag, start, len);
500                                                 len = strlen (param);
501                                                 
502                                                 inptr += 7;
503                                                 
504                                                 len += strlen (html_tag);
505                                                 
506                                                 if ((outptr + len) < outend) {
507                                                         outptr += g_snprintf (outptr, len, html_tag, param);
508                                                         g_free (param);
509                                                 } else {
510                                                         g_free (param);
511                                                         inptr = tag - 1;
512                                                         goto backup;
513                                                 }
514                                         } else {
515                                                 len = strlen (html_tag);
516                                                 if ((outptr + len) < outend) {
517                                                         memcpy (outptr, html_tag, len);
518                                                         outptr += len;
519                                                 } else {
520                                                         inptr = tag - 1;
521                                                         goto backup;
522                                                 }
523                                         }
524                                 }
525                         }
526                         
527                 loop:
528                         inptr++;
529                         break;
530                 default:
531                         break;
532                 }
533         } while (inptr < inend);
534         
535  need_input:
536         
537         /* the reason we ignore @flush here is because if there isn't
538            enough input to parse a tag, then there's nothing we can
539            do. */
540         
541         if (inptr < inend)
542                 g_mime_filter_backup (filter, inptr, (unsigned) (inend - inptr));
543         
544         *out = filter->outbuf;
545         *outlen = outptr - filter->outbuf;
546         *outprespace = filter->outpre;
547         
548         return;
549         
550  backup:
551         
552         if (flush) {
553                 size_t offset, grow;
554                 
555                 grow = (inend - inptr) * 2 + 20;
556                 offset = outptr - filter->outbuf;
557                 g_mime_filter_set_size (filter, filter->outsize + grow, TRUE);
558                 outend = filter->outbuf + filter->outsize;
559                 outptr = filter->outbuf + offset;
560                 
561                 goto retry;
562         } else {
563                 g_mime_filter_backup (filter, inptr, (unsigned) (inend - inptr));
564         }
565         
566         *out = filter->outbuf;
567         *outlen = outptr - filter->outbuf;
568         *outprespace = filter->outpre;
569 }
570
571 static void
572 filter_filter (GMimeFilter *filter, char *in, size_t len, size_t prespace,
573                char **out, size_t *outlen, size_t *outprespace)
574 {
575         enriched_to_html (filter, in, len, prespace, out, outlen, outprespace, FALSE);
576 }
577
578 static void 
579 filter_complete (GMimeFilter *filter, char *in, size_t len, size_t prespace,
580                  char **out, size_t *outlen, size_t *outprespace)
581 {
582         enriched_to_html (filter, in, len, prespace, out, outlen, outprespace, TRUE);
583 }
584
585 static void
586 filter_reset (GMimeFilter *filter)
587 {
588         GMimeFilterEnriched *enriched = (GMimeFilterEnriched *) filter;
589         
590         enriched->nofill = 0;
591 }
592
593
594 /**
595  * g_mime_filter_enriched_new:
596  * @flags: flags
597  *
598  * Creates a new GMimeFilterEnriched object.
599  *
600  * Returns: a new GMimeFilter object.
601  **/
602 GMimeFilter *
603 g_mime_filter_enriched_new (guint32 flags)
604 {
605         GMimeFilterEnriched *new;
606         
607         new = g_object_newv (GMIME_TYPE_FILTER_ENRICHED, 0, NULL);
608         new->flags = flags;
609         
610         return (GMimeFilter *) new;
611 }