e653eea727536f88e38dbb4013a33de32e805e17
[platform/upstream/evolution-data-server.git] / camel / camel-mime-filter-enriched.c
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  *  Authors: Jeffrey Stedfast <fejj@ximian.com>
4  *
5  *  Copyright 2002 Ximian, Inc. (www.ximian.com)
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20  *
21  */
22
23
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27
28 #include <stdio.h>
29 #include <string.h>
30
31 #include "string-utils.h"
32
33 #include "camel-mime-filter-enriched.h"
34
35 /* text/enriched is rfc1896 */
36
37 typedef char * (*EnrichedParamParser) (const char *inptr, int inlen);
38
39 static char *param_parse_colour (const char *inptr, int inlen);
40 static char *param_parse_font (const char *inptr, int inlen);
41 static char *param_parse_lang (const char *inptr, int inlen);
42
43 static struct {
44         char *enriched;
45         char *html;
46         gboolean needs_param;
47         EnrichedParamParser parse_param; /* parses *and* validates the input */
48 } enriched_tags[] = {
49         { "bold",        "<b>",                 FALSE, NULL               },
50         { "/bold",       "</b>",                FALSE, NULL               },
51         { "italic",      "<i>",                 FALSE, NULL               },
52         { "/italic",     "</i>",                FALSE, NULL               },
53         { "fixed",       "<tt>",                FALSE, NULL               },
54         { "/fixed",      "</tt>",               FALSE, NULL               },
55         { "smaller",     "<font size=-1>",      FALSE, NULL               },
56         { "/smaller",    "</font>",             FALSE, NULL               },
57         { "bigger",      "<font size=+1>",      FALSE, NULL               },
58         { "/bigger",     "</font>",             FALSE, NULL               },
59         { "underline",   "<u>",                 FALSE, NULL               },
60         { "/underline",  "</u>",                FALSE, NULL               },
61         { "center",      "<p align=center>",    FALSE, NULL               },
62         { "/center",     "</p>",                FALSE, NULL               },
63         { "flushleft",   "<p align=left>",      FALSE, NULL               },
64         { "/flushleft",  "</p>",                FALSE, NULL               },
65         { "flushright",  "<p align=right>",     FALSE, NULL               },
66         { "/flushright", "</p>",                FALSE, NULL               },
67         { "excerpt",     "<blockquote>",        FALSE, NULL               },
68         { "/excerpt",    "</blockquote>",       FALSE, NULL               },
69         { "paragraph",   "<p>",                 FALSE, NULL               },
70         { "signature",   "<address>",           FALSE, NULL               },
71         { "/signature",  "</address>",          FALSE, NULL               },
72         { "comment",     "<!-- ",               FALSE, NULL               },
73         { "/comment",    " -->",                FALSE, NULL               },
74         { "np",          "<hr>",                FALSE, NULL               },
75         { "fontfamily",  "<font face=\"%s\">",  TRUE,  param_parse_font   },
76         { "/fontfamily", "</font>",             FALSE, NULL               },
77         { "color",       "<font color=\"%s\">", TRUE,  param_parse_colour },
78         { "/color",      "</font>",             FALSE, NULL               },
79         { "lang",        "<span lang=\"%s\">",  TRUE,  param_parse_lang   },
80         { "/lang",       "</span>",             FALSE, NULL               },
81         
82         /* don't handle this tag yet... */
83         { "paraindent",  "<!-- ",               /* TRUE */ FALSE, NULL    },
84         { "/paraindent", " -->",                FALSE, NULL               },
85         
86         /* as soon as we support all the tags that can have a param
87          * tag argument, these should be unnecessary, but we'll keep
88          * them anyway just in case? */
89         { "param",       "<!-- ",               FALSE, NULL               },
90         { "/param",      " -->",                FALSE, NULL               },
91 };
92
93 #define NUM_ENRICHED_TAGS (sizeof (enriched_tags) / sizeof (enriched_tags[0]))
94
95 static GHashTable *enriched_hash = NULL;
96
97
98 static void camel_mime_filter_enriched_class_init (CamelMimeFilterEnrichedClass *klass);
99 static void camel_mime_filter_enriched_init       (CamelMimeFilterEnriched *filter);
100 static void camel_mime_filter_enriched_finalize   (CamelObject *obj);
101
102 static void filter_filter (CamelMimeFilter *filter, char *in, size_t len, size_t prespace,
103                            char **out, size_t *outlen, size_t *outprespace);
104 static void filter_complete (CamelMimeFilter *filter, char *in, size_t len, size_t prespace,
105                              char **out, size_t *outlen, size_t *outprespace);
106 static void filter_reset (CamelMimeFilter *filter);
107
108
109 static CamelMimeFilterClass *parent_class = NULL;
110
111
112 CamelType
113 camel_mime_filter_enriched_get_type (void)
114 {
115         static CamelType type = CAMEL_INVALID_TYPE;
116         
117         if (type == CAMEL_INVALID_TYPE) {
118                 type = camel_type_register (camel_mime_filter_get_type (),
119                                             "CamelMimeFilterEnriched",
120                                             sizeof (CamelMimeFilterEnriched),
121                                             sizeof (CamelMimeFilterEnrichedClass),
122                                             (CamelObjectClassInitFunc) camel_mime_filter_enriched_class_init,
123                                             NULL,
124                                             (CamelObjectInitFunc) camel_mime_filter_enriched_init,
125                                             (CamelObjectFinalizeFunc) camel_mime_filter_enriched_finalize);
126         }
127         
128         return type;
129 }
130
131 static void
132 camel_mime_filter_enriched_class_init (CamelMimeFilterEnrichedClass *klass)
133 {
134         CamelMimeFilterClass *filter_class = (CamelMimeFilterClass *) klass;
135         int i;
136         
137         parent_class = CAMEL_MIME_FILTER_CLASS (camel_mime_filter_get_type ());
138         
139         filter_class->reset = filter_reset;
140         filter_class->filter = filter_filter;
141         filter_class->complete = filter_complete;
142         
143         if (!enriched_hash) {
144                 enriched_hash = g_hash_table_new (g_strcase_hash, g_strcase_equal);
145                 for (i = 0; i < NUM_ENRICHED_TAGS; i++)
146                         g_hash_table_insert (enriched_hash, enriched_tags[i].enriched,
147                                              enriched_tags[i].html);
148         }
149 }
150
151 static void
152 camel_mime_filter_enriched_finalize (CamelObject *obj)
153 {
154         ;
155 }
156
157 static void
158 camel_mime_filter_enriched_init (CamelMimeFilterEnriched *filter)
159 {
160         filter->flags = 0;
161         filter->nofill = 0;
162 }
163
164
#if 0
/* Compiled out: table-driven variant of html_tag_needs_param() that
 * reports the needs_param field of the entry matching @tag, or FALSE when
 * the command is not in enriched_tags. */
static gboolean
enriched_tag_needs_param (const char *tag)
{
        int idx = 0;
        
        while (idx < NUM_ENRICHED_TAGS) {
                if (strcasecmp (tag, enriched_tags[idx].enriched) == 0)
                        return enriched_tags[idx].needs_param;
                idx++;
        }
        
        return FALSE;
}
#endif
178
179 static gboolean
180 html_tag_needs_param (const char *tag)
181 {
182         return strstr (tag, "%s") != NULL;
183 }
184
185 static const char *valid_colours[] = {
186         "red", "green", "blue", "yellow", "cyan", "magenta", "black", "white"
187 };
188
189 #define NUM_VALID_COLOURS  (sizeof (valid_colours) / sizeof (valid_colours[0]))
190
191 static char *
192 param_parse_colour (const char *inptr, int inlen)
193 {
194         const char *inend, *end;
195         guint32 rgb = 0;
196         guint v;
197         int i;
198         
199         for (i = 0; i < NUM_VALID_COLOURS; i++) {
200                 if (!strncasecmp (inptr, valid_colours[i], inlen))
201                         return g_strdup (valid_colours[i]);
202         }
203         
204         /* check for numeric r/g/b in the format: ####,####,#### */
205         if (inptr[4] != ',' || inptr[9] != ',') {
206                 /* okay, mailer must have used a string name that
207                  * rfc1896 did not specify? do some simple scanning
208                  * action, a colour name MUST be [a-zA-Z] */
209                 end = inptr;
210                 inend = inptr + inlen;
211                 while (end < inend && ((*end >= 'a' && *end <= 'z') || (*end >= 'A' && *end <= 'Z')))
212                         end++;
213                 
214                 return g_strndup (inptr, end - inptr);
215         }
216         
217         for (i = 0; i < 3; i++) {
218                 v = strtoul (inptr, (char **) &end, 16);
219                 if (end != inptr + 4)
220                         goto invalid_format;
221                 
222                 v >>= 8;
223                 rgb = (rgb << 8) | (v & 0xff);
224                 
225                 inptr += 5;
226         }
227         
228         return g_strdup_printf ("#%.6X", rgb);
229         
230  invalid_format:
231         
232         /* default colour? */
233         return g_strdup ("black");
234 }
235
/* Parses the parameter of a <fontfamily> command: returns a newly
 * allocated copy of the leading part of @fontfamily (at most @inlen
 * bytes) that contains none of '"', '<' or '>'. */
static char *
param_parse_font (const char *fontfamily, int inlen)
{
        int keep = 0;
        
        while (keep < inlen) {
                char c = fontfamily[keep];
                
                /* these would let the value escape the quoted attribute */
                if (c == '"' || c == '<' || c == '>')
                        break;
                
                keep++;
        }
        
        return g_strndup (fontfamily, keep);
}
248
/* Parses the parameter of a <lang> command: returns a newly allocated
 * copy of the leading part of @lang (at most @inlen bytes) that contains
 * none of '"', '<' or '>'. */
static char *
param_parse_lang (const char *lang, int inlen)
{
        int keep = 0;
        
        while (keep < inlen) {
                char c = lang[keep];
                
                /* these would let the value escape the quoted attribute */
                if (c == '"' || c == '<' || c == '>')
                        break;
                
                keep++;
        }
        
        return g_strndup (lang, keep);
}
261
262 static char *
263 param_parse (const char *enriched, const char *inptr, int inlen)
264 {
265         int i;
266         
267         for (i = 0; i < NUM_ENRICHED_TAGS; i++) {
268                 if (!strcasecmp (enriched, enriched_tags[i].enriched))
269                         return enriched_tags[i].parse_param (inptr, inlen);
270         }
271         
272         g_assert_not_reached ();
273         
274         return NULL;
275 }
276
277 #define IS_RICHTEXT CAMEL_MIME_FILTER_ENRICHED_IS_RICHTEXT
278
279 static void
280 enriched_to_html (CamelMimeFilter *filter, char *in, size_t inlen, size_t prespace,
281                   char **out, size_t *outlen, size_t *outprespace, gboolean flush)
282 {
283         CamelMimeFilterEnriched *enriched = (CamelMimeFilterEnriched *) filter;
284         const char *tag, *inend, *outend;
285         register const char *inptr;
286         register char *outptr;
287         
288         camel_mime_filter_set_size (filter, inlen * 2 + 6, FALSE);
289         
290         inptr = in;
291         inend = in + inlen;
292         outptr = filter->outbuf;
293         outend = filter->outbuf + filter->outsize;
294         
295  retry:
296         do {
297                 while (inptr < inend && outptr < outend && !strchr (" <>&\n", *inptr))
298                         *outptr++ = *inptr++;
299                 
300                 if (outptr == outend)
301                         goto backup;
302                 
303                 if ((inptr + 1) >= inend)
304                         break;
305                 
306                 switch (*inptr++) {
307                 case ' ':
308                         while (inptr < inend && (outptr + 7) < outend && *inptr == ' ') {
309                                 memcpy (outptr, "&nbsp;", 6);
310                                 outptr += 6;
311                                 inptr++;
312                         }
313                         
314                         if (outptr < outend)
315                                 *outptr++ = ' ';
316                         
317                         break;
318                 case '\n':
319                         if (!(enriched->flags & IS_RICHTEXT) && enriched->nofill <= 0) {
320                                 /* text/enriched */
321                                 while (inptr < inend && (outptr + 4) < outend && *inptr == '\n') {
322                                         memcpy (outptr, "<br>", 4);
323                                         outptr += 4;
324                                         inptr++;
325                                 }
326                         } else {
327                                 /* text/richtext */
328                                 *outptr++ = ' ';
329                         }
330                         break;
331                 case '>':
332                         if ((outptr + 4) < outend) {
333                                 memcpy (outptr, "&gt;", 4);
334                                 outptr += 4;
335                         } else {
336                                 inptr--;
337                                 goto backup;
338                         }
339                         break;
340                 case '&':
341                         if ((outptr + 5) < outend) {
342                                 memcpy (outptr, "&amp;", 5);
343                                 outptr += 5;
344                         } else {
345                                 inptr--;
346                                 goto backup;
347                         }
348                         break;
349                 case '<':
350                         if (!(enriched->flags & IS_RICHTEXT)) {
351                                 /* text/enriched */
352                                 if (*inptr == '<') {
353                                         if ((outptr + 4) < outend) {
354                                                 memcpy (outptr, "&lt;", 4);
355                                                 outptr += 4;
356                                                 inptr++;
357                                                 break;
358                                         } else {
359                                                 inptr--;
360                                                 goto backup;
361                                         }
362                                 }
363                         } else {
364                                 /* text/richtext */
365                                 if ((inend - inptr) >= 3 && (outptr + 4) < outend) {
366                                         if (strncmp (inptr, "lt>", 3) == 0) {
367                                                 memcpy (outptr, "&lt;", 4);
368                                                 outptr += 4;
369                                                 inptr += 3;
370                                                 break;
371                                         } else if (strncmp (inptr, "nl>", 3) == 0) {
372                                                 memcpy (outptr, "<br>", 4);
373                                                 outptr += 4;
374                                                 inptr += 3;
375                                                 break;
376                                         }
377                                 } else {
378                                         inptr--;
379                                         goto backup;
380                                 }
381                         }
382                         
383                         tag = inptr;
384                         while (inptr < inend && *inptr != '>')
385                                 inptr++;
386                         
387                         if (inptr == inend) {
388                                 inptr = tag - 1;
389                                 goto need_input;
390                         }
391                         
392                         if (!strncasecmp (tag, "nofill>", 7)) {
393                                 if ((outptr + 5) < outend) {
394                                         memcpy (outptr, "<pre>", 5);
395                                         enriched->nofill++;
396                                         outptr += 5;
397                                 } else {
398                                         inptr = tag - 1;
399                                         goto backup;
400                                 }
401                         } else if (!strncasecmp (tag, "/nofill>", 8)) {
402                                 if ((outptr + 6) < outend) {
403                                         memcpy (outptr, "</pre>", 6);
404                                         enriched->nofill--;
405                                         outptr += 6;
406                                 } else {
407                                         inptr = tag - 1;
408                                         goto backup;
409                                 }
410                         } else {
411                                 const char *html_tag;
412                                 char *enriched_tag;
413                                 int len;
414                                 
415                                 len = inptr - tag;
416                                 enriched_tag = g_alloca (len + 1);
417                                 memcpy (enriched_tag, tag, len);
418                                 enriched_tag[len] = '\0';
419                                 
420                                 html_tag = g_hash_table_lookup (enriched_hash, enriched_tag);
421                                 
422                                 if (html_tag) {
423                                         if (html_tag_needs_param (html_tag)) {
424                                                 const char *start;
425                                                 char *param;
426                                                 
427                                                 while (inptr < inend && *inptr != '<')
428                                                         inptr++;
429                                                 
430 #define PARAM_TAG_MIN_LEN  (sizeof ("<param>") + sizeof ("</param>") - 1)
431                                                 if (inptr == inend || (inend - inptr) <= PARAM_TAG_MIN_LEN) {
432                                                         inptr = tag - 1;
433                                                         goto need_input;
434                                                 }
435                                                 
436                                                 if (strncasecmp (inptr, "<param>", 7) != 0) {
437                                                         /* ignore the enriched command tag... */
438                                                         inptr -= 1;
439                                                         goto loop;
440                                                 }
441                                                 
442                                                 inptr += 7;
443                                                 start = inptr;
444                                                 
445                                                 while (inptr < inend && *inptr != '<')
446                                                         inptr++;
447                                                 
448                                                 if (inptr == inend || (inend - inptr) <= 8) {
449                                                         inptr = tag - 1;
450                                                         goto need_input;
451                                                 }
452                                                 
453                                                 if (strncasecmp (inptr, "</param>", 8) != 0) {
454                                                         /* ignore the enriched command tag... */
455                                                         inptr += 7;
456                                                         goto loop;
457                                                 }
458                                                 
459                                                 len = inptr - start;
460                                                 param = param_parse (enriched_tag, start, len);
461                                                 len = strlen (param);
462                                                 
463                                                 inptr += 7;
464                                                 
465                                                 len += strlen (html_tag);
466                                                 
467                                                 if ((outptr + len) < outend) {
468                                                         outptr += snprintf (outptr, len, html_tag, param);
469                                                         g_free (param);
470                                                 } else {
471                                                         g_free (param);
472                                                         inptr = tag - 1;
473                                                         goto backup;
474                                                 }
475                                         } else {
476                                                 len = strlen (html_tag);
477                                                 if ((outptr + len) < outend) {
478                                                         memcpy (outptr, html_tag, len);
479                                                         outptr += len;
480                                                 } else {
481                                                         inptr = tag - 1;
482                                                         goto backup;
483                                                 }
484                                         }
485                                 }
486                         }
487                         
488                 loop:
489                         inptr++;
490                         break;
491                 default:
492                         break;
493                 }
494         } while (inptr < inend);
495         
496  need_input:
497         
498         /* the reason we ignore @flush here is because if there isn't
499            enough input to parse a tag, then there's nothing we can
500            do. */
501         
502         if (inptr < inend)
503                 camel_mime_filter_backup (filter, inptr, (unsigned) (inend - inptr));
504         
505         *out = filter->outbuf;
506         *outlen = outptr - filter->outbuf;
507         *outprespace = filter->outpre;
508         
509         return;
510         
511  backup:
512         
513         if (flush) {
514                 size_t offset, grow;
515                 
516                 grow = (inend - inptr) * 2 + 20;
517                 offset = outptr - filter->outbuf;
518                 camel_mime_filter_set_size (filter, filter->outsize + grow, TRUE);
519                 outend = filter->outbuf + filter->outsize;
520                 outptr = filter->outbuf + offset;
521                 
522                 goto retry;
523         } else {
524                 camel_mime_filter_backup (filter, inptr, (unsigned) (inend - inptr));
525         }
526         
527         *out = filter->outbuf;
528         *outlen = outptr - filter->outbuf;
529         *outprespace = filter->outpre;
530 }
531
532 static void
533 filter_filter (CamelMimeFilter *filter, char *in, size_t len, size_t prespace,
534                char **out, size_t *outlen, size_t *outprespace)
535 {
536         enriched_to_html (filter, in, len, prespace, out, outlen, outprespace, FALSE);
537 }
538
539 static void 
540 filter_complete (CamelMimeFilter *filter, char *in, size_t len, size_t prespace,
541                  char **out, size_t *outlen, size_t *outprespace)
542 {
543         enriched_to_html (filter, in, len, prespace, out, outlen, outprespace, TRUE);
544 }
545
546 static void
547 filter_reset (CamelMimeFilter *filter)
548 {
549         CamelMimeFilterEnriched *enriched = (CamelMimeFilterEnriched *) filter;
550         
551         enriched->nofill = 0;
552 }
553
554
555 /**
556  * camel_mime_filter_enriched_new:
557  * @flags:
558  *
559  * Creates a new CamelMimeFilterEnriched object.
560  *
561  * Returns a new CamelMimeFilter object.
562  **/
563 CamelMimeFilter *
564 camel_mime_filter_enriched_new (guint32 flags)
565 {
566         CamelMimeFilterEnriched *new;
567         
568         new = (CamelMimeFilterEnriched *) camel_object_new (CAMEL_TYPE_MIME_FILTER_ENRICHED);
569         new->flags = flags;
570         
571         return CAMEL_MIME_FILTER (new);
572 }
573
574 char *
575 camel_enriched_to_html(const char *in, guint32 flags)
576 {
577         CamelMimeFilter *filter;
578         size_t outlen, outpre;
579         char *outbuf;
580
581         if (in == NULL)
582                 return NULL;
583         
584         filter = camel_mime_filter_enriched_new(flags);
585         
586         camel_mime_filter_complete(filter, (char *)in, strlen(in), 0, &outbuf, &outlen, &outpre);
587         outbuf = g_strndup (outbuf, outlen);
588         camel_object_unref (filter);
589         
590         return outbuf;
591 }