Fix FSF address (Tobias Mueller, #470445)
[platform/upstream/evolution-data-server.git] / camel / camel-mime-filter-enriched.c
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  *  Authors: Jeffrey Stedfast <fejj@ximian.com>
4  *
5  *  Copyright 2002 Ximian, Inc. (www.ximian.com)
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU Lesser General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU Lesser General Public License for more details.
16  *
17  *  You should have received a copy of the GNU Lesser General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20  *
21  */
22
23
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27
28 #include <stdio.h>
29 #include <string.h>
30
31 #include "camel-mime-filter-enriched.h"
32 #include "camel-string-utils.h"
33
34 /* text/enriched is rfc1896 */
35
/* Signature shared by the <param> parsers declared below: each one is
 * handed a pointer to the raw parameter text together with its length
 * and hands back a newly allocated string. */
typedef char * (*EnrichedParamParser) (const char *inptr, int inlen);

static char *param_parse_font (const char *inptr, int inlen);
static char *param_parse_lang (const char *inptr, int inlen);
static char *param_parse_colour (const char *inptr, int inlen);
41
42 static struct {
43         char *enriched;
44         char *html;
45         gboolean needs_param;
46         EnrichedParamParser parse_param; /* parses *and* validates the input */
47 } enriched_tags[] = {
48         { "bold",        "<b>",                 FALSE, NULL               },
49         { "/bold",       "</b>",                FALSE, NULL               },
50         { "italic",      "<i>",                 FALSE, NULL               },
51         { "/italic",     "</i>",                FALSE, NULL               },
52         { "fixed",       "<tt>",                FALSE, NULL               },
53         { "/fixed",      "</tt>",               FALSE, NULL               },
54         { "smaller",     "<font size=-1>",      FALSE, NULL               },
55         { "/smaller",    "</font>",             FALSE, NULL               },
56         { "bigger",      "<font size=+1>",      FALSE, NULL               },
57         { "/bigger",     "</font>",             FALSE, NULL               },
58         { "underline",   "<u>",                 FALSE, NULL               },
59         { "/underline",  "</u>",                FALSE, NULL               },
60         { "center",      "<p align=center>",    FALSE, NULL               },
61         { "/center",     "</p>",                FALSE, NULL               },
62         { "flushleft",   "<p align=left>",      FALSE, NULL               },
63         { "/flushleft",  "</p>",                FALSE, NULL               },
64         { "flushright",  "<p align=right>",     FALSE, NULL               },
65         { "/flushright", "</p>",                FALSE, NULL               },
66         { "excerpt",     "<blockquote>",        FALSE, NULL               },
67         { "/excerpt",    "</blockquote>",       FALSE, NULL               },
68         { "paragraph",   "<p>",                 FALSE, NULL               },
69         { "signature",   "<address>",           FALSE, NULL               },
70         { "/signature",  "</address>",          FALSE, NULL               },
71         { "comment",     "<!-- ",               FALSE, NULL               },
72         { "/comment",    " -->",                FALSE, NULL               },
73         { "np",          "<hr>",                FALSE, NULL               },
74         { "fontfamily",  "<font face=\"%s\">",  TRUE,  param_parse_font   },
75         { "/fontfamily", "</font>",             FALSE, NULL               },
76         { "color",       "<font color=\"%s\">", TRUE,  param_parse_colour },
77         { "/color",      "</font>",             FALSE, NULL               },
78         { "lang",        "<span lang=\"%s\">",  TRUE,  param_parse_lang   },
79         { "/lang",       "</span>",             FALSE, NULL               },
80         
81         /* don't handle this tag yet... */
82         { "paraindent",  "<!-- ",               /* TRUE */ FALSE, NULL    },
83         { "/paraindent", " -->",                FALSE, NULL               },
84         
85         /* as soon as we support all the tags that can have a param
86          * tag argument, these should be unnecessary, but we'll keep
87          * them anyway just in case? */
88         { "param",       "<!-- ",               FALSE, NULL               },
89         { "/param",      " -->",                FALSE, NULL               },
90 };
91
92 #define NUM_ENRICHED_TAGS (sizeof (enriched_tags) / sizeof (enriched_tags[0]))
93
94 static GHashTable *enriched_hash = NULL;
95
96
97 static void camel_mime_filter_enriched_class_init (CamelMimeFilterEnrichedClass *klass);
98 static void camel_mime_filter_enriched_init       (CamelMimeFilterEnriched *filter);
99 static void camel_mime_filter_enriched_finalize   (CamelObject *obj);
100
101 static void filter_filter (CamelMimeFilter *filter, char *in, size_t len, size_t prespace,
102                            char **out, size_t *outlen, size_t *outprespace);
103 static void filter_complete (CamelMimeFilter *filter, char *in, size_t len, size_t prespace,
104                              char **out, size_t *outlen, size_t *outprespace);
105 static void filter_reset (CamelMimeFilter *filter);
106
107
108 static CamelMimeFilterClass *parent_class = NULL;
109
110
111 CamelType
112 camel_mime_filter_enriched_get_type (void)
113 {
114         static CamelType type = CAMEL_INVALID_TYPE;
115         
116         if (type == CAMEL_INVALID_TYPE) {
117                 type = camel_type_register (camel_mime_filter_get_type (),
118                                             "CamelMimeFilterEnriched",
119                                             sizeof (CamelMimeFilterEnriched),
120                                             sizeof (CamelMimeFilterEnrichedClass),
121                                             (CamelObjectClassInitFunc) camel_mime_filter_enriched_class_init,
122                                             NULL,
123                                             (CamelObjectInitFunc) camel_mime_filter_enriched_init,
124                                             (CamelObjectFinalizeFunc) camel_mime_filter_enriched_finalize);
125         }
126         
127         return type;
128 }
129
130 static void
131 camel_mime_filter_enriched_class_init (CamelMimeFilterEnrichedClass *klass)
132 {
133         CamelMimeFilterClass *filter_class = (CamelMimeFilterClass *) klass;
134         int i;
135         
136         parent_class = CAMEL_MIME_FILTER_CLASS (camel_mime_filter_get_type ());
137         
138         filter_class->reset = filter_reset;
139         filter_class->filter = filter_filter;
140         filter_class->complete = filter_complete;
141         
142         if (!enriched_hash) {
143                 enriched_hash = g_hash_table_new (camel_strcase_hash, camel_strcase_equal);
144                 for (i = 0; i < NUM_ENRICHED_TAGS; i++)
145                         g_hash_table_insert (enriched_hash, enriched_tags[i].enriched,
146                                              enriched_tags[i].html);
147         }
148 }
149
150 static void
151 camel_mime_filter_enriched_finalize (CamelObject *obj)
152 {
153         ;
154 }
155
156 static void
157 camel_mime_filter_enriched_init (CamelMimeFilterEnriched *filter)
158 {
159         filter->flags = 0;
160         filter->nofill = 0;
161 }
162
163
#if 0
/* Currently compiled out.  Looks @tag up (ASCII case-insensitively) in
 * enriched_tags[] and reports its needs_param flag; unknown tags yield
 * FALSE. */
static gboolean
enriched_tag_needs_param (const char *tag)
{
        int n = 0;

        while (n < NUM_ENRICHED_TAGS) {
                if (g_ascii_strcasecmp (tag, enriched_tags[n].enriched) == 0)
                        return enriched_tags[n].needs_param;
                n++;
        }

        return FALSE;
}
#endif
177
178 static gboolean
179 html_tag_needs_param (const char *tag)
180 {
181         return strstr (tag, "%s") != NULL;
182 }
183
184 static const char *valid_colours[] = {
185         "red", "green", "blue", "yellow", "cyan", "magenta", "black", "white"
186 };
187
188 #define NUM_VALID_COLOURS  (sizeof (valid_colours) / sizeof (valid_colours[0]))
189
190 static char *
191 param_parse_colour (const char *inptr, int inlen)
192 {
193         const char *inend, *end;
194         guint32 rgb = 0;
195         guint v;
196         int i;
197         
198         for (i = 0; i < NUM_VALID_COLOURS; i++) {
199                 if (!g_ascii_strncasecmp (inptr, valid_colours[i], inlen))
200                         return g_strdup (valid_colours[i]);
201         }
202         
203         /* check for numeric r/g/b in the format: ####,####,#### */
204         if (inptr[4] != ',' || inptr[9] != ',') {
205                 /* okay, mailer must have used a string name that
206                  * rfc1896 did not specify? do some simple scanning
207                  * action, a colour name MUST be [a-zA-Z] */
208                 end = inptr;
209                 inend = inptr + inlen;
210                 while (end < inend && ((*end >= 'a' && *end <= 'z') || (*end >= 'A' && *end <= 'Z')))
211                         end++;
212                 
213                 return g_strndup (inptr, end - inptr);
214         }
215         
216         for (i = 0; i < 3; i++) {
217                 v = strtoul (inptr, (char **) &end, 16);
218                 if (end != inptr + 4)
219                         goto invalid_format;
220                 
221                 v >>= 8;
222                 rgb = (rgb << 8) | (v & 0xff);
223                 
224                 inptr += 5;
225         }
226         
227         return g_strdup_printf ("#%.6X", rgb);
228         
229  invalid_format:
230         
231         /* default colour? */
232         return g_strdup ("black");
233 }
234
/* Validates a <fontfamily> parameter: returns a newly allocated copy of
 * the leading part of @fontfamily (at most @inlen bytes) that contains
 * none of '"', '<' or '>'. */
static char *
param_parse_font (const char *fontfamily, int inlen)
{
        int keep;

        /* stop at the first character that could break out of the
         * quoted HTML attribute value */
        for (keep = 0; keep < inlen; keep++) {
                char c = fontfamily[keep];

                if (c == '"' || c == '<' || c == '>')
                        break;
        }

        return g_strndup (fontfamily, keep);
}
247
/* Validates a <lang> parameter: returns a newly allocated copy of the
 * leading part of @lang (at most @inlen bytes) that contains none of
 * '"', '<' or '>'. */
static char *
param_parse_lang (const char *lang, int inlen)
{
        int accepted = 0;

        while (accepted < inlen) {
                char c = lang[accepted];

                /* don't allow any of '"', '<', nor '>' */
                if (c == '"' || c == '<' || c == '>')
                        break;

                accepted++;
        }

        return g_strndup (lang, accepted);
}
260
261 static char *
262 param_parse (const char *enriched, const char *inptr, int inlen)
263 {
264         int i;
265         
266         for (i = 0; i < NUM_ENRICHED_TAGS; i++) {
267                 if (!g_ascii_strcasecmp (enriched, enriched_tags[i].enriched))
268                         return enriched_tags[i].parse_param (inptr, inlen);
269         }
270         
271         g_assert_not_reached ();
272         
273         return NULL;
274 }
275
276 #define IS_RICHTEXT CAMEL_MIME_FILTER_ENRICHED_IS_RICHTEXT
277
278 static void
279 enriched_to_html (CamelMimeFilter *filter, char *in, size_t inlen, size_t prespace,
280                   char **out, size_t *outlen, size_t *outprespace, gboolean flush)
281 {
282         CamelMimeFilterEnriched *enriched = (CamelMimeFilterEnriched *) filter;
283         const char *tag, *inend, *outend;
284         register const char *inptr;
285         register char *outptr;
286         
287         camel_mime_filter_set_size (filter, inlen * 2 + 6, FALSE);
288         
289         inptr = in;
290         inend = in + inlen;
291         outptr = filter->outbuf;
292         outend = filter->outbuf + filter->outsize;
293         
294  retry:
295         do {
296                 while (inptr < inend && outptr < outend && !strchr (" <>&\n", *inptr))
297                         *outptr++ = *inptr++;
298                 
299                 if (outptr == outend)
300                         goto backup;
301                 
302                 if ((inptr + 1) >= inend)
303                         break;
304                 
305                 switch (*inptr++) {
306                 case ' ':
307                         while (inptr < inend && (outptr + 7) < outend && *inptr == ' ') {
308                                 memcpy (outptr, "&nbsp;", 6);
309                                 outptr += 6;
310                                 inptr++;
311                         }
312                         
313                         if (outptr < outend)
314                                 *outptr++ = ' ';
315                         
316                         break;
317                 case '\n':
318                         if (!(enriched->flags & IS_RICHTEXT)) {
319                                 /* text/enriched */
320                                 if (enriched->nofill > 0) {
321                                         if ((outptr + 4) < outend) {
322                                                 memcpy (outptr, "<br>", 4);
323                                                 outptr += 4;
324                                         } else {
325                                                 inptr--;
326                                                 goto backup;
327                                         }
328                                 } else if (*inptr == '\n') {
329                                         if ((outptr + 4) >= outend) {
330                                                 inptr--;
331                                                 goto backup;
332                                         }
333                                         
334                                         while (inptr < inend && (outptr + 4) < outend && *inptr == '\n') {
335                                                 memcpy (outptr, "<br>", 4);
336                                                 outptr += 4;
337                                                 inptr++;
338                                         }
339                                 } else {
340                                         *outptr++ = ' ';
341                                 }
342                         } else {
343                                 /* text/richtext */
344                                 *outptr++ = ' ';
345                         }
346                         break;
347                 case '>':
348                         if ((outptr + 4) < outend) {
349                                 memcpy (outptr, "&gt;", 4);
350                                 outptr += 4;
351                         } else {
352                                 inptr--;
353                                 goto backup;
354                         }
355                         break;
356                 case '&':
357                         if ((outptr + 5) < outend) {
358                                 memcpy (outptr, "&amp;", 5);
359                                 outptr += 5;
360                         } else {
361                                 inptr--;
362                                 goto backup;
363                         }
364                         break;
365                 case '<':
366                         if (!(enriched->flags & IS_RICHTEXT)) {
367                                 /* text/enriched */
368                                 if (*inptr == '<') {
369                                         if ((outptr + 4) < outend) {
370                                                 memcpy (outptr, "&lt;", 4);
371                                                 outptr += 4;
372                                                 inptr++;
373                                                 break;
374                                         } else {
375                                                 inptr--;
376                                                 goto backup;
377                                         }
378                                 }
379                         } else {
380                                 /* text/richtext */
381                                 if ((inend - inptr) >= 3 && (outptr + 4) < outend) {
382                                         if (strncmp (inptr, "lt>", 3) == 0) {
383                                                 memcpy (outptr, "&lt;", 4);
384                                                 outptr += 4;
385                                                 inptr += 3;
386                                                 break;
387                                         } else if (strncmp (inptr, "nl>", 3) == 0) {
388                                                 memcpy (outptr, "<br>", 4);
389                                                 outptr += 4;
390                                                 inptr += 3;
391                                                 break;
392                                         }
393                                 } else {
394                                         inptr--;
395                                         goto backup;
396                                 }
397                         }
398                         
399                         tag = inptr;
400                         while (inptr < inend && *inptr != '>')
401                                 inptr++;
402                         
403                         if (inptr == inend) {
404                                 inptr = tag - 1;
405                                 goto need_input;
406                         }
407                         
408                         if (!g_ascii_strncasecmp (tag, "nofill>", 7)) {
409                                 if ((outptr + 5) < outend) {
410                                         enriched->nofill++;
411                                 } else {
412                                         inptr = tag - 1;
413                                         goto backup;
414                                 }
415                         } else if (!g_ascii_strncasecmp (tag, "/nofill>", 8)) {
416                                 if ((outptr + 6) < outend) {
417                                         enriched->nofill--;
418                                 } else {
419                                         inptr = tag - 1;
420                                         goto backup;
421                                 }
422                         } else {
423                                 const char *html_tag;
424                                 char *enriched_tag;
425                                 int len;
426                                 
427                                 len = inptr - tag;
428                                 enriched_tag = g_alloca (len + 1);
429                                 memcpy (enriched_tag, tag, len);
430                                 enriched_tag[len] = '\0';
431                                 
432                                 html_tag = g_hash_table_lookup (enriched_hash, enriched_tag);
433                                 
434                                 if (html_tag) {
435                                         if (html_tag_needs_param (html_tag)) {
436                                                 const char *start;
437                                                 char *param;
438                                                 
439                                                 while (inptr < inend && *inptr != '<')
440                                                         inptr++;
441                                                 
442                                                 if (inptr == inend || (inend - inptr) <= 15) {
443                                                         inptr = tag - 1;
444                                                         goto need_input;
445                                                 }
446                                                 
447                                                 if (g_ascii_strncasecmp (inptr, "<param>", 7) != 0) {
448                                                         /* ignore the enriched command tag... */
449                                                         inptr -= 1;
450                                                         goto loop;
451                                                 }
452                                                 
453                                                 inptr += 7;
454                                                 start = inptr;
455                                                 
456                                                 while (inptr < inend && *inptr != '<')
457                                                         inptr++;
458                                                 
459                                                 if (inptr == inend || (inend - inptr) <= 8) {
460                                                         inptr = tag - 1;
461                                                         goto need_input;
462                                                 }
463                                                 
464                                                 if (g_ascii_strncasecmp (inptr, "</param>", 8) != 0) {
465                                                         /* ignore the enriched command tag... */
466                                                         inptr += 7;
467                                                         goto loop;
468                                                 }
469                                                 
470                                                 len = inptr - start;
471                                                 param = param_parse (enriched_tag, start, len);
472                                                 len = strlen (param);
473                                                 
474                                                 inptr += 7;
475                                                 
476                                                 len += strlen (html_tag);
477                                                 
478                                                 if ((outptr + len) < outend) {
479                                                         outptr += snprintf (outptr, len, html_tag, param);
480                                                         g_free (param);
481                                                 } else {
482                                                         g_free (param);
483                                                         inptr = tag - 1;
484                                                         goto backup;
485                                                 }
486                                         } else {
487                                                 len = strlen (html_tag);
488                                                 if ((outptr + len) < outend) {
489                                                         memcpy (outptr, html_tag, len);
490                                                         outptr += len;
491                                                 } else {
492                                                         inptr = tag - 1;
493                                                         goto backup;
494                                                 }
495                                         }
496                                 }
497                         }
498                         
499                 loop:
500                         inptr++;
501                         break;
502                 default:
503                         break;
504                 }
505         } while (inptr < inend);
506         
507  need_input:
508         
509         /* the reason we ignore @flush here is because if there isn't
510            enough input to parse a tag, then there's nothing we can
511            do. */
512         
513         if (inptr < inend)
514                 camel_mime_filter_backup (filter, inptr, (unsigned) (inend - inptr));
515         
516         *out = filter->outbuf;
517         *outlen = outptr - filter->outbuf;
518         *outprespace = filter->outpre;
519         
520         return;
521         
522  backup:
523         
524         if (flush) {
525                 size_t offset, grow;
526                 
527                 grow = (inend - inptr) * 2 + 20;
528                 offset = outptr - filter->outbuf;
529                 camel_mime_filter_set_size (filter, filter->outsize + grow, TRUE);
530                 outend = filter->outbuf + filter->outsize;
531                 outptr = filter->outbuf + offset;
532                 
533                 goto retry;
534         } else {
535                 camel_mime_filter_backup (filter, inptr, (unsigned) (inend - inptr));
536         }
537         
538         *out = filter->outbuf;
539         *outlen = outptr - filter->outbuf;
540         *outprespace = filter->outpre;
541 }
542
543 static void
544 filter_filter (CamelMimeFilter *filter, char *in, size_t len, size_t prespace,
545                char **out, size_t *outlen, size_t *outprespace)
546 {
547         enriched_to_html (filter, in, len, prespace, out, outlen, outprespace, FALSE);
548 }
549
550 static void 
551 filter_complete (CamelMimeFilter *filter, char *in, size_t len, size_t prespace,
552                  char **out, size_t *outlen, size_t *outprespace)
553 {
554         enriched_to_html (filter, in, len, prespace, out, outlen, outprespace, TRUE);
555 }
556
557 static void
558 filter_reset (CamelMimeFilter *filter)
559 {
560         CamelMimeFilterEnriched *enriched = (CamelMimeFilterEnriched *) filter;
561         
562         enriched->nofill = 0;
563 }
564
565
566 /**
567  * camel_mime_filter_enriched_new:
568  * @flags: bitwise set of flags to specify filter behaviour
569  *
570  * Create a new #CamelMimeFilterEnriched object to convert input text
571  * streams from text/plain into text/enriched or text/richtext.
572  *
573  * Returns a new #CamelMimeFilterEnriched object
574  **/
575 CamelMimeFilter *
576 camel_mime_filter_enriched_new (guint32 flags)
577 {
578         CamelMimeFilterEnriched *new;
579         
580         new = (CamelMimeFilterEnriched *) camel_object_new (CAMEL_TYPE_MIME_FILTER_ENRICHED);
581         new->flags = flags;
582         
583         return CAMEL_MIME_FILTER (new);
584 }
585
586
587 /**
588  * camel_enriched_to_html:
589  * @in: input textual string
590  * @flags: flags specifying filter behaviour
591  *
592  * Convert @in from text/plain into text/enriched or text/richtext
593  * based on @flags.
594  *
595  * Returns a newly allocated string containing the enriched or
596  * richtext version of @in.
597  **/
598 char *
599 camel_enriched_to_html(const char *in, guint32 flags)
600 {
601         CamelMimeFilter *filter;
602         size_t outlen, outpre;
603         char *outbuf;
604
605         if (in == NULL)
606                 return NULL;
607         
608         filter = camel_mime_filter_enriched_new(flags);
609         
610         camel_mime_filter_complete(filter, (char *)in, strlen(in), 0, &outbuf, &outlen, &outpre);
611         outbuf = g_strndup (outbuf, outlen);
612         camel_object_unref (filter);
613         
614         return outbuf;
615 }